Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-12-13 15:59:52 -05:00
commit 7a386b45a5
42 changed files with 2126 additions and 1103 deletions

View File

@ -711,6 +711,8 @@ public class SAMDataSource {
* @param validationStringency validation stringency. * @param validationStringency validation stringency.
*/ */
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) { public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
for(SAMReaderID readerID: readerIDs) { for(SAMReaderID readerID: readerIDs) {
File indexFile = findIndexFile(readerID.samFile); File indexFile = findIndexFile(readerID.samFile);
@ -728,8 +730,7 @@ public class SAMDataSource {
reader.enableFileSource(true); reader.enableFileSource(true);
reader.setValidationStringency(validationStringency); reader.setValidationStringency(validationStringency);
final SAMFileHeader header = reader.getFileHeader(); logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
readers.put(readerID,reader); readers.put(readerID,reader);
} }

View File

@ -142,20 +142,75 @@ public class SampleDB {
* @return * @return
*/ */
public final Map<String, Set<Sample>> getFamilies() { public final Map<String, Set<Sample>> getFamilies() {
return getFamilies(null);
}
/**
* Returns a map from family ID -> set of family members for all samples in sampleIds with
* non-null family ids
*
* @param sampleIds - all samples to include. If null is passed then all samples are returned.
* @return
*/
public final Map<String, Set<Sample>> getFamilies(Collection<String> sampleIds) {
final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>(); final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
for ( final Sample sample : samples.values() ) { for ( final Sample sample : samples.values() ) {
final String famID = sample.getFamilyID(); if(sampleIds == null || sampleIds.contains(sample.getID())){
if ( famID != null ) { final String famID = sample.getFamilyID();
if ( ! families.containsKey(famID) ) if ( famID != null ) {
families.put(famID, new TreeSet<Sample>()); if ( ! families.containsKey(famID) )
families.get(famID).add(sample); families.put(famID, new TreeSet<Sample>());
families.get(famID).add(sample);
}
} }
} }
return families; return families;
} }
/**
* Returns the set of all children that have both of their parents.
* Note that if a family is composed of more than 1 child, each child is
* returned.
* @return - all the children that have both of their parents
*/
public final Set<Sample> getChildrenWithParents(){
return getChildrenWithParents(false);
}
/**
* Returns the set of all children that have both of their parents.
* Note that if triosOnly = false, a family is composed of more than 1 child, each child is
* returned.
*
* This method can be used wherever trios are needed
*
* @param triosOnly - if set to true, only strict trios are returned
* @return - all the children that have both of their parents
*/
public final Set<Sample> getChildrenWithParents(boolean triosOnly) {
Map<String, Set<Sample>> families = getFamilies();
final Set<Sample> childrenWithParents = new HashSet<Sample>();
Iterator<Sample> sampleIterator;
for ( Set<Sample> familyMembers: families.values() ) {
if(triosOnly && familyMembers.size() != 3)
continue;
sampleIterator = familyMembers.iterator();
Sample sample;
while(sampleIterator.hasNext()){
sample = sampleIterator.next();
if(sample.getParents().size() == 2 && familyMembers.containsAll(sample.getParents()))
childrenWithParents.add(sample);
}
}
return childrenWithParents;
}
/** /**
* Return all samples with a given family ID * Return all samples with a given family ID
* @param familyId * @param familyId

View File

@ -88,7 +88,7 @@ public abstract class Walker<MapType, ReduceType> {
return getToolkit().getMasterSequenceDictionary(); return getToolkit().getMasterSequenceDictionary();
} }
protected SampleDB getSampleDB() { public SampleDB getSampleDB() {
return getToolkit().getSampleDB(); return getToolkit().getSampleDB();
} }

View File

@ -3,22 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays; import java.util.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -30,23 +26,26 @@ import java.util.Map;
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation { public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
private MendelianViolation mendelianViolation = null; private MendelianViolation mendelianViolation = null;
private String motherId;
private String fatherId;
private String childId;
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( mendelianViolation == null ) { if ( mendelianViolation == null ) {
if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) { if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
} }
else { else {
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line."); throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
} }
} }
Map<String,Object> toRet = new HashMap<String,Object>(1); Map<String,Object> toRet = new HashMap<String,Object>(1);
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() && boolean hasAppropriateGenotypes = vc.hasGenotype(motherId) && vc.getGenotype(motherId).hasLikelihoods() &&
vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() && vc.hasGenotype(fatherId) && vc.getGenotype(fatherId).hasLikelihoods() &&
vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods(); vc.hasGenotype(childId) && vc.getGenotype(childId).hasLikelihoods();
if ( hasAppropriateGenotypes ) if ( hasAppropriateGenotypes )
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc,motherId,fatherId,childId));
return toRet; return toRet;
} }
@ -55,4 +54,27 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
public List<String> getKeyNames() { return Arrays.asList("MVLR"); } public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
private boolean checkAndSetSamples(SampleDB db){
Set<String> families = db.getFamilyIDs();
if(families.size() != 1)
return false;
Set<Sample> family = db.getFamily(families.iterator().next());
if(family.size() != 3)
return false;
Iterator<Sample> sampleIter = family.iterator();
Sample sample;
for(sample = sampleIter.next();sampleIter.hasNext();sample=sampleIter.next()){
if(sample.getParents().size()==2){
motherId = sample.getMaternalID();
fatherId = sample.getPaternalID();
childId = sample.getID();
return true;
}
}
return false;
}
} }

View File

@ -38,7 +38,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
for ( final Genotype genotype : genotypes ) { for ( final Genotype genotype : genotypes ) {
// we care only about variant calls with likelihoods // we care only about variant calls with likelihoods
if ( genotype.isHomRef() ) if ( !genotype.isHet() && !genotype.isHomVar() )
continue; continue;
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());

View File

@ -12,10 +12,8 @@ import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
/** /**
@ -26,42 +24,33 @@ import java.util.*;
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation { public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation {
private Set<MendelianViolation> fullMVSet = null; private Set<Sample> trios = null;
private final static int REF = 0; private final static int REF = 0;
private final static int HET = 1; private final static int HET = 1;
private final static int HOM = 2; private final static int HOM = 2;
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( fullMVSet == null ) { if ( trios == null ) {
fullMVSet = new HashSet<MendelianViolation>();
if ( walker instanceof VariantAnnotator ) { if ( walker instanceof VariantAnnotator ) {
final Map<String,Set<Sample>> families = ((VariantAnnotator) walker).getSampleDB().getFamilies(); trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
for( final Set<Sample> family : families.values() ) {
for( final Sample sample : family ) {
if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now
fullMVSet.add( new MendelianViolation(sample, 0.0) );
}
}
}
} else { } else {
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in."); throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
} }
} }
final Map<String,Object> toRet = new HashMap<String,Object>(1); final Map<String,Object> toRet = new HashMap<String,Object>(1);
final HashSet<MendelianViolation> mvsToTest = new HashSet<MendelianViolation>(); final HashSet<Sample> triosToTest = new HashSet<Sample>();
for( final MendelianViolation mv : fullMVSet ) { for( final Sample child : trios) {
final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() && final boolean hasAppropriateGenotypes = vc.hasGenotype(child.getID()) && vc.getGenotype(child.getID()).hasLikelihoods() &&
vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() && vc.hasGenotype(child.getPaternalID()) && vc.getGenotype(child.getPaternalID()).hasLikelihoods() &&
vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods(); vc.hasGenotype(child.getMaternalID()) && vc.getGenotype(child.getMaternalID()).hasLikelihoods();
if ( hasAppropriateGenotypes ) { if ( hasAppropriateGenotypes ) {
mvsToTest.add(mv); triosToTest.add(child);
} }
} }
toRet.put("TDT", calculateTDT( vc, mvsToTest )); toRet.put("TDT", calculateTDT( vc, triosToTest ));
return toRet; return toRet;
} }
@ -72,27 +61,27 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
private double calculateTDT( final VariantContext vc, final Set<MendelianViolation> mvsToTest ) { private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM); final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM);
final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM); final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM);
final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET); final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET);
final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET); final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET);
final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET); final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET);
final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET); final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET);
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
return (numer * numer) / denom; return (numer * numer) / denom;
} }
private double calculateNChildren( final VariantContext vc, final Set<MendelianViolation> mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) { private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
final double likelihoodVector[] = new double[mvsToTest.size() * 2]; final double likelihoodVector[] = new double[triosToTest.size() * 2];
int iii = 0; int iii = 0;
for( final MendelianViolation mv : mvsToTest ) { for( final Sample child : triosToTest ) {
final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector(); final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector(); final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector(); final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
} }

View File

@ -167,9 +167,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false; protected boolean indelsOnly = false;
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
public String familyStr = null;
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio") @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
public double minGenotypeQualityP = 0.0; public double minGenotypeQualityP = 0.0;

View File

@ -52,7 +52,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
protected enum GenotypeType { AA, AB, BB } protected enum GenotypeType { AA, AB, BB }
protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
this.N = N; this.N = N;
@ -62,24 +62,14 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
/** /**
* Must be overridden by concrete subclasses * Must be overridden by concrete subclasses
* @param GLs genotype likelihoods * @param GLs genotype likelihoods
* @param Alleles Alleles corresponding to GLs * @param Alleles Alleles corresponding to GLs
* @param log10AlleleFrequencyPriors priors * @param log10AlleleFrequencyPriors priors
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results * @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results
*/ */
protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles, protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles,
double[] log10AlleleFrequencyPriors, double[][] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors); double[][] log10AlleleFrequencyLikelihoods,
double[][] log10AlleleFrequencyPosteriors);
/**
* Can be overridden by concrete subclasses
* @param vc variant context with genotype likelihoods
* @param log10AlleleFrequencyPosteriors allele frequency results
* @param AFofMaxLikelihood allele frequency of max likelihood
*
* @return calls
*/
protected abstract GenotypesContext assignGenotypes(VariantContext vc,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood);
} }

View File

@ -1,94 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.variantcontext.Allele;
public class BiallelicGenotypeLikelihoods {
private String sample;
private double[] GLs;
private Allele A, B;
private int depth;
/**
* Create a new object for sample with given alleles and genotype likelihoods
*
* @param sample sample name
* @param A allele A
* @param B allele B
* @param log10AALikelihoods AA likelihoods
* @param log10ABLikelihoods AB likelihoods
* @param log10BBLikelihoods BB likelihoods
* @param depth the read depth used in creating the likelihoods
*/
public BiallelicGenotypeLikelihoods(String sample,
Allele A,
Allele B,
double log10AALikelihoods,
double log10ABLikelihoods,
double log10BBLikelihoods,
int depth) {
this.sample = sample;
this.A = A;
this.B = B;
this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods};
this.depth = depth;
}
public String getSample() {
return sample;
}
public double getAALikelihoods() {
return GLs[0];
}
public double getABLikelihoods() {
return GLs[1];
}
public double getBBLikelihoods() {
return GLs[2];
}
public double[] getLikelihoods() {
return GLs;
}
public Allele getAlleleA() {
return A;
}
public Allele getAlleleB() {
return B;
}
public int getDepth() {
return depth;
}
}

View File

@ -27,13 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: Aug 4, 2009
* Time: 6:46:09 PM
* To change this template use File | Settings | File Templates.
*/
public enum DiploidGenotype { public enum DiploidGenotype {
AA ('A', 'A'), AA ('A', 'A'),
AC ('A', 'C'), AC ('A', 'C'),
@ -110,6 +103,20 @@ public enum DiploidGenotype {
return conversionMatrix[index1][index2]; return conversionMatrix[index1][index2];
} }
/**
* create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly
* @param baseIndex1 base1
* @param baseIndex2 base2
* @return the diploid genotype
*/
public static DiploidGenotype createDiploidGenotype(int baseIndex1, int baseIndex2) {
if ( baseIndex1 == -1 )
throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character");
if ( baseIndex2 == -1 )
throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character");
return conversionMatrix[baseIndex1][baseIndex2];
}
private static final DiploidGenotype[][] conversionMatrix = { private static final DiploidGenotype[][] conversionMatrix = {
{ DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT }, { DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT },
{ DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT }, { DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT },

View File

@ -27,80 +27,43 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
//
// code for testing purposes
//
private final static boolean DEBUG = false; private final static boolean DEBUG = false;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
private final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter); super(UAC, N, logger, verboseWriter);
} }
public void getLog10PNonRef(GenotypesContext GLs, List<Allele> alleles, public void getLog10PNonRef(final GenotypesContext GLs,
double[] log10AlleleFrequencyPriors, final List<Allele> alleles,
double[] log10AlleleFrequencyPosteriors) { final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods,
final double[][] log10AlleleFrequencyPosteriors) {
final int numAlleles = alleles.size(); final int numAlleles = alleles.size();
final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
int idxDiag = numAlleles; //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
int incr = numAlleles - 1; linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
for (int k=1; k < numAlleles; k++) {
// multi-allelic approximation, part 1: Ideally
// for each alt allele compute marginal (suboptimal) posteriors -
// compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of
// as a row-wise upper triangular matrix of likelihoods.
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
final int idxAA = 0;
final int idxAB = k;
// yy is always element on the diagonal.
// 2 alleles: BBelement 2
// 3 alleles: BB element 3. CC element 5
// 4 alleles:
final int idxBB = idxDiag;
idxDiag += incr--;
final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
if (numAlleles > 2) {
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
}
}
if (numAlleles > 2) {
// multiallelic approximation, part 2:
// report posteriors for allele that has highest estimated AC
int mostLikelyAlleleIdx = MathUtils.maxElementIndex(bestAFguess);
for (int k=0; k < log10AlleleFrequencyPosteriors.length-1; k++)
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
}
} }
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) { private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); // TODO -- initialize with size of GLs
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) { for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
if ( sample.hasLikelihoods() ) { if ( sample.hasLikelihoods() ) {
double[] gls = sample.getLikelihoods().getAsVector(); double[] gls = sample.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL) if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
genotypeLikelihoods.add(gls); genotypeLikelihoods.add(gls);
} }
} }
@ -109,9 +72,401 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
final static double approximateLog10SumLog10(double[] vals) {
if ( vals.length < 2 )
throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10");
double approx = approximateLog10SumLog10(vals[0], vals[1]);
for ( int i = 2; i < vals.length; i++ )
approx = approximateLog10SumLog10(approx, vals[i]);
return approx;
}
final static double approximateLog10SumLog10(double small, double big) {
// make sure small is really the smaller value
if ( small > big ) {
final double t = big;
big = small;
small = t;
}
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
return big;
if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
return big;
// OK, so |y-x| < tol: we use the following identity then:
// we need to compute log10(10^x + 10^y)
// By Jacobian logarithm identity, this is equal to
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup
// with integer quantization
// we have pre-stored correction for 0,0.1,0.2,... 10.0
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
//double z =Math.log10(1+Math.pow(10.0,-diff));
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
return big + MathUtils.jacobianLogTable[ind];
}
// ------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------
// //
// Linearized, ~O(N), implementation. // Multi-allelic implementation.
//
// -------------------------------------------------------------------------------------
private static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
// a wrapper around the int array so that we can make it hashable
private static final class ExactACcounts {
private final int[] counts;
private int hashcode = -1;
public ExactACcounts(final int[] counts) {
this.counts = counts;
}
public int[] getCounts() {
return counts;
}
@Override
public boolean equals(Object obj) {
return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false;
}
@Override
public int hashCode() {
if ( hashcode == -1 )
hashcode = Arrays.hashCode(counts);
return hashcode;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append(counts[0]);
for ( int i = 1; i < counts.length; i++ ) {
sb.append("/");
sb.append(counts[i]);
}
return sb.toString();
}
}
// This class represents a column in the Exact AC calculation matrix
private static final class ExactACset {
// the counts of the various alternate alleles which this column represents
final ExactACcounts ACcounts;
// the column of the matrix
final double[] log10Likelihoods;
// mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column;
// for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB.
final HashMap<ExactACcounts, Integer> ACsetIndexToPLIndex = new HashMap<ExactACcounts, Integer>();
// to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them
final ArrayList<ExactACcounts> dependentACsetsToDelete = new ArrayList<ExactACcounts>();
public ExactACset(final int size, final ExactACcounts ACcounts) {
this.ACcounts = ACcounts;
log10Likelihoods = new double[size];
}
// sum of all the non-reference alleles
public int getACsum() {
int sum = 0;
for ( int count : ACcounts.getCounts() )
sum += count;
return sum;
}
public boolean equals(Object obj) {
return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false;
}
}
public static void linearExactMultiAllelic(final GenotypesContext GLs,
final int numAlternateAlleles,
final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods,
final double[][] log10AlleleFrequencyPosteriors,
final boolean preserveData) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
final int numSamples = genotypeLikelihoods.size()-1;
final int numChr = 2*numSamples;
// queue of AC conformations to process
final Queue<ExactACset> ACqueue = new LinkedList<ExactACset>();
// mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
// add AC=0 to the queue
int[] zeroCounts = new int[numAlternateAlleles];
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
ACqueue.add(zeroSet);
indexesToACset.put(zeroSet.ACcounts, zeroSet);
// keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY;
while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods
final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
// adjust max likelihood seen if needed
maxLog10L = Math.max(maxLog10L, log10LofKs);
}
}
private static double calculateAlleleCountConformation(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods,
final double maxLog10L,
final int numChr,
final boolean preserveData,
final Queue<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods,
final double[][] log10AlleleFrequencyPosteriors) {
if ( DEBUG )
System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
// compute the log10Likelihoods
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
// clean up memory
if ( !preserveData ) {
for ( ExactACcounts index : set.dependentACsetsToDelete ) {
indexesToACset.put(index, null);
if ( DEBUG )
System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
}
}
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
// can we abort early because the log10Likelihoods are so small?
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
if ( DEBUG )
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
// no reason to keep this data around because nothing depends on it
if ( !preserveData )
indexesToACset.put(set.ACcounts, null);
return log10LofK;
}
// iterate over higher frequencies if possible
final int ACwiggle = numChr - set.getACsum();
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
return log10LofK;
ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing
final int numAltAlleles = set.ACcounts.getCounts().length;
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
// add conformations for the k+1 case
int PLindex = 0;
for ( int allele = 0; allele < numAltAlleles; allele++ ) {
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
ACcountsClone[allele]++;
lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset);
}
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
if ( ACwiggle > 1 ) {
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
ACcountsClone[allele_i]++;
ACcountsClone[allele_j]++;
lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset);
}
}
}
// if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate
// over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early)
if ( !preserveData && lastSet == null ) {
if ( DEBUG )
System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
}
if ( lastSet != null )
lastSet.dependentACsetsToDelete.add(set.ACcounts);
return log10LofK;
}
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
// also adds it as a dependency to the given callingSetIndex.
// returns the ExactACset if that set was not already in the queue and null otherwise.
private static ExactACset updateACset(final int[] ACcounts,
final int numChr,
final ExactACset callingSet,
final int PLsetIndex,
final Queue<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
final ExactACcounts index = new ExactACcounts(ACcounts);
boolean wasInQueue = true;
if ( !indexesToACset.containsKey(index) ) {
ExactACset set = new ExactACset(numChr/2 +1, index);
indexesToACset.put(index, set);
ACqueue.add(set);
wasInQueue = false;
}
// add the given dependency to the set
final ExactACset set = indexesToACset.get(index);
set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex);
return wasInQueue ? null : set;
}
private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final Queue<ExactACset> ACqueue) {
ExactACset set = null;
for ( ExactACset queued : ACqueue ) {
if ( queued.dependentACsetsToDelete.contains(callingSetIndex) )
set = queued;
}
return set;
}
private static void computeLofK(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods,
final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods,
final double[][] log10AlleleFrequencyPosteriors) {
set.log10Likelihoods[0] = 0.0; // the zero case
final int totalK = set.getACsum();
// special case for k = 0 over all k
if ( totalK == 0 ) {
for ( int j = 1; j < set.log10Likelihoods.length; j++ )
set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
}
// k > 0 for at least one k
else {
// all possible likelihoods for a given cell from which to choose the max
final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
final double[] gl = genotypeLikelihoods.get(j);
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
// initialize
for ( int i = 0; i < numPaths; i++ )
// TODO -- Arrays.fill?
// todo -- is this even necessary? Why not have as else below?
log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY;
// deal with the AA case first
if ( totalK < 2*j-1 )
log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
// deal with the other possible conformations now
if ( totalK <= 2*j ) { // skip impossible conformations
int conformationIndex = 1;
for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
if ( DEBUG )
System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
log10ConformationLikelihoods[conformationIndex++] =
determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()];
}
}
final double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods);
// finally, update the L(j,k) value
set.log10Likelihoods[j] = log10Max - logDenominator;
}
}
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
// determine the power of theta to use
int nonRefAlleles = 0;
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
if ( set.ACcounts.getCounts()[i] > 0 )
nonRefAlleles++;
}
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
int AC = set.ACcounts.getCounts()[i];
log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
// for k=0 we still want to use theta
final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
}
}
private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
// todo -- arent' there a small number of fixed values that this function can adopt?
// todo -- at a minimum it'd be good to partially compute some of these in ACCounts for performance
// todo -- need to cache PLIndex -> two alleles, compute looping over each PLIndex. Note all other operations are efficient
// todo -- this can be computed once at the start of the all operations
// the closed form representation generalized for multiple alleles is as follows:
// AA: (2j - totalK) * (2j - totalK - 1)
// AB: 2k_b * (2j - totalK)
// AC: 2k_c * (2j - totalK)
// BB: k_b * (k_b - 1)
// BC: 2 * k_b * k_c
// CC: k_c * (k_c - 1)
final int numAltAlleles = ACcounts.length;
// the AX het case
if ( PLindex <= numAltAlleles )
return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK];
int subtractor = numAltAlleles+1;
int subtractions = 0;
do {
PLindex -= subtractor;
subtractor--;
subtractions++;
}
while ( PLindex >= subtractor );
final int k_i = ACcounts[subtractions-1];
// the hom var case (e.g. BB, CC, DD)
final double coeff;
if ( PLindex == 0 ) {
coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
}
// the het non-ref case (e.g. BC, BD, CD)
else {
final int k_j = ACcounts[subtractions+PLindex-1];
coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
}
return coeff;
}
// -------------------------------------------------------------------------------------
//
// Deprecated bi-allelic ~O(N) implementation. Kept here for posterity.
// //
// ------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------
@ -119,6 +474,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
* A simple data structure that holds the current, prev, and prev->prev likelihoods vectors * A simple data structure that holds the current, prev, and prev->prev likelihoods vectors
* for the exact model calculation * for the exact model calculation
*/ */
/*
private final static class ExactACCache { private final static class ExactACCache {
double[] kMinus2, kMinus1, kMinus0; double[] kMinus2, kMinus1, kMinus0;
@ -154,7 +510,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
public int linearExact(GenotypesContext GLs, public int linearExact(GenotypesContext GLs,
double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { double[][] log10AlleleFrequencyLikelihoods,
double[][] log10AlleleFrequencyPosteriors) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs); final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
final int numSamples = genotypeLikelihoods.size()-1; final int numSamples = genotypeLikelihoods.size()-1;
final int numChr = 2*numSamples; final int numChr = 2*numSamples;
@ -171,7 +528,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( k == 0 ) { // special case for k = 0 if ( k == 0 ) { // special case for k = 0
for ( int j=1; j <= numSamples; j++ ) { for ( int j=1; j <= numSamples; j++ ) {
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA]; kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0];
} }
} else { // k > 0 } else { // k > 0
final double[] kMinus1 = logY.getkMinus1(); final double[] kMinus1 = logY.getkMinus1();
@ -184,14 +541,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
double aa = Double.NEGATIVE_INFINITY; double aa = Double.NEGATIVE_INFINITY;
double ab = Double.NEGATIVE_INFINITY; double ab = Double.NEGATIVE_INFINITY;
if (k < 2*j-1) if (k < 2*j-1)
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[idxAA]; aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0];
if (k < 2*j) if (k < 2*j)
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[idxAB]; ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1];
double log10Max; double log10Max;
if (k > 1) { if (k > 1) {
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[idxBB]; final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2];
log10Max = approximateLog10SumLog10(aa, ab, bb); log10Max = approximateLog10SumLog10(aa, ab, bb);
} else { } else {
// we know we aren't considering the BB case, so we can use an optimized log10 function // we know we aren't considering the BB case, so we can use an optimized log10 function
@ -205,7 +562,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// update the posteriors vector // update the posteriors vector
final double log10LofK = kMinus0[numSamples]; final double log10LofK = kMinus0[numSamples];
log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k]; log10AlleleFrequencyLikelihoods[0][k] = log10LofK;
log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k];
// can we abort early? // can we abort early?
lastK = k; lastK = k;
@ -222,202 +580,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
final static double approximateLog10SumLog10(double a, double b, double c) { final static double approximateLog10SumLog10(double a, double b, double c) {
//return softMax(new double[]{a, b, c});
return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c); return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c);
} }
*/
final static double approximateLog10SumLog10(double small, double big) {
// make sure small is really the smaller value
if ( small > big ) {
final double t = big;
big = small;
small = t;
}
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
return big;
if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
return big;
// OK, so |y-x| < tol: we use the following identity then:
// we need to compute log10(10^x + 10^y)
// By Jacobian logarithm identity, this is equal to
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup
// with integer quantization
// we have pre-stored correction for 0,0.1,0.2,... 10.0
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
//double z =Math.log10(1+Math.pow(10.0,-diff));
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
return big + MathUtils.jacobianLogTable[ind];
}
/**
* Can be overridden by concrete subclasses
* @param vc variant context with genotype likelihoods
* @param log10AlleleFrequencyPosteriors allele frequency results
* @param AFofMaxLikelihood allele frequency of max likelihood
*
* @return calls
*/
public GenotypesContext assignGenotypes(VariantContext vc,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood) {
if ( !vc.isVariant() )
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
GenotypesContext GLs = vc.getGenotypes();
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1];
ArrayList<String> sampleIndices = new ArrayList<String>();
int sampleIdx = 0;
// todo - optimize initialization
for (int k=0; k <= AFofMaxLikelihood; k++)
for (int j=0; j <= GLs.size(); j++)
pathMetricArray[j][k] = -1e30;
pathMetricArray[0][0] = 0.0;
// todo = can't deal with optimal dynamic programming solution with multiallelic records
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
sampleIndices.addAll(GLs.getSampleNamesOrderedByName());
sampleIdx = GLs.size();
}
else {
for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) {
if ( !genotype.hasLikelihoods() )
continue;
double[] likelihoods = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) {
//System.out.print(sample.getKey()+":");
//for (int k=0; k < likelihoods.length; k++)
// System.out.format("%4.2f ",likelihoods[k]);
//System.out.println();
// all likelihoods are essentially the same: skip this sample and will later on force no call.
//sampleIdx++;
continue;
}
sampleIndices.add(genotype.getSampleName());
for (int k=0; k <= AFofMaxLikelihood; k++) {
double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0];
int bestIndex = k;
if (k>0) {
double m2 = pathMetricArray[sampleIdx][k-1] + likelihoods[1];
if (m2 > bestMetric) {
bestMetric = m2;
bestIndex = k-1;
}
}
if (k>1) {
double m2 = pathMetricArray[sampleIdx][k-2] + likelihoods[2];
if (m2 > bestMetric) {
bestMetric = m2;
bestIndex = k-2;
}
}
pathMetricArray[sampleIdx+1][k] = bestMetric;
tracebackArray[sampleIdx+1][k] = bestIndex;
}
sampleIdx++;
}
}
GenotypesContext calls = GenotypesContext.create();
int startIdx = AFofMaxLikelihood;
for (int k = sampleIdx; k > 0; k--) {
int bestGTguess;
String sample = sampleIndices.get(k-1);
Genotype g = GLs.get(sample);
if ( !g.hasLikelihoods() )
continue;
// if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now,
// and will add no-call genotype to GL's in a second pass
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
double[] likelihoods = g.getLikelihoods().getAsVector();
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
bestGTguess = Utils.findIndexOfMaxEntry(likelihoods);
}
else {
int newIdx = tracebackArray[k][startIdx];;
bestGTguess = startIdx - newIdx;
startIdx = newIdx;
}
// likelihoods are stored row-wise in lower triangular matrix. IE
// for 2 alleles they have ordering AA,AB,BB
// for 3 alleles they are ordered AA,AB,BB,AC,BC,CC
// Get now alleles corresponding to best index
int kk=0;
boolean done = false;
for (int j=0; j < vc.getNAlleles(); j++) {
for (int i=0; i <= j; i++){
if (kk++ == bestGTguess) {
if (i==0)
myAlleles.add(vc.getReference());
else
myAlleles.add(vc.getAlternateAllele(i-1));
if (j==0)
myAlleles.add(vc.getReference());
else
myAlleles.add(vc.getAlternateAllele(j-1));
done = true;
break;
}
}
if (done)
break;
}
final double qual = GenotypeLikelihoods.getQualFromLikelihoods(bestGTguess, likelihoods);
//System.out.println(myAlleles.toString());
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
}
for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) {
if ( !genotype.hasLikelihoods() )
continue;
final Genotype g = GLs.get(genotype.getSampleName());
final double[] likelihoods = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
continue; // regular likelihoods
final double qual = Genotype.NO_LOG10_PERROR;
calls.replace(new Genotype(g.getSampleName(), NO_CALL_ALLELES, qual, null, g.getAttributes(), false));
}
return calls;
}
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
int j = logYMatrix.length - 1;
System.out.printf("-----------------------------------%n");
for (int k=0; k <= numChr; k++) {
double posterior = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
}
}
} }

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Map; import java.util.Map;
@ -79,19 +80,17 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
* @param contexts stratified alignment contexts * @param contexts stratified alignment contexts
* @param contextType stratified context type * @param contextType stratified context type
* @param priors priors to use for GLs * @param priors priors to use for GLs
* @param GLs hash of sample->GL to fill in
* @param alternateAlleleToUse the alternate allele to use, null if not set * @param alternateAlleleToUse the alternate allele to use, null if not set
* @param useBAQedPileup should we use the BAQed pileup or the raw one? * @param useBAQedPileup should we use the BAQed pileup or the raw one?
* @return genotype likelihoods per sample for AA, AB, BB * @return variant context where genotypes are no-called but with GLs
*/ */
public abstract Allele getLikelihoods(RefMetaDataTracker tracker, public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref, ReferenceContext ref,
Map<String, AlignmentContext> contexts, Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType, AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors, GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs, Allele alternateAlleleToUse,
Allele alternateAlleleToUse, boolean useBAQedPileup);
boolean useBAQedPileup);
protected int getFilteredDepth(ReadBackedPileup pileup) { protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0; int count = 0;

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -41,8 +42,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
@ -243,7 +243,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// get deletion length // get deletion length
int dLen = Integer.valueOf(bestAltAllele.substring(1)); int dLen = Integer.valueOf(bestAltAllele.substring(1));
// get ref bases of accurate deletion // get ref bases of accurate deletion
int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart()); int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
//System.out.println(new String(ref.getBases())); //System.out.println(new String(ref.getBases()));
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen); byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
@ -270,19 +270,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED); private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
public Allele getLikelihoods(RefMetaDataTracker tracker, public VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref, ReferenceContext ref,
Map<String, AlignmentContext> contexts, Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType, AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors, GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs, Allele alternateAlleleToUse,
Allele alternateAlleleToUse, boolean useBAQedPileup) {
boolean useBAQedPileup) {
if ( tracker == null ) if ( tracker == null )
return null; return null;
GenomeLoc loc = ref.getLocus(); GenomeLoc loc = ref.getLocus();
Allele refAllele, altAllele; Allele refAllele, altAllele;
VariantContext vc = null; VariantContext vc = null;
@ -368,10 +366,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(), haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
ref, hsize, numPrefBases); ref, hsize, numPrefBases);
// start making the VariantContext
final int endLoc = calculateEndPos(alleleList, refAllele, loc);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
// For each sample, get genotype likelihoods based on pileup // For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
// initialize the GenotypeLikelihoods
GLs.clear();
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) { for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
@ -384,11 +389,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (pileup != null ) { if (pileup != null ) {
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap()); final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), HashMap<String, Object> attributes = new HashMap<String, Object>();
alleleList, attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
genotypeLikelihoods, attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
getFilteredDepth(pileup))); genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
if (DEBUG) { if (DEBUG) {
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString()); System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
@ -399,9 +405,25 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} }
} }
return refAllele; return builder.genotypes(genotypes).make();
} }
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
// for indels, stop location is one more than ref allele length
boolean hasNullAltAllele = false;
for ( Allele a : alleles ) {
if ( a.isNull() ) {
hasNullAltAllele = true;
break;
}
}
int endLoc = loc.getStart() + refAllele.length();
if( !hasNullAltAllele )
endLoc--;
return endLoc;
}
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() { public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
return indelLikelihoodMap.get(); return indelLikelihoodMap.get();

View File

@ -1,52 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: 6/1/11
* Time: 10:38 AM
* To change this template use File | Settings | File Templates.
*/
public class MultiallelicGenotypeLikelihoods {
private String sample;
private double[] GLs;
private List<Allele> alleleList;
private int depth;
public MultiallelicGenotypeLikelihoods(String sample,
List<Allele> A,
double[] log10Likelihoods, int depth) {
/* Check for consistency between likelihood vector and number of alleles */
int numAlleles = A.size();
if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2)
throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!"));
this.sample = sample;
this.alleleList = A;
this.GLs = log10Likelihoods;
this.depth = depth;
}
public String getSample() {
return sample;
}
public double[] getLikelihoods() {
return GLs;
}
public List<Allele> getAlleles() {
return alleleList;
}
public int getDepth() {
return depth;
}
}

View File

@ -31,107 +31,147 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.*;
import java.util.List;
import java.util.Map;
public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
// the alternate allele with the largest sum of quality scores private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50;
protected Byte bestAlternateAllele = null;
private boolean ALLOW_MULTIPLE_ALLELES;
private final boolean useAlleleFromVCF; private final boolean useAlleleFromVCF;
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger); super(UAC, logger);
ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC;
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
} }
public Allele getLikelihoods(RefMetaDataTracker tracker, public VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref, ReferenceContext ref,
Map<String, AlignmentContext> contexts, Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType, AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors, GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs, Allele alternateAlleleToUse,
Allele alternateAlleleToUse, boolean useBAQedPileup) {
boolean useBAQedPileup) {
if ( !(priors instanceof DiploidSNPGenotypePriors) ) if ( !(priors instanceof DiploidSNPGenotypePriors) )
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model"); throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
byte refBase = ref.getBase(); final boolean[] basesToUse = new boolean[4];
Allele refAllele = Allele.create(refBase, true); final byte refBase = ref.getBase();
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
// find the alternate allele with the largest sum of quality scores // start making the VariantContext
final GenomeLoc loc = ref.getLocus();
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(Allele.create(refBase, true));
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);
// find the alternate allele(s) that we should be using
if ( alternateAlleleToUse != null ) { if ( alternateAlleleToUse != null ) {
bestAlternateAllele = alternateAlleleToUse.getBases()[0]; basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true;
} else if ( useAlleleFromVCF ) { } else if ( useAlleleFromVCF ) {
VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles); final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
// ignore places where we don't have a variant // ignore places where we don't have a SNP
if ( vc == null ) if ( vc == null || !vc.isSNP() )
return null; return null;
if ( !vc.isBiallelic() ) { for ( Allele allele : vc.getAlternateAlleles() )
// for multi-allelic sites go back to the reads and find the most likely alternate allele basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true;
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
} else {
bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0];
}
} else { } else {
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup);
// how many alternate alleles are we using?
int alleleCounter = countSetBits(basesToUse);
// if there are no non-ref alleles...
if ( alleleCounter == 0 ) {
// if we only want variants, then we don't need to calculate genotype likelihoods
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
return builder.make();
// otherwise, choose any alternate allele (it doesn't really matter)
basesToUse[indexOfRefBase == 0 ? 1 : 0] = true;
}
} }
// if there are no non-ref bases... // create the alternate alleles and the allele ordering (the ordering is crucial for the GLs)
if ( bestAlternateAllele == null ) { final int numAltAlleles = countSetBits(basesToUse);
// if we only want variants, then we don't need to calculate genotype likelihoods final int[] alleleOrdering = new int[numAltAlleles + 1];
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) alleleOrdering[0] = indexOfRefBase;
return refAllele; int alleleOrderingIndex = 1;
int numLikelihoods = 1;
// otherwise, choose any alternate allele (it doesn't really matter) for ( int i = 0; i < 4; i++ ) {
bestAlternateAllele = (byte)(refBase != 'A' ? 'A' : 'C'); if ( i != indexOfRefBase && basesToUse[i] ) {
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
alleleOrdering[alleleOrderingIndex++] = i;
numLikelihoods += alleleOrderingIndex;
}
} }
builder.alleles(alleles);
Allele altAllele = Allele.create(bestAlternateAllele, false); // create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) { for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); } if ( useBAQedPileup )
pileup = createBAQedPileup( pileup );
// create the GenotypeLikelihoods object // create the GenotypeLikelihoods object
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error); final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE); final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
if ( nGoodBases == 0 ) if ( nGoodBases == 0 )
continue; continue;
double[] likelihoods = GL.getLikelihoods(); final double[] allLikelihoods = GL.getLikelihoods();
final double[] myLikelihoods = new double[numLikelihoods];
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase); int myLikelihoodsIndex = 0;
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele); for ( int i = 0; i <= numAltAlleles; i++ ) {
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele); for ( int j = i; j <= numAltAlleles; j++ ) {
ArrayList<Allele> aList = new ArrayList<Allele>(); myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()];
aList.add(refAllele); }
aList.add(altAllele); }
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
// normalize in log space so that max element is zero. // normalize in log space so that max element is zero.
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
} }
return refAllele; return builder.genotypes(genotypes).make();
} }
protected void initializeBestAlternateAllele(byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) { private int countSetBits(boolean[] array) {
int counter = 0;
for ( int i = 0; i < array.length; i++ ) {
if ( array[i] )
counter++;
}
return counter;
}
// fills in the allelesToUse array
protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
int[] qualCounts = new int[4]; int[] qualCounts = new int[4];
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) { for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
@ -139,7 +179,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup(); ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup();
for ( PileupElement p : pileup ) { for ( PileupElement p : pileup ) {
// ignore deletions // ignore deletions
if ( p.isDeletion() || (! p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE )) if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) )
continue; continue;
final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase()); final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
@ -149,17 +189,31 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
} }
} }
// set the non-ref base with maximum quality score sum if ( ALLOW_MULTIPLE_ALLELES ) {
int maxCount = 0; for ( byte altAllele : BaseUtils.BASES ) {
bestAlternateAllele = null; if ( altAllele == ref )
for ( byte altAllele : BaseUtils.BASES ) { continue;
if ( altAllele == ref ) int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
continue; if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) {
int index = BaseUtils.simpleBaseToBaseIndex(altAllele); allelesToUse[index] = true;
if ( qualCounts[index] > maxCount ) { }
maxCount = qualCounts[index];
bestAlternateAllele = altAllele;
} }
} else {
// set the non-ref base which has the maximum quality score sum
int maxCount = 0;
int indexOfMax = 0;
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( qualCounts[index] > maxCount ) {
maxCount = qualCounts[index];
indexOfMax = index;
}
}
if ( maxCount > 0 )
allelesToUse[indexOfMax] = true;
} }
} }

View File

@ -153,6 +153,18 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false; public boolean IGNORE_SNP_ALLELES = false;
@Hidden
@Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow multiple alleles in discovery", required = false)
public boolean MULTI_ALLELIC = false;
/**
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
* then this site will be skipped and a warning printed. Note that genotyping sites with many alternate alleles is both CPU and memory intensive.
*/
@Hidden
@Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
public int MAX_ALTERNATE_ALLELES = 5;
// Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value! // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
public UnifiedArgumentCollection clone() { public UnifiedArgumentCollection clone() {
@ -176,10 +188,12 @@ public class UnifiedArgumentCollection {
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO; uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE; uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
uac.alleles = alleles; uac.alleles = alleles;
uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES;
// todo- arguments to remove // todo- arguments to remove
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES; uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION; uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
uac.MULTI_ALLELIC = MULTI_ALLELIC;
return uac; return uac;
} }

View File

@ -59,6 +59,9 @@ public class UnifiedGenotyperEngine {
EMIT_ALL_SITES EMIT_ALL_SITES
} }
protected static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
protected static final double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
// the unified argument collection // the unified argument collection
private final UnifiedArgumentCollection UAC; private final UnifiedArgumentCollection UAC;
public UnifiedArgumentCollection getUAC() { return UAC; } public UnifiedArgumentCollection getUAC() { return UAC; }
@ -73,11 +76,12 @@ public class UnifiedGenotyperEngine {
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>(); private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
private final double[] log10AlleleFrequencyPriorsSNPs; private final double[][] log10AlleleFrequencyPriorsSNPs;
private final double[] log10AlleleFrequencyPriorsIndels; private final double[][] log10AlleleFrequencyPriorsIndels;
// the allele frequency likelihoods (allocated once as an optimization) // the allele frequency likelihoods (allocated once as an optimization)
private ThreadLocal<double[]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[]>(); private ThreadLocal<double[][]> log10AlleleFrequencyLikelihoods = new ThreadLocal<double[][]>();
private ThreadLocal<double[][]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[][]>();
// the priors object // the priors object
private final GenotypePriors genotypePriorsSNPs; private final GenotypePriors genotypePriorsSNPs;
@ -96,6 +100,7 @@ public class UnifiedGenotyperEngine {
// the standard filter to use for calls below the confidence threshold but above the emit threshold // the standard filter to use for calls below the confidence threshold but above the emit threshold
private static final Set<String> filter = new HashSet<String>(1); private static final Set<String> filter = new HashSet<String>(1);
private final GenomeLocParser genomeLocParser;
private final boolean BAQEnabledOnCMDLine; private final boolean BAQEnabledOnCMDLine;
@ -113,6 +118,7 @@ public class UnifiedGenotyperEngine {
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) { public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser();
this.samples = new TreeSet<String>(samples); this.samples = new TreeSet<String>(samples);
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
this.UAC = UAC.clone(); this.UAC = UAC.clone();
@ -122,10 +128,10 @@ public class UnifiedGenotyperEngine {
this.annotationEngine = engine; this.annotationEngine = engine;
N = 2 * this.samples.size(); N = 2 * this.samples.size();
log10AlleleFrequencyPriorsSNPs = new double[N+1]; log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
log10AlleleFrequencyPriorsIndels = new double[N+1]; log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, GenotypeLikelihoodsCalculationModel.Model.SNP); computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, GenotypeLikelihoodsCalculationModel.Model.INDEL); computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP); genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP);
genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL); genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL);
@ -152,7 +158,6 @@ public class UnifiedGenotyperEngine {
} }
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
if ( vc == null ) if ( vc == null )
return null; return null;
@ -214,14 +219,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
} }
Map<String, MultiallelicGenotypeLikelihoods> GLs = new HashMap<String, MultiallelicGenotypeLikelihoods>(); return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
if ( refAllele != null )
return createVariantContextFromLikelihoods(refContext, refAllele, GLs);
else
return null;
} }
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) { private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -256,136 +254,170 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vc, false); return new VariantCallContext(vc, false);
} }
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) { public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
// no-call everyone for now return calculateGenotypes(null, null, null, null, vc, model);
List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
Set<Allele> alleles = new LinkedHashSet<Allele>();
alleles.add(refAllele);
boolean addedAltAlleles = false;
GenotypesContext genotypes = GenotypesContext.create();
for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) {
if ( !addedAltAlleles ) {
addedAltAlleles = true;
// ordering important to maintain consistency
for (Allele a: GL.getAlleles()) {
alleles.add(a);
}
}
HashMap<String, Object> attributes = new HashMap<String, Object>();
//GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(GL.getLikelihoods());
attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth());
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
}
GenomeLoc loc = refContext.getLocus();
int endLoc = calculateEndPos(alleles, refAllele, loc);
return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make();
} }
// private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
// initialize the data for this thread if that hasn't been done yet // initialize the data for this thread if that hasn't been done yet
if ( afcm.get() == null ) { if ( afcm.get() == null ) {
log10AlleleFrequencyPosteriors.set(new double[N+1]); log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
} }
// don't try to genotype too many alternate alleles
if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) {
logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument");
return null;
}
// estimate our confidence in a reference call and return // estimate our confidence in a reference call and return
if ( vc.getNSamples() == 0 ) if ( vc.getNSamples() == 0 ) {
if ( limitedContext )
return null;
return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ? return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ?
estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), false, 1.0) : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), false, 1.0) :
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
}
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
clearAFarray(log10AlleleFrequencyLikelihoods.get());
clearAFarray(log10AlleleFrequencyPosteriors.get()); clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
// find the most likely frequency // is the most likely frequency conformation AC=0 for all alternate alleles?
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); boolean bestGuessIsRef = true;
// which alternate allele has the highest MLE AC?
int indexOfHighestAlt = -1;
int alleleCountOfHighestAlt = -1;
// determine which alternate alleles have AF>0
boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
int indexOfBestAC = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]);
// if the most likely AC is not 0, then this is a good alternate allele to use
if ( indexOfBestAC != 0 ) {
altAllelesToUse[i] = true;
bestGuessIsRef = false;
}
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
altAllelesToUse[i] = true;
}
// keep track of the "best" alternate allele to use
if ( indexOfBestAC > alleleCountOfHighestAlt) {
alleleCountOfHighestAlt = indexOfBestAC;
indexOfHighestAlt = i;
}
}
// calculate p(f>0) // calculate p(f>0)
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()); // TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[indexOfHighestAlt]);
double sum = 0.0; double sum = 0.0;
for (int i = 1; i <= N; i++) for (int i = 1; i <= N; i++)
sum += normalizedPosteriors[i]; sum += normalizedPosteriors[i];
double PofF = Math.min(sum, 1.0); // deal with precision errors double PofF = Math.min(sum, 1.0); // deal with precision errors
double phredScaledConfidence; double phredScaledConfidence;
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
if ( Double.isInfinite(phredScaledConfidence) ) if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0];
} else { } else {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
if ( Double.isInfinite(phredScaledConfidence) ) { if ( Double.isInfinite(phredScaledConfidence) ) {
sum = 0.0; sum = 0.0;
for (int i = 1; i <= N; i++) { for (int i = 1; i <= N; i++) {
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
break; break;
sum += log10AlleleFrequencyPosteriors.get()[i]; sum += log10AlleleFrequencyPosteriors.get()[0][i];
} }
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
} }
} }
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) { if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) {
// technically, at this point our confidence in a reference call isn't accurately estimated // technically, at this point our confidence in a reference call isn't accurately estimated
// because it didn't take into account samples with no data, so let's get a better estimate // because it didn't take into account samples with no data, so let's get a better estimate
return estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF); return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
} }
// strip out any alleles that aren't going to be used in the VariantContext
List<Allele> myAlleles;
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
myAlleles.add(vc.getReference());
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
if ( altAllelesToUse[i] )
myAlleles.add(vc.getAlternateAllele(i));
}
} else {
// use all of the alleles if we are given them by the user
myAlleles = vc.getAlleles();
}
// start constructing the resulting VC
GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) )
builder.filters(filter);
if ( !limitedContext )
builder.referenceBaseForIndel(refContext.getBase());
// create the genotypes // create the genotypes
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse);
// print out stats if we have a writer // print out stats if we have a writer
if ( verboseWriter != null ) if ( verboseWriter != null && !limitedContext )
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model); printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
// *** note that calculating strand bias involves overwriting data structures, so we do that last // *** note that calculating strand bias involves overwriting data structures, so we do that last
HashMap<String, Object> attributes = new HashMap<String, Object>(); HashMap<String, Object> attributes = new HashMap<String, Object>();
// if the site was downsampled, record that fact // if the site was downsampled, record that fact
if ( rawContext.hasPileupBeenDownsampled() ) if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true); attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
if ( UAC.COMPUTE_SLOD && bestAFguess != 0 ) {
//final boolean DEBUG_SLOD = false; //final boolean DEBUG_SLOD = false;
// the overall lod // the overall lod
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyLikelihoods.get());
clearAFarray(log10AlleleFrequencyPosteriors.get()); clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod // the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyLikelihoods.get());
clearAFarray(log10AlleleFrequencyPosteriors.get()); clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod // the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyLikelihoods.get());
clearAFarray(log10AlleleFrequencyPosteriors.get()); clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
@ -401,26 +433,12 @@ public class UnifiedGenotyperEngine {
attributes.put("SB", strandScore); attributes.put("SB", strandScore);
} }
GenomeLoc loc = refContext.getLocus(); // finish constructing the resulting VC
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
// strip out the alternate allele if it's a ref call
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
myAlleles = new HashSet<Allele>(1);
myAlleles.add(vc.getReference());
}
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles);
builder.genotypes(genotypes); builder.genotypes(genotypes);
builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter);
builder.attributes(attributes); builder.attributes(attributes);
builder.referenceBaseForIndel(refContext.getBase());
VariantContext vcCall = builder.make(); VariantContext vcCall = builder.make();
if ( annotationEngine != null ) { if ( annotationEngine != null && !limitedContext ) {
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
ReadBackedPileup pileup = null; ReadBackedPileup pileup = null;
if (rawContext.hasExtendedEventPileup()) if (rawContext.hasExtendedEventPileup())
@ -435,119 +453,6 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
} }
// A barebones entry point to the exact model when there is no tracker or stratified contexts available -- only GLs
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) {
// initialize the data for this thread if that hasn't been done yet
if ( afcm.get() == null ) {
log10AlleleFrequencyPosteriors.set(new double[N+1]);
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
}
// estimate our confidence in a reference call and return
if ( vc.getNSamples() == 0 )
return null;
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
// find the most likely frequency
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
// calculate p(f>0)
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get());
double sum = 0.0;
for (int i = 1; i <= N; i++)
sum += normalizedPosteriors[i];
double PofF = Math.min(sum, 1.0); // deal with precision errors
double phredScaledConfidence;
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
} else {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
if ( Double.isInfinite(phredScaledConfidence) ) {
sum = 0.0;
for (int i = 1; i <= N; i++) {
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
break;
sum += log10AlleleFrequencyPosteriors.get()[i];
}
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
}
}
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
// technically, at this point our confidence in a reference call isn't accurately estimated
// because it didn't take into account samples with no data, so let's get a better estimate
return null;
}
// create the genotypes
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
// *** note that calculating strand bias involves overwriting data structures, so we do that last
HashMap<String, Object> attributes = new HashMap<String, Object>();
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
// strip out the alternate allele if it's a ref call
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
myAlleles = new HashSet<Allele>(1);
myAlleles.add(vc.getReference());
}
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles);
builder.genotypes(genotypes);
builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter);
builder.attributes(attributes);
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
return new VariantCallContext(builder.make(), confidentlyCalled(phredScaledConfidence, PofF));
}
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
// TODO - temp fix until we can deal with extended events properly
// for indels, stop location is one more than ref allele length
boolean isSNP = true, hasNullAltAllele = false;
for (Allele a : alleles){
if (a.length() != 1) {
isSNP = false;
break;
}
}
for (Allele a : alleles){
if (a.isNull()) {
hasNullAltAllele = true;
break;
}
}
// standard deletion: ref allele length = del length. endLoc = startLoc + refAllele.length(), alt allele = null
// standard insertion: ref allele length = 0, endLos = startLoc
// mixed: want end loc = start Loc for case {A*,AT,T} but say {ATG*,A,T} : want then end loc = start loc + refAllele.length
// So, in general, end loc = startLoc + refAllele.length, except in complex substitutions where it's one less
//
// todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler,
// the whole vc processing infrastructure seems too brittle and riddled with special case handling
int endLoc = loc.getStart();
if ( !isSNP) {
endLoc += refAllele.length();
if(!hasNullAltAllele)
endLoc--;
}
return endLoc;
}
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) { private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
Map<String, AlignmentContext> stratifiedContexts = null; Map<String, AlignmentContext> stratifiedContexts = null;
@ -604,9 +509,12 @@ public class UnifiedGenotyperEngine {
return stratifiedContexts; return stratifiedContexts;
} }
protected static void clearAFarray(double[] AFs) { protected static void clearAFarray(double[][] AFs) {
for ( int i = 0; i < AFs.length; i++ ) for ( int i = 0; i < AFs.length; i++ ) {
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; for ( int j = 0; j < AFs[i].length; j++ ) {
AFs[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
}
} }
private final static double[] binomialProbabilityDepthCache = new double[10000]; private final static double[] binomialProbabilityDepthCache = new double[10000];
@ -676,7 +584,7 @@ public class UnifiedGenotyperEngine {
AFline.append(i + "/" + N + "\t"); AFline.append(i + "/" + N + "\t");
AFline.append(String.format("%.2f\t", ((float)i)/N)); AFline.append(String.format("%.2f\t", ((float)i)/N));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i])); AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED) if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
AFline.append("0.00000000\t"); AFline.append("0.00000000\t");
else else
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i])); AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i]));
@ -689,8 +597,8 @@ public class UnifiedGenotyperEngine {
verboseWriter.println(); verboseWriter.println();
} }
protected boolean passesEmitThreshold(double conf, int bestAFguess) { protected boolean passesEmitThreshold(double conf, boolean bestGuessIsRef) {
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || !bestGuessIsRef) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
} }
protected boolean passesCallThreshold(double conf) { protected boolean passesCallThreshold(double conf) {
@ -744,27 +652,25 @@ public class UnifiedGenotyperEngine {
return null; return null;
} }
protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) { protected static void computeAlleleFrequencyPriors(final int N, final double[][] priors, final double theta) {
// calculate the allele frequency priors for 1-N
double sum = 0.0;
double heterozygosity;
if (model == GenotypeLikelihoodsCalculationModel.Model.INDEL) // the dimension here is the number of alternate alleles; with e.g. 2 alternate alleles the prior will be theta^2 / i
heterozygosity = UAC.INDEL_HETEROZYGOSITY; for (int alleles = 1; alleles <= priors.length; alleles++) {
else double sum = 0.0;
heterozygosity = UAC.heterozygosity;
// for each i
for (int i = 1; i <= N; i++) { for (int i = 1; i <= N; i++) {
double value = heterozygosity / (double)i; double value = Math.pow(theta, alleles) / (double)i;
priors[i] = Math.log10(value); priors[alleles-1][i] = Math.log10(value);
sum += value; sum += value;
}
// null frequency for AF=0 is (1 - sum(all other frequencies))
priors[alleles-1][0] = Math.log10(1.0 - sum);
} }
// null frequency for AF=0 is (1 - sum(all other frequencies))
priors[0] = Math.log10(1.0 - sum);
} }
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch( model ) { switch( model ) {
case SNP: case SNP:
return log10AlleleFrequencyPriorsSNPs; return log10AlleleFrequencyPriorsSNPs;
@ -837,4 +743,88 @@ public class UnifiedGenotyperEngine {
return vc; return vc;
} }
/**
* @param vc variant context with genotype likelihoods
* @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use
*
* @return genotypes
*/
public GenotypesContext assignGenotypes(VariantContext vc,
boolean[] allelesToUse) {
final GenotypesContext GLs = vc.getGenotypes();
final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();
final GenotypesContext calls = GenotypesContext.create();
for ( int k = GLs.size() - 1; k >= 0; k-- ) {
final String sample = sampleIndices.get(k);
final Genotype g = GLs.get(sample);
if ( !g.hasLikelihoods() )
continue;
final double[] likelihoods = g.getLikelihoods().getAsVector();
// if there is no mass on the likelihoods, then just no-call the sample
if ( MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL ) {
calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
continue;
}
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
final int numAltAlleles = allelesToUse.length;
// start with the assumption that the ideal genotype is homozygous reference
Allele maxAllele1 = vc.getReference(), maxAllele2 = vc.getReference();
double maxLikelihoodSeen = likelihoods[0];
int indexOfMax = 0;
// keep track of some state
Allele firstAllele = vc.getReference();
int subtractor = numAltAlleles + 1;
int subtractionsMade = 0;
for ( int i = 1, PLindex = 1; i < likelihoods.length; i++, PLindex++ ) {
if ( PLindex == subtractor ) {
firstAllele = vc.getAlternateAllele(subtractionsMade);
PLindex -= subtractor;
subtractor--;
subtractionsMade++;
// we can skip this allele if it's not usable
if ( !allelesToUse[subtractionsMade-1] ) {
i += subtractor - 1;
PLindex += subtractor - 1;
continue;
}
}
// we don't care about the entry if we've already seen better
if ( likelihoods[i] <= maxLikelihoodSeen )
continue;
// if it's usable then update the alleles
int alleleIndex = subtractionsMade + PLindex - 1;
if ( allelesToUse[alleleIndex] ) {
maxAllele1 = firstAllele;
maxAllele2 = vc.getAlternateAllele(alleleIndex);
maxLikelihoodSeen = likelihoods[i];
indexOfMax = i;
}
}
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
myAlleles.add(maxAllele1);
myAlleles.add(maxAllele2);
final double qual = GenotypeLikelihoods.getQualFromLikelihoods(indexOfMax, likelihoods);
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
}
return calls;
}
} }

View File

@ -320,17 +320,17 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
if(transmissionProb != NO_TRANSMISSION_PROB) if(transmissionProb != NO_TRANSMISSION_PROB)
phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb)); phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb));
//Handle null, missing and unavailable genotypes //Handle null, missing and unavailable genotypes
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable //Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
//genotype so it is safe to return the original genotype in this case. //genotype so it is safe to return the original genotype in this case.
//In addition, if the phasing confidence is 0, then return the unphased, original genotypes. //In addition, if the phasing confidence is 0, then return the unphased, original genotypes.
if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType())) if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType()))
return genotype; return genotype;
//Add the transmission probability //Add the transmission probability
Map<String, Object> genotypeAttributes = new HashMap<String, Object>(); Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
genotypeAttributes.putAll(genotype.getAttributes()); genotypeAttributes.putAll(genotype.getAttributes());
if(transmissionProb>NO_TRANSMISSION_PROB) if(transmissionProb>NO_TRANSMISSION_PROB)
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission); genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
ArrayList<Allele> phasedAlleles = new ArrayList<Allele>(2); ArrayList<Allele> phasedAlleles = new ArrayList<Allele>(2);

View File

@ -164,13 +164,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false) @Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
protected double MIN_PHASE_QUALITY = 10.0; protected double MIN_PHASE_QUALITY = 10.0;
/** @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation. Default is 50.", required=false)
* This argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined.
*/
@Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations", required=false)
protected String FAMILY_STRUCTURE;
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
@ -561,8 +555,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; } public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; }
public String getFamilyStructure() { return FAMILY_STRUCTURE; }
public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; } public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; }
public TreeSet<VariantStratifier> getStratificationObjects() { return stratificationObjects; } public TreeSet<VariantStratifier> getStratificationObjects() { return stratificationObjects; }

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -7,9 +9,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.io.PrintStream;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
/** /**
* Mendelian violation detection and counting * Mendelian violation detection and counting
@ -40,12 +44,25 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator") @Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
public class MendelianViolationEvaluator extends VariantEvaluator { public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description = "Number of mendelian variants found") @DataPoint(description = "Number of variants found with at least one family having genotypes")
long nVariants; long nVariants;
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall")
long nSkipped;
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)")
long nFamCalled;
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.")
long nVarFamCalled;
@DataPoint(description="Number of variants x families discarded as low quality")
long nLowQual;
@DataPoint(description="Number of variants x families discarded as no call")
long nNoCall;
@DataPoint(description="Number of loci with mendelian violations")
long nLociViolations;
@DataPoint(description = "Number of mendelian violations found") @DataPoint(description = "Number of mendelian violations found")
long nViolations; long nViolations;
@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
long KidHomRef_ParentHomVar; long KidHomRef_ParentHomVar;
@DataPoint(description = "number of child het calls where the parent was hom ref") @DataPoint(description = "number of child het calls where the parent was hom ref")
long KidHet_ParentsHomRef; long KidHet_ParentsHomRef;
@ -53,11 +70,65 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
long KidHet_ParentsHomVar; long KidHet_ParentsHomVar;
@DataPoint(description = "number of child hom variant calls where the parent was hom ref") @DataPoint(description = "number of child hom variant calls where the parent was hom ref")
long KidHomVar_ParentHomRef; long KidHomVar_ParentHomRef;
*/
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR")
long mvRefRef_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET")
long mvRefRef_Het;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR")
long mvRefHet_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR")
long mvRefVar_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF")
long mvRefVar_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF")
long mvVarHet_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF")
long mvVarVar_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET")
long mvVarVar_Het;
/*@DataPoint(description ="Number of inherited var alleles from het parents")
long nInheritedVar;
@DataPoint(description ="Number of inherited ref alleles from het parents")
long nInheritedRef;*/
@DataPoint(description="Number of HomRef/HomRef/HomRef trios")
long HomRefHomRef_HomRef;
@DataPoint(description="Number of Het/Het/Het trios")
long HetHet_Het;
@DataPoint(description="Number of Het/Het/HomRef trios")
long HetHet_HomRef;
@DataPoint(description="Number of Het/Het/HomVar trios")
long HetHet_HomVar;
@DataPoint(description="Number of HomVar/HomVar/HomVar trios")
long HomVarHomVar_HomVar;
@DataPoint(description="Number of HomRef/HomVar/Het trios")
long HomRefHomVAR_Het;
@DataPoint(description="Number of ref alleles inherited from het/het parents")
long HetHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from het/het parents")
long HetHet_inheritedVar;
@DataPoint(description="Number of ref alleles inherited from homRef/het parents")
long HomRefHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from homRef/het parents")
long HomRefHet_inheritedVar;
@DataPoint(description="Number of ref alleles inherited from homVar/het parents")
long HomVarHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from homVar/het parents")
long HomVarHet_inheritedVar;
MendelianViolation mv; MendelianViolation mv;
PrintStream mvFile;
Map<String,Set<Sample>> families;
public void initialize(VariantEvalWalker walker) { public void initialize(VariantEvalWalker walker) {
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold()); //Changed by Laurent Francioli - 2011-06-07
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
families = walker.getSampleDB().getFamilies();
} }
public boolean enabled() { public boolean enabled() {
@ -75,110 +146,48 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
if (mv.setAlleles(vc)) {
if(mv.countViolations(families,vc)>0){
nLociViolations++;
nViolations += mv.getViolationsCount();
mvRefRef_Var += mv.getParentsRefRefChildVar();
mvRefRef_Het += mv.getParentsRefRefChildHet();
mvRefHet_Var += mv.getParentsRefHetChildVar();
mvRefVar_Var += mv.getParentsRefVarChildVar();
mvRefVar_Ref += mv.getParentsRefVarChildRef();
mvVarHet_Ref += mv.getParentsVarHetChildRef();
mvVarVar_Ref += mv.getParentsVarVarChildRef();
mvVarVar_Het += mv.getParentsVarVarChildHet();
}
HomRefHomRef_HomRef += mv.getRefRefRef();
HetHet_Het += mv.getHetHetHet();
HetHet_HomRef += mv.getHetHetHomRef();
HetHet_HomVar += mv.getHetHetHomVar();
HomVarHomVar_HomVar += mv.getVarVarVar();
HomRefHomVAR_Het += mv.getRefVarHet();
HetHet_inheritedRef += mv.getParentsHetHetInheritedRef();
HetHet_inheritedVar += mv.getParentsHetHetInheritedVar();
HomRefHet_inheritedRef += mv.getParentsRefHetInheritedRef();
HomRefHet_inheritedVar += mv.getParentsRefHetInheritedVar();
HomVarHet_inheritedRef += mv.getParentsVarHetInheritedRef();
HomVarHet_inheritedVar += mv.getParentsVarHetInheritedVar();
if(mv.getFamilyCalledCount()>0){
nVariants++; nVariants++;
nFamCalled += mv.getFamilyCalledCount();
Genotype momG = vc.getGenotype(mv.getSampleMom()); nLowQual += mv.getFamilyLowQualsCount();
Genotype dadG = vc.getGenotype(mv.getSampleDad()); nNoCall += mv.getFamilyNoCallCount();
Genotype childG = vc.getGenotype(mv.getSampleChild()); nVarFamCalled += mv.getVarFamilyCalledCount();
if (mv.isViolation()) {
nViolations++;
String label;
if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) {
label = "KidHomRef_ParentHomVar";
KidHomRef_ParentHomVar++;
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) {
label = "KidHet_ParentsHomRef";
KidHet_ParentsHomRef++;
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) {
label = "KidHet_ParentsHomVar";
KidHet_ParentsHomVar++;
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
label = "KidHomVar_ParentHomRef";
KidHomVar_ParentHomRef++;
} else {
throw new ReviewedStingException("BUG: unexpected child genotype class " + childG);
}
return "MendelViolation=" + label;
}
} }
} else{
nSkipped++;
return null; // we don't capture any intersting sites
}
/*
private double getQThreshold() {
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
//return 0.0;
}
TrioStructure trio;
double mendelianViolationQualThreshold;
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public static class TrioStructure {
public String mom, dad, child;
}
public static TrioStructure parseTrioDescription(String family) {
Matcher m = FAMILY_PATTERN.matcher(family);
if (m.matches()) {
TrioStructure trio = new TrioStructure();
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
trio.mom = m.group(1);
trio.dad = m.group(2);
trio.child = m.group(3);
return trio;
} else {
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
}
}
public void initialize(VariantEvalWalker walker) {
trio = parseTrioDescription(walker.getFamilyStructure());
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
}
private boolean includeGenotype(Genotype g) {
return g.getLog10PError() > getQThreshold() && g.isCalled();
}
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) {
return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles());
}
public static boolean isViolation(VariantContext vc, TrioStructure trio ) {
return isViolation(vc, vc.getGenotype(trio.mom), vc.getGenotype(trio.dad), vc.getGenotype(trio.child) );
}
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA) {
//VariantContext momVC = vc.subContextFromGenotypes(momG);
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
int i = 0;
Genotype childG = new Genotype("kidG", childA);
for (Allele momAllele : momA) {
for (Allele dadAllele : dadA) {
if (momAllele.isCalled() && dadAllele.isCalled()) {
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
if (childG.sameGenotype(possibleChild)) {
return false;
}
}
} }
return null;
} }
return true; return null; // we don't capture any interesting sites
} }
*/
} }

View File

@ -26,9 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -41,7 +42,6 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
@ -180,7 +180,7 @@ import java.util.*;
* </pre> * </pre>
* *
*/ */
public class SelectVariants extends RodWalker<Integer, Integer> { public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/** /**
@ -282,6 +282,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false) @Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
private double fractionRandom = 0; private double fractionRandom = 0;
@Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false)
private double fractionGenotypes = 0;
/** /**
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria. * This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
* When specified one or more times, a particular type of variant is selected. * When specified one or more times, a particular type of variant is selected.
@ -325,7 +328,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private boolean DISCORDANCE_ONLY = false; private boolean DISCORDANCE_ONLY = false;
private boolean CONCORDANCE_ONLY = false; private boolean CONCORDANCE_ONLY = false;
private Set<MendelianViolation> mvSet = new HashSet<MendelianViolation>(); private MendelianViolation mv;
/* variables used by the SELECT RANDOM modules */ /* variables used by the SELECT RANDOM modules */
@ -344,6 +347,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private PrintStream outMVFileStream = null; private PrintStream outMVFileStream = null;
//Random number generator for the genotypes to remove
private Random randomGenotypes = new Random();
/** /**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
@ -380,8 +385,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
for ( String sample : samples ) for ( String sample : samples )
logger.info("Including sample '" + sample + "'"); logger.info("Including sample '" + sample + "'");
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include // if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
if (TYPES_TO_INCLUDE.isEmpty()) { if (TYPES_TO_INCLUDE.isEmpty()) {
@ -421,29 +424,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName()); if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
if (MENDELIAN_VIOLATIONS) { if (MENDELIAN_VIOLATIONS) {
if ( FAMILY_STRUCTURE_FILE != null) { mv = new MendelianViolation(MENDELIAN_VIOLATION_QUAL_THRESHOLD,false,true);
try {
for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) {
MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom()))
mvSet.add(mv);
}
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
}
if (outMVFile != null)
try {
outMVFileStream = new PrintStream(outMVFile);
}
catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
}
else
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
}
else if (!FAMILY_STRUCTURE.isEmpty()) {
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
MENDELIAN_VIOLATIONS = true;
} }
SELECT_RANDOM_NUMBER = numRandom > 0; SELECT_RANDOM_NUMBER = numRandom > 0;
@ -479,26 +460,26 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
} }
for (VariantContext vc : vcs) { for (VariantContext vc : vcs) {
if (MENDELIAN_VIOLATIONS) { if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
boolean foundMV = false; break;
for (MendelianViolation mv : mvSet) {
if (mv.isViolation(vc)) { if (outMVFile != null){
foundMV = true; for( String familyId : mv.getViolationFamilies()){
//System.out.println(vc.toString()); for(Sample sample : this.getSampleDB().getFamily(familyId)){
if (outMVFile != null) if(sample.getParents().size() > 0){
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)), vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() ); vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
}
} }
} }
if (!foundMV)
break;
} }
if (DISCORDANCE_ONLY) { if (DISCORDANCE_ONLY) {
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation()); Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
if (!isDiscordant(vc, compVCs)) if (!isDiscordant(vc, compVCs))
@ -629,6 +610,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Override @Override
public Integer reduce(Integer value, Integer sum) { return value + sum; } public Integer reduce(Integer value, Integer sum) { return value + sum; }
@Override
public Integer treeReduce(Integer lhs, Integer rhs) {
return lhs + rhs;
}
public void onTraversalDone(Integer result) { public void onTraversalDone(Integer result) {
logger.info(result + " records processed."); logger.info(result + " records processed.");
@ -657,9 +643,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles()); final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
VariantContextBuilder builder = new VariantContextBuilder(sub); VariantContextBuilder builder = new VariantContextBuilder(sub);
GenotypesContext newGC = sub.getGenotypes();
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate) // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
if ( vc.getAlleles().size() != sub.getAlleles().size() ) if ( vc.getAlleles().size() != sub.getAlleles().size() )
builder.genotypes(VariantContextUtils.stripPLs(vc.getGenotypes())); newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
//Remove a fraction of the genotypes if needed
if(fractionGenotypes>0){
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
alleles.add(Allele.create((byte)'.'));
alleles.add(Allele.create((byte)'.'));
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
}
else{
genotypes.add(genotype);
}
}
newGC = GenotypesContext.create(genotypes);
}
builder.genotypes(newGC);
int depth = 0; int depth = 0;
for (String sample : sub.getSampleNames()) { for (String sample : sub.getSampleNames()) {

View File

@ -1,147 +1,394 @@
package org.broadinstitute.sting.utils; package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection; import java.util.*;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** /**
* User: carneiro * User: carneiro / lfran
* Date: 3/9/11 * Date: 3/9/11
* Time: 12:38 PM * Time: 12:38 PM
*
* Class for the identification and tracking of mendelian violation. It can be used in 2 distinct ways:
* - Either using an instance of the MendelianViolation class to track mendelian violations for each of the families while
* walking over the variants
* - Or using the static methods to directly get information about mendelian violation in a family at a given locus
*
*/ */
public class MendelianViolation { public class MendelianViolation {
String sampleMom; //List of families with violations
String sampleDad; private List<String> violationFamilies;
String sampleChild;
List allelesMom; //Call information
List allelesDad; private int nocall = 0;
List allelesChild; private int familyCalled = 0;
private int varFamilyCalled = 0;
private int lowQual = 0;
double minGenotypeQuality; private boolean allCalledOnly = true;
//Stores occurrences of inheritance
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
private int violations_total=0;
private double minGenotypeQuality;
private boolean abortOnSampleNotFound;
//Number of families with genotype information for all members
public int getFamilyCalledCount(){
return familyCalled;
}
//Number of families with genotype information for all members
public int getVarFamilyCalledCount(){
return varFamilyCalled;
}
//Number of families missing genotypes for one or more of their members
public int getFamilyNoCallCount(){
return nocall;
}
//Number of families with genotypes below the set quality threshold
public int getFamilyLowQualsCount(){
return lowQual;
}
public int getViolationsCount(){
return violations_total;
}
//Count of alt alleles inherited from het parents (no violation)
public int getParentHetInheritedVar(){
return getParentsHetHetInheritedVar() + getParentsRefHetInheritedVar() + getParentsVarHetInheritedVar();
}
//Count of ref alleles inherited from het parents (no violation)
public int getParentHetInheritedRef(){
return getParentsHetHetInheritedRef() + getParentsRefHetInheritedRef() + getParentsVarHetInheritedRef();
}
//Count of HomRef/HomRef/HomRef trios
public int getRefRefRef(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
}
//Count of HomVar/HomVar/HomVar trios
public int getVarVarVar(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
}
//Count of HomRef/HomVar/Het trios
public int getRefVarHet(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
}
//Count of Het/Het/Het trios
public int getHetHetHet(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
}
//Count of Het/Het/HomRef trios
public int getHetHetHomRef(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
}
//Count of Het/Het/HomVar trios
public int getHetHetHomVar(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
}
//Count of ref alleles inherited from Het/Het parents (no violation)
public int getParentsHetHetInheritedRef(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
//return parentsHetHet_childRef;
}
//Count of var alleles inherited from Het/Het parents (no violation)
public int getParentsHetHetInheritedVar(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
//return parentsHetHet_childVar;
}
//Count of ref alleles inherited from HomRef/Het parents (no violation)
public int getParentsRefHetInheritedRef(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
//return parentsHomRefHet_childRef;
}
//Count of var alleles inherited from HomRef/Het parents (no violation)
public int getParentsRefHetInheritedVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
//return parentsHomRefHet_childVar;
}
//Count of ref alleles inherited from HomVar/Het parents (no violation)
public int getParentsVarHetInheritedRef(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
//return parentsHomVarHet_childRef;
}
//Count of var alleles inherited from HomVar/Het parents (no violation)
public int getParentsVarHetInheritedVar(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
//return parentsHomVarHet_childVar;
}
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
public int getParentsRefRefChildVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
}
//Count of violations of the type HOM_REF/HOM_REF -> HET
public int getParentsRefRefChildHet(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
}
//Count of violations of the type HOM_REF/HET -> HOM_VAR
public int getParentsRefHetChildVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
}
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
public int getParentsRefVarChildVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
}
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
public int getParentsRefVarChildRef(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
}
//Count of violations of the type HOM_VAR/HET -> HOM_REF
public int getParentsVarHetChildRef(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
}
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
public int getParentsVarVarChildRef(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
}
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
public int getParentsVarVarChildHet(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
}
//Count of violations of the type HOM_VAR/? -> HOM_REF
public int getParentVarChildRef(){
return getParentsRefVarChildRef() + getParentsVarHetChildRef() +getParentsVarVarChildRef();
}
//Count of violations of the type HOM_REF/? -> HOM_VAR
public int getParentRefChildVar(){
return getParentsRefVarChildVar() + getParentsRefHetChildVar() +getParentsRefRefChildVar();
}
//Returns a String containing all trios where a Mendelian violation was observed.
//The String is formatted "mom1+dad1=child1,mom2+dad2=child2,..."
public String getViolationFamiliesString(){
if(violationFamilies.isEmpty())
return "";
Iterator<String> it = violationFamilies.iterator();
String violationFams = it.next();
while(it.hasNext()){
violationFams += ","+it.next();
}
return violationFams;
}
public List<String> getViolationFamilies(){
return violationFamilies;
}
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public String getSampleMom() {
return sampleMom;
}
public String getSampleDad() {
return sampleDad;
}
public String getSampleChild() {
return sampleChild;
}
public double getMinGenotypeQuality() { public double getMinGenotypeQuality() {
return minGenotypeQuality; return minGenotypeQuality;
} }
/** /**
* * Constructor
* @param sampleMomP - sample name of mom
* @param sampleDadP - sample name of dad
* @param sampleChildP - sample name of child
*/
public MendelianViolation (String sampleMomP, String sampleDadP, String sampleChildP) {
sampleMom = sampleMomP;
sampleDad = sampleDadP;
sampleChild = sampleChildP;
}
/**
*
* @param family - the sample names string "mom+dad=child"
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
*/
public MendelianViolation(String family, double minGenotypeQualityP) {
minGenotypeQuality = minGenotypeQualityP;
Matcher m = FAMILY_PATTERN.matcher(family);
if (m.matches()) {
sampleMom = m.group(1);
sampleDad = m.group(2);
sampleChild = m.group(3);
}
else
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
}
/**
* An alternative to the more general constructor if you want to get the Sample information from the engine yourself.
* @param sample - the sample object extracted from the sample metadata YAML file given to the engine.
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
*/
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
sampleMom = sample.getMother().getID();
sampleDad = sample.getFather().getID();
sampleChild = sample.getID();
minGenotypeQuality = minGenotypeQualityP;
}
/**
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
* before evaluating it for mendelian violation, you can call setAlleles and then isViolation().
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
*/
public boolean setAlleles (VariantContext vc)
{
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
Genotype gChild = vc.getGenotypes(sampleChild).get(sampleChild);
if (gMom == null || gDad == null || gChild == null)
throw new IllegalArgumentException(String.format("Variant %s:%d didn't contain genotypes for all family members: mom=%s dad=%s child=%s", vc.getChr(), vc.getStart(), sampleMom, sampleDad, sampleChild));
if (gMom.isNoCall() || gDad.isNoCall() || gChild.isNoCall() ||
gMom.getPhredScaledQual() < minGenotypeQuality ||
gDad.getPhredScaledQual() < minGenotypeQuality ||
gChild.getPhredScaledQual() < minGenotypeQuality ) {
return false;
}
allelesMom = gMom.getAlleles();
allelesDad = gDad.getAlleles();
allelesChild = gChild.getAlleles();
return !allelesMom.isEmpty() && !allelesDad.isEmpty() && !allelesChild.isEmpty();
}
/**
* *
*/
public MendelianViolation(double minGenotypeQualityP) {
this(minGenotypeQualityP,true);
}
/**
* Constructor
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
*/
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound) {
minGenotypeQuality = minGenotypeQualityP;
this.abortOnSampleNotFound = abortOnSampleNotFound;
violationFamilies = new ArrayList<String>();
createInheritanceMap();
}
/**
* Constructor
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
* @param completeTriosOnly - whether only complete trios are considered or parent/child pairs are too.
*/
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound, boolean completeTriosOnly) {
minGenotypeQuality = minGenotypeQualityP;
this.abortOnSampleNotFound = abortOnSampleNotFound;
violationFamilies = new ArrayList<String>();
createInheritanceMap();
allCalledOnly = completeTriosOnly;
}
/**
* @param families the families to be checked for Mendelian violations
* @param vc the variant context to extract the genotypes and alleles for mom, dad and child. * @param vc the variant context to extract the genotypes and alleles for mom, dad and child.
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
*
*/
public boolean isViolation(VariantContext vc)
{
return setAlleles(vc) && isViolation();
}
/**
* @return whether or not there is a mendelian violation at the site. * @return whether or not there is a mendelian violation at the site.
*/ */
public boolean isViolation() { public int countViolations(Map<String, Set<Sample>> families, VariantContext vc){
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0))) //Reset counts
return false; nocall = 0;
return true; lowQual = 0;
familyCalled = 0;
varFamilyCalled = 0;
violations_total=0;
violationFamilies.clear();
clearInheritanceMap();
for(Set<Sample> family : families.values()){
Iterator<Sample> sampleIterator = family.iterator();
Sample sample;
while(sampleIterator.hasNext()){
sample = sampleIterator.next();
if(sample.getParents().size() > 0)
updateViolations(sample.getFamilyID(),sample.getMaternalID(), sample.getPaternalID(), sample.getID() ,vc);
}
}
return violations_total;
}
public boolean isViolation(Sample mother, Sample father, Sample child, VariantContext vc){
//Reset counts
nocall = 0;
lowQual = 0;
familyCalled = 0;
varFamilyCalled = 0;
violations_total=0;
violationFamilies.clear();
clearInheritanceMap();
updateViolations(mother.getFamilyID(),mother.getID(),father.getID(),child.getID(),vc);
return violations_total>0;
}
private void updateViolations(String familyId, String motherId, String fatherId, String childId, VariantContext vc){
int count;
Genotype gMom = vc.getGenotype(motherId);
Genotype gDad = vc.getGenotype(fatherId);
Genotype gChild = vc.getGenotype(childId);
if (gMom == null || gDad == null || gChild == null){
if(abortOnSampleNotFound)
throw new IllegalArgumentException(String.format("Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s family=%s", vc.getChr(), vc.getStart(), familyId, motherId, fatherId, childId));
else
return;
}
//Count No calls
if(allCalledOnly && (!gMom.isCalled() || !gDad.isCalled() || !gChild.isCalled())){
nocall++;
}
else if (!gMom.isCalled() && !gDad.isCalled() || !gChild.isCalled()){
nocall++;
}
//Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned
else if (minGenotypeQuality>0 && (gMom.getPhredScaledQual() < minGenotypeQuality ||
gDad.getPhredScaledQual() < minGenotypeQuality ||
gChild.getPhredScaledQual() < minGenotypeQuality )) {
lowQual++;
}
else{
//Count all families per loci called
familyCalled++;
//If the family is all homref, not too interesting
if(!(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef()))
{
varFamilyCalled++;
if(isViolation(gMom, gDad, gChild)){
violationFamilies.add(familyId);
violations_total++;
}
}
count = inheritance.get(gMom.getType()).get(gDad.getType()).get(gChild.getType());
inheritance.get(gMom.getType()).get(gDad.getType()).put(gChild.getType(),count+1);
}
}
private boolean isViolation(Genotype gMom, Genotype gDad, Genotype gChild) {
//1 parent is no "call
if(!gMom.isCalled()){
return (gDad.isHomRef() && gChild.isHomVar()) || (gDad.isHomVar() && gChild.isHomRef());
}
else if(!gDad.isCalled()){
return (gMom.isHomRef() && gChild.isHomVar()) || (gMom.isHomVar() && gChild.isHomRef());
}
//Both parents have genotype information
return !(gMom.getAlleles().contains(gChild.getAlleles().get(0)) && gDad.getAlleles().contains(gChild.getAlleles().get(1)) ||
gMom.getAlleles().contains(gChild.getAlleles().get(1)) && gDad.getAlleles().contains(gChild.getAlleles().get(0)));
}
private void createInheritanceMap(){
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
for(Genotype.Type mType : Genotype.Type.values()){
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
for(Genotype.Type dType : Genotype.Type.values()){
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
for(Genotype.Type cType : Genotype.Type.values()){
inheritance.get(mType).get(dType).put(cType, 0);
}
}
}
}
private void clearInheritanceMap(){
for(Genotype.Type mType : Genotype.Type.values()){
for(Genotype.Type dType : Genotype.Type.values()){
for(Genotype.Type cType : Genotype.Type.values()){
inheritance.get(mType).get(dType).put(cType, 0);
}
}
}
} }
/** /**
* @return the likelihood ratio for a mendelian violation * @return the likelihood ratio for a mendelian violation
*/ */
public double violationLikelihoodRatio(VariantContext vc) { public double violationLikelihoodRatio(VariantContext vc, String motherId, String fatherId, String childId) {
double[] logLikAssignments = new double[27]; double[] logLikAssignments = new double[27];
// the matrix to set up is // the matrix to set up is
// MOM DAD CHILD // MOM DAD CHILD
@ -152,9 +399,9 @@ public class MendelianViolation {
// AA AB | AB // AA AB | AB
// |- BB // |- BB
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); double[] momGL = vc.getGenotype(motherId).getLikelihoods().getAsVector();
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); double[] dadGL = vc.getGenotype(fatherId).getLikelihoods().getAsVector();
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); double[] childGL = vc.getGenotype(childId).getLikelihoods().getAsVector();
int offset = 0; int offset = 0;
for ( int oMom = 0; oMom < 3; oMom++ ) { for ( int oMom = 0; oMom < 3; oMom++ ) {
for ( int oDad = 0; oDad < 3; oDad++ ) { for ( int oDad = 0; oDad < 3; oDad++ ) {

View File

@ -27,11 +27,42 @@ public class SampleDBUnitTest extends BaseTest {
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED))); new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));
private static final Set<Sample> testPEDFamilyF2 = new HashSet<Sample>(Arrays.asList(
new Sample("s2", "fam2", "d2", "m2", Gender.FEMALE, Affection.AFFECTED),
new Sample("d2", "fam2", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("m2", "fam2", null, null, Gender.FEMALE, Affection.UNKNOWN)
));
private static final Set<Sample> testPEDFamilyF3 = new HashSet<Sample>(Arrays.asList(
new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED),
new Sample("d1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN),
new Sample("m1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN)
));
private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList( private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN), new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN), new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN))); new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));
private static final HashMap<String, Set<Sample>> testGetFamilies = new HashMap<String,Set<Sample>>();
static {
testGetFamilies.put("fam1", testPEDSamples);
testGetFamilies.put("fam2", testPEDFamilyF2);
testGetFamilies.put("fam3", testPEDFamilyF3);
}
private static final Set<Sample> testKidsWithParentsFamilies2 = new HashSet<Sample>(Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("kid3", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED),
new Sample("kid2", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED)));
private static final HashSet<String> testGetPartialFamiliesIds = new HashSet<String>(Arrays.asList("kid","s1"));
private static final HashMap<String, Set<Sample>> testGetPartialFamilies = new HashMap<String,Set<Sample>>();
static {
testGetPartialFamilies.put("fam1", new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED))));
testGetPartialFamilies.put("fam3", new HashSet<Sample>(Arrays.asList(new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED))));
}
private static final String testPEDString = private static final String testPEDString =
String.format("%s%n%s%n%s", String.format("%s%n%s%n%s",
"fam1 kid dad mom 1 2", "fam1 kid dad mom 1 2",
@ -46,6 +77,18 @@ public class SampleDBUnitTest extends BaseTest {
"fam3 s1 d1 m1 2 2", "fam3 s1 d1 m1 2 2",
"fam2 s2 d2 m2 2 2"); "fam2 s2 d2 m2 2 2");
private static final String testPEDMultipleFamilies2 =
String.format("%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s",
"fam1 kid dad mom 1 2",
"fam1 dad 0 0 1 1",
"fam1 mom 0 0 2 2",
"fam4 kid4 dad4 0 1 2",
"fam4 dad4 0 0 1 1",
"fam5 kid2 dad2 mom2 1 2",
"fam5 kid3 dad2 mom2 1 2",
"fam5 dad2 0 0 1 1",
"fam5 mom2 0 0 2 2");
private static final String testPEDStringInconsistentGender = private static final String testPEDStringInconsistentGender =
"fam1 kid 0 0 2 2"; "fam1 kid 0 0 2 2";
@ -138,6 +181,25 @@ public class SampleDBUnitTest extends BaseTest {
Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet); Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
} }
@Test()
public void getFamilies(){
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
SampleDB db = builder.getFinalSampleDB();
Assert.assertEquals(db.getFamilies(),testGetFamilies);
Assert.assertEquals(db.getFamilies(null),testGetFamilies);
Assert.assertEquals(db.getFamilies(testGetPartialFamiliesIds),testGetPartialFamilies);
}
@Test()
public void testGetChildrenWithParents()
{
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies2));
SampleDB db = builder.getFinalSampleDB();
Assert.assertEquals(db.getChildrenWithParents(), testKidsWithParentsFamilies2);
Assert.assertEquals(db.getChildrenWithParents(false), testKidsWithParentsFamilies2);
Assert.assertEquals(db.getChildrenWithParents(true), new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED))));
}
@Test() @Test()
public void loadFamilyIDs() { public void loadFamilyIDs() {
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies)); builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));

View File

@ -0,0 +1,104 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
public class ExactAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
static final int numSamples = 3;
static double[][] priors = new double[2][2*numSamples+1]; // flat priors
@BeforeSuite
public void before() {
AA1 = new double[]{0.0, -20.0, -20.0};
AB1 = new double[]{-20.0, 0.0, -20.0};
BB1 = new double[]{-20.0, -20.0, 0.0};
AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0};
AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0};
AC2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0};
BB2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0};
BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0};
CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0};
}
private class GetGLsTest extends TestDataProvider {
GenotypesContext GLs;
int numAltAlleles;
String name;
private GetGLsTest(String name, int numAltAlleles, Genotype... arg) {
super(GetGLsTest.class, name);
GLs = GenotypesContext.create(arg);
this.name = name;
this.numAltAlleles = numAltAlleles;
}
public String toString() {
return String.format("%s input=%s", super.toString(), GLs);
}
}
private static Genotype createGenotype(String name, double[] gls) {
return new Genotype(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), Genotype.NO_LOG10_PERROR, gls);
}
@DataProvider(name = "getGLs")
public Object[][] createGLsData() {
// bi-allelic case
new GetGLsTest("B0", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AA3", AA1));
new GetGLsTest("B1", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AB", AB1));
new GetGLsTest("B2", 1, createGenotype("AA1", AA1), createGenotype("BB", BB1), createGenotype("AA2", AA1));
new GetGLsTest("B3a", 1, createGenotype("AB", AB1), createGenotype("AA", AA1), createGenotype("BB", BB1));
new GetGLsTest("B3b", 1, createGenotype("AB1", AB1), createGenotype("AB2", AB1), createGenotype("AB3", AB1));
new GetGLsTest("B4", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("AA", AA1));
new GetGLsTest("B5", 1, createGenotype("BB1", BB1), createGenotype("AB", AB1), createGenotype("BB2", BB1));
new GetGLsTest("B6", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("BB3", BB1));
// tri-allelic case
new GetGLsTest("B1C0", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AB", AB2));
new GetGLsTest("B0C1", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AC", AC2));
new GetGLsTest("B1C1a", 2, createGenotype("AA", AA2), createGenotype("AB", AB2), createGenotype("AC", AC2));
new GetGLsTest("B1C1b", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("BC", BC2));
new GetGLsTest("B2C1", 2, createGenotype("AB1", AB2), createGenotype("AB2", AB2), createGenotype("AC", AC2));
new GetGLsTest("B3C2a", 2, createGenotype("AB", AB2), createGenotype("BC1", BC2), createGenotype("BC2", BC2));
new GetGLsTest("B3C2b", 2, createGenotype("AB", AB2), createGenotype("BB", BB2), createGenotype("CC", CC2));
return GetGLsTest.getTests(GetGLsTest.class);
}
@Test(dataProvider = "getGLs")
public void testGLs(GetGLsTest cfg) {
final double[][] log10AlleleFrequencyLikelihoods = new double[2][2*numSamples+1];
final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1];
for ( int i = 0; i < 2; i++ ) {
for ( int j = 0; j < 2*numSamples+1; j++ ) {
log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
}
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
int calculatedAlleleCount = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors[allele]);
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
}
}
}

View File

@ -7,7 +7,6 @@ import org.testng.annotations.Test;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
// ********************************************************************************** // // ********************************************************************************** //
@ -29,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() { public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("286f0de92e4ce57986ba861390c6019d")); Arrays.asList("a2d3839c4ebb390b0012d495e4e53b3a"));
executeTest("test MultiSample Pilot1", spec); executeTest("test MultiSample Pilot1", spec);
} }
@ -45,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() { public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("d0593483e85a7d815f4c5ee6db284d2a")); Arrays.asList("43e7a17d95b1a0cf72e669657794d802"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
} }
@ -53,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() { public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("3ccce5d909f8f128e496f6841836e5f7")); Arrays.asList("ae29b9c9aacce8046dc780430540cd62"));
executeTest("test SingleSample Pilot2", spec); executeTest("test SingleSample Pilot2", spec);
} }
@ -63,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864"; private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65";
@Test @Test
public void testCompressedOutput() { public void testCompressedOutput() {
@ -84,7 +83,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "95614280c565ad90f8c000376fef822c"; String md5 = "32a34362dff51d8b73a3335048516d82";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -165,8 +164,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testHeterozyosity() { public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>(); HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" ); e.put( 0.01, "2cb2544739e01f6c08fd820112914317" );
e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" ); e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) { for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -276,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("6e182a58472ea17c8b0eb01f80562fbd")); Arrays.asList("45633d905136c86e9d3f90ce613255e5"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
} }
@ -286,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("f93f8a35b47bcf96594ada55e2312c73")); Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3); executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
} }
@ -295,8 +294,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("9be28cb208d8b0314d2bc2696e2fd8d4")); Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51"));
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4); executeTest("test MultiSample Phase1 indels with complicated records", spec4);
} }
@Test @Test

View File

@ -291,6 +291,17 @@ public class VariantEvalIntegrationTest extends WalkerTest {
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
} }
@Test
public void testVEMendelianViolationEvaluator() {
String vcfFile = "/MendelianViolationEval.vcf";
String pedFile = "/MendelianViolationEval.ped";
WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST",
1,
Arrays.asList("66e72c887124f40933d32254b2dd44a3"));
executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec);
}
@Test @Test
public void testCompVsEvalAC() { public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";

View File

@ -115,4 +115,26 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testUsingDbsnpName--" + testFile, spec); executeTest("testUsingDbsnpName--" + testFile, spec);
} }
@Test
public void testParallelization() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec;
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
1,
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
);
executeTest("testParallelization (2 threads)--" + testfile, spec);
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
1,
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
);
executeTest("testParallelization (4 threads)--" + testfile, spec);
}
} }

View File

@ -83,6 +83,10 @@ class DataProcessingPipeline extends QScript {
@Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false) @Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false)
var defaultPlatform: String = "" var defaultPlatform: String = ""
@Hidden
@Input(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false)
var testMode: Boolean = false
/**************************************************************************** /****************************************************************************
* Global Variables * Global Variables
@ -335,6 +339,7 @@ class DataProcessingPipeline extends QScript {
this.known ++= qscript.indels this.known ++= qscript.indels
this.consensusDeterminationModel = cleanModelEnum this.consensusDeterminationModel = cleanModelEnum
this.compress = 0 this.compress = 0
this.noPGTag = qscript.testMode;
this.scatterCount = nContigs this.scatterCount = nContigs
this.analysisName = queueLogDir + outBam + ".clean" this.analysisName = queueLogDir + outBam + ".clean"
this.jobName = queueLogDir + outBam + ".clean" this.jobName = queueLogDir + outBam + ".clean"
@ -360,6 +365,7 @@ class DataProcessingPipeline extends QScript {
this.out = outBam this.out = outBam
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString) if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
else if (qscript.intervals != null) this.intervals :+= qscript.intervals else if (qscript.intervals != null) this.intervals :+= qscript.intervals
this.no_pg_tag = qscript.testMode
this.scatterCount = nContigs this.scatterCount = nContigs
this.isIntermediate = false this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration" this.analysisName = queueLogDir + outBam + ".recalibration"

View File

@ -47,6 +47,10 @@ class PacbioProcessingPipeline extends QScript {
@Input(shortName="bwastring", required=false) @Input(shortName="bwastring", required=false)
var bwastring: String = "" var bwastring: String = ""
@Hidden
@Input(shortName = "test", fullName = "test_mode", required = false)
var testMode: Boolean = false
val queueLogDir: String = ".qlog/" val queueLogDir: String = ".qlog/"
def script = { def script = {
@ -170,6 +174,7 @@ class PacbioProcessingPipeline extends QScript {
this.input_file :+= inBam this.input_file :+= inBam
this.recal_file = inRecalFile this.recal_file = inRecalFile
this.out = outBam this.out = outBam
this.no_pg_tag = testMode
this.isIntermediate = false this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration" this.analysisName = queueLogDir + outBam + ".recalibration"
this.jobName = queueLogDir + outBam + ".recalibration" this.jobName = queueLogDir + outBam + ".recalibration"
@ -177,7 +182,6 @@ class PacbioProcessingPipeline extends QScript {
} }
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
this.resources = R
this.recal_file = inRecalFile this.recal_file = inRecalFile
this.output_dir = outPath this.output_dir = outPath
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"

View File

@ -0,0 +1,69 @@
package org.broadinstitute.sting.queue.pipeline
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.testng.annotations.Test
import org.broadinstitute.sting.BaseTest
class DataProcessingPipelineTest {
@Test
def testSimpleBAM {
val projectName = "test1"
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
val spec = new PipelineTestSpec
spec.name = "DataProcessingPipeline"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -i " + BaseTest.testDir + "exampleBAM.bam",
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
" -nv ",
" -test ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "1f85e76de760167a77ed1d9ab4da2936"
PipelineTest.executeTest(spec)
}
@Test
def testBWAPEBAM {
val projectName = "test2"
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
val spec = new PipelineTestSpec
spec.name = "DataProcessingPipeline"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -i " + BaseTest.testDir + "exampleBAM.bam",
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
" -nv ",
" -test ",
" -bwa /home/unix/carneiro/bin/bwa",
" -bwape ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "57416a0abdf9524bc92834d466529708"
PipelineTest.executeTest(spec)
}
}

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.queue.pipeline
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.testng.annotations.Test
import org.broadinstitute.sting.BaseTest
class PacbioProcessingPipelineTest {
@Test
def testBAM {
val testOut = "exampleBAM.recal.bam"
val spec = new PipelineTestSpec
spec.name = "pacbioProcessingPipeline"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -i " + BaseTest.testDir + "exampleBAM.bam",
" -blasr ",
" -test ",
" -D " + BaseTest.testDir + "exampleDBSNP.vcf").mkString
spec.fileMD5s += testOut -> "f0adce660b55cb91d5f987f9a145471e"
PipelineTest.executeTest(spec)
}
}

Binary file not shown.

Binary file not shown.

282
public/testdata/exampleDBSNP.vcf vendored 100644
View File

@ -0,0 +1,282 @@
##fileformat=VCFv4.1
##FILTER=<ID=NC,Description="Inconsistent Genotype Submission For At Least One Sample">
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=ASP,Number=0,Type=Flag,Description="Is Assembly specific. This is set if the variant only maps to one assembly">
##INFO=<ID=ASS,Number=0,Type=Flag,Description="In acceptor splice site FxnCode = 73">
##INFO=<ID=CDA,Number=0,Type=Flag,Description="Variation is interrogated in a clinical diagnostic assay">
##INFO=<ID=CFL,Number=0,Type=Flag,Description="Has Assembly conflict. This is for weight 1 and 2 variant that maps to different chromosomes on different assemblies.">
##INFO=<ID=CLN,Number=0,Type=Flag,Description="Variant is Clinical(LSDB,OMIM,TPA,Diagnostic)">
##INFO=<ID=DSS,Number=0,Type=Flag,Description="In donor splice-site FxnCode = 75">
##INFO=<ID=G5,Number=0,Type=Flag,Description=">5% minor allele frequency in 1+ populations">
##INFO=<ID=G5A,Number=0,Type=Flag,Description=">5% minor allele frequency in each and all populations">
##INFO=<ID=GCF,Number=0,Type=Flag,Description="Has Genotype Conflict Same (rs, ind), different genotype. N/N is not included.">
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:) and each pair is delimited by a vertical bar (|)">
##INFO=<ID=GMAF,Number=1,Type=Float,Description="Global Minor Allele Frequency [0, 0.5]; global population is 1000GenomesProject phase 1 genotype data from 629 individuals, released in the 08-04-2010 dataset">
##INFO=<ID=GNO,Number=0,Type=Flag,Description="Genotypes available. The variant has individual genotype (in SubInd table).">
##INFO=<ID=HD,Number=0,Type=Flag,Description="Marker is on high density genotyping kit (50K density or greater). The variant may have phenotype associations present in dbGaP.">
##INFO=<ID=INT,Number=0,Type=Flag,Description="In Intron FxnCode = 6">
##INFO=<ID=KGPROD,Number=0,Type=Flag,Description="1000 Genome production phase">
##INFO=<ID=KGPilot1,Number=0,Type=Flag,Description="1000 Genome discovery(pilot1) 2009">
##INFO=<ID=KGPilot123,Number=0,Type=Flag,Description="1000 Genome discovery all pilots 2010(1,2,3)">
##INFO=<ID=KGVAL,Number=0,Type=Flag,Description="1000 Genome validated by second method">
##INFO=<ID=LSD,Number=0,Type=Flag,Description="Submitted from a locus-specific database">
##INFO=<ID=MTP,Number=0,Type=Flag,Description="Microattribution/third-party annotation(TPA:GWAS,PAGE)">
##INFO=<ID=MUT,Number=0,Type=Flag,Description="Is mutation (journal citation, explicit fact): a low frequency variation that is cited in journal and other reputable sources">
##INFO=<ID=NOC,Number=0,Type=Flag,Description="Contig allele not present in variant allele list. The reference sequence allele at the mapped position is not present in the variant allele list, adjusted for orientation.">
##INFO=<ID=NOV,Number=0,Type=Flag,Description="Rs cluster has non-overlapping allele sets. True when rs set has more than 2 alleles from different submissions and these sets share no alleles in common.">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=NSF,Number=0,Type=Flag,Description="Has non-synonymous frameshift A coding region variation where one allele in the set changes all downstream amino acids. FxnClass = 44">
##INFO=<ID=NSM,Number=0,Type=Flag,Description="Has non-synonymous missense A coding region variation where one allele in the set changes protein peptide. FxnClass = 42">
##INFO=<ID=NSN,Number=0,Type=Flag,Description="Has non-synonymous nonsense A coding region variation where one allele in the set changes to STOP codon (TER). FxnClass = 41">
##INFO=<ID=OM,Number=0,Type=Flag,Description="Has OMIM/OMIA">
##INFO=<ID=OTH,Number=0,Type=Flag,Description="Has other variant with exactly the same set of mapped positions on NCBI refernce assembly.">
##INFO=<ID=PH1,Number=0,Type=Flag,Description="Phase 1 genotyped: filtered, non-redundant">
##INFO=<ID=PH2,Number=0,Type=Flag,Description="Phase 2 genotyped: filtered, non-redundant">
##INFO=<ID=PH3,Number=0,Type=Flag,Description="Phase 3 genotyped: filtered, non-redundant">
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
##INFO=<ID=PMC,Number=0,Type=Flag,Description="Links exist to PubMed Central article">
##INFO=<ID=R3,Number=0,Type=Flag,Description="In 3' gene region FxnCode = 13">
##INFO=<ID=R5,Number=0,Type=Flag,Description="In 5' gene region FxnCode = 15">
##INFO=<ID=REF,Number=0,Type=Flag,Description="Has reference A coding region variation where one allele in the set is identical to the reference sequence. FxnCode = 8">
##INFO=<ID=RSPOS,Number=1,Type=Integer,Description="Chr position reported in dbSNP">
##INFO=<ID=RV,Number=0,Type=Flag,Description="RS orientation is reversed">
##INFO=<ID=S3D,Number=0,Type=Flag,Description="Has 3D structure - SNP3D table">
##INFO=<ID=SAO,Number=1,Type=Integer,Description="Variant Allele Origin: 0 - unspecified, 1 - Germline, 2 - Somatic, 3 - Both">
##INFO=<ID=SCS,Number=1,Type=Integer,Description="Variant Clinical Significance, 0 - unknown, 1 - untested, 2 - non-pathogenic, 3 - probable-non-pathogenic, 4 - probable-pathogenic, 5 - pathogenic, 6 - drug-response, 7 - histocompatibility, 255 - other">
##INFO=<ID=SLO,Number=0,Type=Flag,Description="Has SubmitterLinkOut - From SNP->SubSNP->Batch.link_out">
##INFO=<ID=SSR,Number=1,Type=Integer,Description="Variant Suspect Reason Code, 0 - unspecified, 1 - Paralog, 2 - byEST, 3 - Para_EST, 4 - oldAlign, 5 - other">
##INFO=<ID=SYN,Number=0,Type=Flag,Description="Has synonymous A coding region variation where one allele in the set does not change the encoded amino acid. FxnCode = 3">
##INFO=<ID=TPA,Number=0,Type=Flag,Description="Provisional Third Party Annotation(TPA) (currently rs from PHARMGKB who will give phenotype data)">
##INFO=<ID=U3,Number=0,Type=Flag,Description="In 3' UTR Location is in an untranslated region (UTR). FxnCode = 53">
##INFO=<ID=U5,Number=0,Type=Flag,Description="In 5' UTR Location is in an untranslated region (UTR). FxnCode = 55">
##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
##INFO=<ID=VLD,Number=0,Type=Flag,Description="Is Validated. This bit is set if the variant has 2+ minor allele count based on frequency or genotype data.">
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property">
##INFO=<ID=WGT,Number=1,Type=Integer,Description="Weight, 00 - unmapped, 1 - weight 1, 2 - weight 2, 3 - weight 3 or more">
##INFO=<ID=WTD,Number=0,Type=Flag,Description="Is Withdrawn by submitter If one member ss is withdrawn by submitter, then this bit is set. If all member ss' are withdrawn, then the rs is deleted to SNPHistory">
##INFO=<ID=dbSNPBuildID,Number=1,Type=Integer,Description="First dbSNP Build for RS">
##LeftAlignVariants="analysis_type=LeftAlignVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=00-All.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub filter_mismatching_base_and_quals=false"
##contig=<ID=chr1,length=249250621,assembly=b37>
##phasing=partial
##reference=GRCh37.3
##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta
##source=dbSNP
##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 10144 rs144773400 TA T . PASS ASP;RSPOS=10145;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10228 rs143255646 TA T . PASS ASP;RSPOS=10229;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10234 rs145599635 C T . PASS ASP;RSPOS=10234;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10248 rs148908337 A T . PASS ASP;RSPOS=10248;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10254 rs140194106 TA T . PASS ASP;RSPOS=10255;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10291 rs145427775 C T . PASS ASP;RSPOS=10291;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10327 rs112750067 T C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10327;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 10329 rs150969722 AC A . PASS ASP;RSPOS=10330;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10351 rs145072688 CTA C,CA . PASS ASP;RSPOS=10352;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10382 rs147093981 AAC A,AC . PASS ASP;RSPOS=10383;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10433 rs56289060 A AC . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10433;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 10439 rs112766696 AC A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10440;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=132
chr1 10439 rs138941843 AC A . PASS ASP;RSPOS=10440;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10440 rs112155239 C A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10440;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 10492 rs55998931 C T . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.0617001828153565;RSPOS=10492;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040000000100;WGT=0;dbSNPBuildID=129
chr1 10519 rs62636508 G C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10519;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
chr1 10583 rs58108140 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.270566727605119;KGPilot123;RSPOS=10583;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=129
chr1 10611 rs189107123 C G . PASS KGPilot123;RSPOS=10611;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 10828 rs10218492 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10828;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
chr1 10904 rs10218493 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10904;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=119
chr1 10927 rs10218527 A G . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10927;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
chr1 10938 rs28853987 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10938;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11014 rs28484712 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11014;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11022 rs28775022 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11022;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11081 rs10218495 G T . PASS CFL;GENEINFO=LOC100652771:100652771;GNO;RSPOS=11081;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=119
chr1 11863 rs187669455 C A . PASS RSPOS=11863;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
chr1 13302 rs180734498 C T . PASS KGPilot123;RSPOS=13302;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 13327 rs144762171 G C . PASS ASP;KGPilot123;RSPOS=13327;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 13684 rs71260404 C T . PASS GENEINFO=LOC100652771:100652771;GNO;RSPOS=13684;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 13980 rs151276478 T C . PASS ASP;KGPilot123;RSPOS=13980;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 14889 rs142444908 G A . PASS ASP;RSPOS=14889;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 14907 rs79585140 A G . PASS GNO;RSPOS=14907;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 14930 rs75454623 A G . PASS GNO;RSPOS=14930;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 14976 rs71252251 G A . PASS ASP;GNO;RSPOS=14976;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
chr1 15061 rs71268703 T TG . PASS ASP;GNO;RSPOS=15061;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 15118 rs71252250 A G . PASS ASP;GNO;RSPOS=15118;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
chr1 15211 rs144718396 T G . PASS ASP;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 15211 rs78601809 T G . PASS ASP;GNO;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 16257 rs78588380 G C . PASS ASP;GNO;RSPOS=16257;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 16378 rs148220436 T C . PASS ASP;RSPOS=16378;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 16495 rs141130360 G C . PASS ASP;RSPOS=16495;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 16497 rs150723783 A G . PASS ASP;RSPOS=16497;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 17519 rs192890528 G T . PASS RSPOS=17519;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
chr1 19226 rs138930629 T A . PASS ASP;RSPOS=19226;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 20141 rs56336884 G A . PASS HD;RSPOS=20141;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000400000100;WGT=0;dbSNPBuildID=129
chr1 20144 rs143346096 G A . PASS ASP;RSPOS=20144;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 20206 rs71262675 C T . PASS GNO;RSPOS=20206;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 20245 rs71262674 G A . PASS GMAF=0.256398537477148;GNO;RSPOS=20245;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 20304 rs71262673 G C . PASS GMAF=0.338208409506399;GNO;RSPOS=20304;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 26999 rs147506580 A G . PASS ASP;RSPOS=26999;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 29436 rs2462493 G A . PASS GNO;RSPOS=29436;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=100
chr1 30923 rs140337953 G T . PASS ASP;KGPilot123;RSPOS=30923;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 33487 rs77459554 C T . PASS ASP;GNO;RSPOS=33487;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 33495 rs75468675 C T . PASS ASP;GNO;RSPOS=33495;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33505 rs75627161 T C . PASS ASP;GNO;RSPOS=33505;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33508 rs75609629 A T . PASS ASP;GNO;RSPOS=33508;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33521 rs76098219 T A . PASS GNO;RSPOS=33521;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 33593 rs557585 G A . PASS RSPOS=33593;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=83
chr1 33648 rs62028204 G T . PASS RSPOS=33648;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
chr1 33656 rs113821789 T C . PASS RSPOS=33656;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=132
chr1 51476 rs187298206 T C . PASS KGPilot123;RSPOS=51476;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51479 rs116400033 T A . PASS ASP;G5;G5A;GMAF=0.113802559414991;KGPilot123;RSPOS=51479;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070010000100;WGT=0;dbSNPBuildID=132
chr1 51803 rs62637812 T C . PASS GMAF=0.468921389396709;RSPOS=51803;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040000000100;WGT=0;dbSNPBuildID=129
chr1 51898 rs76402894 C A . PASS GMAF=0.0731261425959781;GNO;RSPOS=51898;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
chr1 51914 rs190452223 T G . PASS KGPilot123;RSPOS=51914;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51928 rs78732933 G A . PASS GNO;RSPOS=51928;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
chr1 51935 rs181754315 C T . PASS KGPilot123;RSPOS=51935;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51954 rs185832753 G C . PASS KGPilot123;RSPOS=51954;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 52058 rs62637813 G C . PASS GMAF=0.0342778793418647;KGPilot123;RSPOS=52058;SAO=0;SSR=1;VC=SNV;VLD;VP=050000000000040010000140;WGT=0;dbSNPBuildID=129
chr1 52144 rs190291950 T A . PASS KGPilot123;RSPOS=52144;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 52238 rs150021059 T G . PASS ASP;KGPilot123;RSPOS=52238;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54353 rs140052487 C A . PASS ASP;KGPilot123;RSPOS=54353;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54421 rs146477069 A G . PASS ASP;KGPilot123;RSPOS=54421;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54490 rs141149254 G A . PASS ASP;KGPilot123;RSPOS=54490;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54676 rs2462492 C T . PASS ASP;GMAF=0.191956124314442;GNO;HD;KGPilot123;RSPOS=54676;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040510000100;WGT=0;dbSNPBuildID=100
chr1 54753 rs143174675 T G . PASS ASP;KGPilot123;RSPOS=54753;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54788 rs59861892 CC C,CCT . PASS ASP;RSPOS=54789;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 54795 rs58014817 T A . PASS ASP;RSPOS=54795;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
chr1 55164 rs3091274 C A . PASS G5;G5A;GMAF=0.145338208409506;GNO;KGPilot123;RSPOS=55164;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030110000100;WGT=0;dbSNPBuildID=103
chr1 55299 rs10399749 C T . PASS G5;G5A;GMAF=0.278793418647166;GNO;KGPilot123;PH2;RSPOS=55299;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030112000100;WGT=0;dbSNPBuildID=119
chr1 55302 rs3091273 C T . PASS RSPOS=55302;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
chr1 55313 rs182462964 A T . PASS KGPilot123;RSPOS=55313;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55322 rs3107974 C T . PASS RSPOS=55322;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
chr1 55326 rs3107975 T C . PASS GNO;HD;KGPilot123;RSPOS=55326;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=103
chr1 55330 rs185215913 G A . PASS KGPilot123;RSPOS=55330;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55367 rs190850374 G A . PASS KGPilot123;RSPOS=55367;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55388 rs182711216 C T . PASS KGPilot123;RSPOS=55388;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55394 rs2949420 T A . PASS GNO;KGPilot123;PH2;RSPOS=55394;SAO=0;SSR=0;VC=SNV;VP=050000000000000112000100;WGT=0;dbSNPBuildID=101
chr1 55416 rs193242050 G A . PASS KGPilot123;RSPOS=55416;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55427 rs183189405 T C . PASS KGPilot123;RSPOS=55427;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55545 rs28396308 C T . PASS GNO;RSPOS=55545;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=125
chr1 55816 rs187434873 G A . PASS KGPilot123;RSPOS=55816;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55850 rs191890754 C G . PASS KGPilot123;RSPOS=55850;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55852 rs184233019 G C . PASS KGPilot123;RSPOS=55852;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 56644 rs143342222 A C . PASS ASP;KGPilot123;RSPOS=56644;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 57952 rs189727433 A C . PASS KGPilot123;RSPOS=57952;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 58771 rs140128481 T C . PASS ASP;RSPOS=58771;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 58814 rs114420996 G A . PASS ASP;G5;GMAF=0.0982632541133455;KGPilot123;RSPOS=58814;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
chr1 59040 rs149755937 T C . PASS ASP;KGPilot123;RSPOS=59040;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 60718 rs78395614 G A . PASS CFL;GNO;RSPOS=60718;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 60726 rs192328835 C A . PASS KGPilot123;RSPOS=60726;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 60791 rs76199781 A G . PASS CFL;GNO;RSPOS=60791;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61442 rs74970982 A G . PASS CFL;GMAF=0.076782449725777;GNO;KGPilot123;RSPOS=61442;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61462 rs56992750 T A . PASS CFL;G5;G5A;GMAF=0.0383912248628885;GNO;KGPilot123;RSPOS=61462;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008030110000100;WGT=0;dbSNPBuildID=129
chr1 61480 rs75526266 G C . PASS CFL;GNO;RSPOS=61480;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61499 rs75719746 G A . PASS CFL;GNO;RSPOS=61499;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61743 rs184286948 G C . PASS KGPilot123;RSPOS=61743;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 61920 rs62637820 G A . PASS CFL;GMAF=0.0255941499085923;RSPOS=61920;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=129
chr1 61987 rs76735897 A G . PASS CFL;GMAF=0.292961608775137;GNO;KGPilot123;RSPOS=61987;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61989 rs77573425 G C . PASS CFL;GMAF=0.309414990859232;GNO;KGPilot123;RSPOS=61989;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61993 rs190553843 C T . PASS KGPilot123;RSPOS=61993;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 62156 rs181864839 C T . PASS KGPilot123;RSPOS=62156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 62157 rs10399597 G A . PASS CFL;GMAF=0.00228519195612431;KGPilot123;RSPOS=62157;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=119
chr1 62162 rs140556834 G A . PASS ASP;KGPilot123;RSPOS=62162;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 62203 rs28402963 T C . PASS CFL;KGPilot123;RSPOS=62203;SAO=0;SSR=0;VC=SNV;VP=050000000008000010000100;WGT=0;dbSNPBuildID=125
chr1 62271 rs28599927 A G . PASS CFL;GMAF=0.138482632541133;RSPOS=62271;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=125
chr1 63268 rs75478250 T C . PASS CFL;GNO;RSPOS=63268;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 63276 rs185977555 G A . PASS KGPilot123;RSPOS=63276;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 63297 rs188886746 G A . PASS KGPilot123;RSPOS=63297;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 63671 rs116440577 G A . PASS ASP;G5;GMAF=0.170018281535649;KGPilot123;RSPOS=63671;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
chr1 63737 rs77426996 TACT T,TCTA . PASS CFL;RSPOS=63738;SAO=0;SSR=0;VC=DIV;VP=050000000008000000000200;WGT=0;dbSNPBuildID=131
chr1 64649 rs181431124 A C . PASS KGPilot123;RSPOS=64649;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66008 rs2691286 C G . PASS CFL;GNO;RSPOS=66008;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=100
chr1 66162 rs62639105 A T . PASS CFL;GMAF=0.320383912248629;GNO;KGPilot123;RSPOS=66162;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000110000100;WGT=0;dbSNPBuildID=129
chr1 66176 rs28552463 T A . PASS CFL;GMAF=0.0484460694698355;KGPilot123;RSPOS=66176;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=125
chr1 66219 rs181028663 A T . PASS KGPilot123;RSPOS=66219;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66238 rs113961546 T A . PASS CFL;GNO;RSPOS=66238;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=132
chr1 66314 rs28534012 T A . PASS CFL;RSPOS=66314;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
chr1 66331 rs186063952 A C . PASS KGPilot123;RSPOS=66331;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66334 rs28464214 T A . PASS CFL;RSPOS=66334;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
chr1 66442 rs192044252 T A . PASS KGPilot123;RSPOS=66442;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66457 rs13328655 T A . PASS CFL;GMAF=0.0795246800731261;KGPilot123;RSPOS=66457;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=121
chr1 66503 rs112350669 T A . PASS CFL;RSPOS=66503;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=132
chr1 66507 rs12401368 T A . PASS CFL;GMAF=0.479890310786106;KGPilot123;RSPOS=66507;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=120
chr1 66651 rs2257270 A T . PASS CFL;GNO;RSPOS=66651;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=100
chr1 67179 rs149952626 C G . PASS ASP;KGPilot123;RSPOS=67179;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 67181 rs77662731 A G . PASS ASP;G5;G5A;GENEINFO=OR4F5:79501;GMAF=0.0470749542961609;GNO;KGPilot123;RSPOS=67181;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070110000100;WGT=0;dbSNPBuildID=131
chr1 67223 rs78676975 C A . PASS ASP;GENEINFO=OR4F5:79501;GNO;RSPOS=67223;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 69428 rs140739101 T G . PASS ASP;RSPOS=69428;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69453 rs142004627 G A . PASS ASP;RSPOS=69453;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 69476 rs148502021 T C . PASS ASP;RSPOS=69476;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69496 rs150690004 G A . PASS ASP;RSPOS=69496;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69511 rs75062661 A G . PASS GENEINFO=OR4F5:79501;GMAF=0.193784277879342;GNO;KGPilot123;RSPOS=69511;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000110000100;WGT=0;dbSNPBuildID=131
chr1 69534 rs190717287 T C . PASS KGPilot123;RSPOS=69534;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000010000100;WGT=0;dbSNPBuildID=135
chr1 69552 rs55874132 G C . PASS GENEINFO=OR4F5:79501;HD;RSPOS=69552;S3D;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050300000000040400000100;WGT=0;dbSNPBuildID=129
chr1 69590 rs141776804 T A . PASS ASP;RSPOS=69590;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 69594 rs144967600 T C . PASS ASP;RSPOS=69594;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 72148 rs182862337 C T . PASS KGPilot123;RSPOS=72148;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 73841 rs143773730 C T . PASS ASP;KGPilot123;RSPOS=73841;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 74651 rs62641291 G A . PASS RSPOS=74651;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
chr1 74681 rs13328683 G T . PASS CFL;GMAF=0.286106032906764;RSPOS=74681;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=121
chr1 74709 rs62641292 T A . PASS CFL;RSPOS=74709;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=129
chr1 74771 rs13328675 A G . PASS CFL;RSPOS=74771;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 74790 rs13328700 C G . PASS CFL;RSPOS=74790;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 74792 rs13328684 G A . PASS CFL;RSPOS=74792;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 77462 rs188023513 G A . PASS KGPilot123;RSPOS=77462;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77470 rs192898053 T C . PASS KGPilot123;RSPOS=77470;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77874 rs184538873 G A . PASS KGPilot123;RSPOS=77874;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77961 rs78385339 G A . PASS GMAF=0.125685557586837;KGPilot123;RSPOS=77961;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040010000100;WGT=0;dbSNPBuildID=131
chr1 79033 rs62641298 A G . PASS GMAF=0.438299817184644;GNO;HD;KGPilot123;RSPOS=79033;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=129
chr1 79050 rs62641299 G T . PASS GMAF=0.224405850091408;GNO;KGPilot123;RSPOS=79050;SAO=0;SSR=0;VC=SNV;VP=050000000000000110000100;WGT=0;dbSNPBuildID=129
chr1 79137 rs143777184 A T . PASS ASP;KGPilot123;RSPOS=79137;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 79417 rs184768190 C T . PASS KGPilot123;RSPOS=79417;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 79418 rs2691296 G C . PASS GMAF=0.0178244972577697;RSPOS=79418;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=100
chr1 79538 rs2691295 C T . PASS RSPOS=79538;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=100
chr1 79772 rs147215883 C G . PASS ASP;KGPilot123;RSPOS=79772;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 79872 rs189224661 T G . PASS KGPilot123;RSPOS=79872;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 80323 rs3942603 G C . PASS CFL;GNO;RSPOS=80323;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=108
chr1 80386 rs3878915 C A . PASS GMAF=0.0118829981718464;RSPOS=80386;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=108
chr1 80454 rs144226842 G C . PASS ASP;KGPilot123;RSPOS=80454;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 81836 rs2259560 A T . PASS ASP;GNO;RSPOS=81836;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=100
chr1 81949 rs181567186 T C . PASS KGPilot123;RSPOS=81949;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 81962 rs4030308 T TAA . PASS ASP;RSPOS=81962;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 82102 rs4030307 C T . PASS ASP;RSPOS=82102;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82103 rs2020400 T C . PASS ASP;RSPOS=82103;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
chr1 82126 rs1815133 C T . PASS ASP;RSPOS=82126;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
chr1 82133 rs4030306 CA C,CAAAAAAAAAAAAAAA . PASS ASP;RSPOS=82136;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 82154 rs4477212 A G . PASS ASP;HD;RSPOS=82154;SAO=0;SSR=0;VC=SNV;VP=050000000004000400000100;WGT=0;dbSNPBuildID=111
chr1 82162 rs1815132 C A . PASS ASP;GMAF=0.0351919561243144;GNO;RSPOS=82162;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=92
chr1 82163 rs139113303 G A . PASS ASP;KGPilot123;RSPOS=82163;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 82196 rs112844054 A T . PASS ASP;RSPOS=82196;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 82249 rs1851945 A G . PASS ASP;GMAF=0.0452468007312614;KGPilot123;RSPOS=82249;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=92
chr1 82282 rs3871775 G A . PASS ASP;RSPOS=82282;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82303 rs3871776 T C . PASS ASP;RSPOS=82303;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82316 rs4030305 A C . PASS ASP;GNO;RSPOS=82316;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=108
chr1 82609 rs149189449 C G . PASS ASP;KGPilot123;RSPOS=82609;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 82676 rs185237834 T G . PASS KGPilot123;RSPOS=82676;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 82734 rs4030331 T C . PASS ASP;GMAF=0.261882998171846;KGPilot123;RSPOS=82734;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=108
chr1 82957 rs189774606 C T . PASS KGPilot123;RSPOS=82957;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83084 rs181193408 T A . PASS KGPilot123;RSPOS=83084;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83088 rs186081601 G C . PASS KGPilot123;RSPOS=83088;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83107 rs4405097 G C . PASS ASP;RSPOS=83107;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=111
chr1 83119 rs4030324 AA A,ATAAC . PASS ASP;RSPOS=83120;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 83771 rs189906733 T G . PASS KGPilot123;RSPOS=83771;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83786 rs58520670 T TA . PASS ASP;RSPOS=83794;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83815 rs58857344 GAGAA G . PASS ASP;RSPOS=83827;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83826 rs71281475 AAAGA A,AAA . PASS ASP;GNO;RSPOS=83827;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 83855 rs59596480 GAA G . PASS ASP;RSPOS=83857;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83872 rs59556914 AA A,AAGA . PASS ASP;RSPOS=83873;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83884 rs59586754 GAAA G . PASS ASP;RSPOS=83885;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83897 rs61330047 GAA G . PASS ASP;RSPOS=83899;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83901 rs58254183 GAAAGAA G . PASS ASP;RSPOS=83903;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83921 rs61338823 GAA G . PASS ASP;RSPOS=83923;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83930 rs71281474 AG A,AGA . PASS ASP;GNO;RSPOS=83931;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 83934 rs59235392 AG A,AGAAA . PASS ASP;RSPOS=83935;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83977 rs180759811 A G . PASS KGPilot123;RSPOS=83977;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84002 rs28850140 G A . PASS ASP;GMAF=0.138939670932358;KGPilot123;RSPOS=84002;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=125
chr1 84010 rs186443818 G A . PASS KGPilot123;RSPOS=84010;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84018 rs61352176 GAA G . PASS ASP;RSPOS=84020;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 84079 rs190867312 T C . PASS KGPilot123;RSPOS=84079;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84139 rs183605470 A T . PASS KGPilot123;RSPOS=84139;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84156 rs188652299 A C . PASS KGPilot123;RSPOS=84156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84244 rs191297051 A C . PASS KGPilot123;RSPOS=84244;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84295 rs183209871 G A . PASS KGPilot123;RSPOS=84295;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84346 rs187855973 T C . PASS KGPilot123;RSPOS=84346;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84453 rs191379015 C G . PASS KGPilot123;RSPOS=84453;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84705 rs183470350 T G . PASS KGPilot123;RSPOS=84705;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135

View File

@ -0,0 +1 @@
100000 1 0

View File

@ -0,0 +1,3 @@
100000 1 11
0 chr1 (null)
0 100000 0

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.