Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mark DePristo 2011-12-13 18:19:41 -05:00
commit 7dd5c74591
37 changed files with 1367 additions and 673 deletions

View File

@ -711,6 +711,8 @@ public class SAMDataSource {
* @param validationStringency validation stringency.
*/
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
for(SAMReaderID readerID: readerIDs) {
File indexFile = findIndexFile(readerID.samFile);
@ -728,8 +730,7 @@ public class SAMDataSource {
reader.enableFileSource(true);
reader.setValidationStringency(validationStringency);
final SAMFileHeader header = reader.getFileHeader();
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
readers.put(readerID,reader);
}

View File

@ -142,20 +142,75 @@ public class SampleDB {
* @return
*/
public final Map<String, Set<Sample>> getFamilies() {
// Delegate to the filtering overload; passing null means no sample-ID filter is applied.
return getFamilies(null);
}
/**
* Returns a map from family ID -> set of family members for all samples in sampleIds with
* non-null family ids
*
* @param sampleIds - all samples to include. If null is passed then all samples are returned.
* @return
*/
public final Map<String, Set<Sample>> getFamilies(Collection<String> sampleIds) {
final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
for ( final Sample sample : samples.values() ) {
final String famID = sample.getFamilyID();
if ( famID != null ) {
if ( ! families.containsKey(famID) )
families.put(famID, new TreeSet<Sample>());
families.get(famID).add(sample);
if(sampleIds == null || sampleIds.contains(sample.getID())){
final String famID = sample.getFamilyID();
if ( famID != null ) {
if ( ! families.containsKey(famID) )
families.put(famID, new TreeSet<Sample>());
families.get(famID).add(sample);
}
}
}
return families;
}
/**
 * Returns the set of all children that have both of their parents.
 * Families are not restricted to strict trios: if a family is composed of
 * more than 1 child, each child is returned.
 *
 * Equivalent to calling {@code getChildrenWithParents(false)}.
 *
 * @return all the children that have both of their parents
 */
public final Set<Sample> getChildrenWithParents(){
return getChildrenWithParents(false);
}
/**
 * Collects every sample that has two parents, both of which belong to the same
 * family as the sample itself.
 *
 * Families are obtained from {@link #getFamilies()}. When {@code triosOnly} is false,
 * a family with more than one child contributes each child that qualifies. When
 * {@code triosOnly} is true, any family whose size is not exactly 3 is skipped.
 *
 * This method can be used wherever trios are needed.
 *
 * @param triosOnly if set to true, only families of exactly three members are considered
 * @return all the children that have both of their parents
 */
public final Set<Sample> getChildrenWithParents(boolean triosOnly) {
    final Set<Sample> result = new HashSet<Sample>();
    for ( final Set<Sample> members : getFamilies().values() ) {
        // in trio mode only families of exactly three members are eligible
        final boolean eligible = !triosOnly || members.size() == 3;
        if ( eligible ) {
            for ( final Sample member : members ) {
                if ( member.getParents().size() == 2 && members.containsAll(member.getParents()) )
                    result.add(member);
            }
        }
    }
    return result;
}
/**
* Return all samples with a given family ID
* @param familyId

View File

@ -88,7 +88,7 @@ public abstract class Walker<MapType, ReduceType> {
return getToolkit().getMasterSequenceDictionary();
}
protected SampleDB getSampleDB() {
public SampleDB getSampleDB() {
return getToolkit().getSampleDB();
}

View File

@ -3,22 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* Created by IntelliJ IDEA.
@ -30,23 +26,26 @@ import java.util.Map;
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
private MendelianViolation mendelianViolation = null;
private String motherId;
private String fatherId;
private String childId;
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( mendelianViolation == null ) {
if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
else {
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line.");
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
}
}
Map<String,Object> toRet = new HashMap<String,Object>(1);
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() &&
vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() &&
vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods();
boolean hasAppropriateGenotypes = vc.hasGenotype(motherId) && vc.getGenotype(motherId).hasLikelihoods() &&
vc.hasGenotype(fatherId) && vc.getGenotype(fatherId).hasLikelihoods() &&
vc.hasGenotype(childId) && vc.getGenotype(childId).hasLikelihoods();
if ( hasAppropriateGenotypes )
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc,motherId,fatherId,childId));
return toRet;
}
@ -55,4 +54,27 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
/**
 * Looks for exactly one trio in the sample database and records the IDs of its members.
 *
 * The database must contain exactly one family ID, that family must have exactly
 * three members, and one of those members must report two parents. On success the
 * {@code motherId}, {@code fatherId} and {@code childId} fields are set from that member.
 *
 * @param db the sample database to inspect
 * @return true if a member with two parents was found and the IDs were set, false otherwise
 */
private boolean checkAndSetSamples(SampleDB db){
    final Set<String> families = db.getFamilyIDs();
    if ( families.size() != 1 )
        return false;

    final Set<Sample> family = db.getFamily(families.iterator().next());
    if ( family.size() != 3 )
        return false;

    // Every member must be examined: the child can sit at any position in the set's
    // iteration order, including the last one.
    for ( final Sample sample : family ) {
        if ( sample.getParents().size() == 2 ) {
            motherId = sample.getMaternalID();
            fatherId = sample.getPaternalID();
            childId = sample.getID();
            return true;
        }
    }
    return false;
}
}

View File

@ -12,10 +12,8 @@ import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.FileNotFoundException;
import java.util.*;
/**
@ -26,42 +24,33 @@ import java.util.*;
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation {
private Set<MendelianViolation> fullMVSet = null;
private Set<Sample> trios = null;
private final static int REF = 0;
private final static int HET = 1;
private final static int HOM = 2;
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( fullMVSet == null ) {
fullMVSet = new HashSet<MendelianViolation>();
if ( trios == null ) {
if ( walker instanceof VariantAnnotator ) {
final Map<String,Set<Sample>> families = ((VariantAnnotator) walker).getSampleDB().getFamilies();
for( final Set<Sample> family : families.values() ) {
for( final Sample sample : family ) {
if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now
fullMVSet.add( new MendelianViolation(sample, 0.0) );
}
}
}
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
} else {
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
}
}
final Map<String,Object> toRet = new HashMap<String,Object>(1);
final HashSet<MendelianViolation> mvsToTest = new HashSet<MendelianViolation>();
final HashSet<Sample> triosToTest = new HashSet<Sample>();
for( final MendelianViolation mv : fullMVSet ) {
final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() &&
vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() &&
vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods();
for( final Sample child : trios) {
final boolean hasAppropriateGenotypes = vc.hasGenotype(child.getID()) && vc.getGenotype(child.getID()).hasLikelihoods() &&
vc.hasGenotype(child.getPaternalID()) && vc.getGenotype(child.getPaternalID()).hasLikelihoods() &&
vc.hasGenotype(child.getMaternalID()) && vc.getGenotype(child.getMaternalID()).hasLikelihoods();
if ( hasAppropriateGenotypes ) {
mvsToTest.add(mv);
triosToTest.add(child);
}
}
toRet.put("TDT", calculateTDT( vc, mvsToTest ));
toRet.put("TDT", calculateTDT( vc, triosToTest ));
return toRet;
}
@ -72,27 +61,27 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
private double calculateTDT( final VariantContext vc, final Set<MendelianViolation> mvsToTest ) {
private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM);
final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM);
final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET);
final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET);
final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET);
final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET);
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM);
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM);
final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET);
final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET);
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET);
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET);
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
return (numer * numer) / denom;
}
private double calculateNChildren( final VariantContext vc, final Set<MendelianViolation> mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) {
final double likelihoodVector[] = new double[mvsToTest.size() * 2];
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
final double likelihoodVector[] = new double[triosToTest.size() * 2];
int iii = 0;
for( final MendelianViolation mv : mvsToTest ) {
final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector();
final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector();
final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector();
for( final Sample child : triosToTest ) {
final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
}

View File

@ -167,9 +167,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false;
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
public String familyStr = null;
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
public double minGenotypeQualityP = 0.0;

View File

@ -1,94 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.variantcontext.Allele;
/**
 * Value holder for one sample at a biallelic site: the AA, AB and BB likelihood values,
 * the two alleles A and B, and the read depth supplied at construction.
 */
public class BiallelicGenotypeLikelihoods {

    // positions of the three genotypes inside the likelihood array
    private static final int AA_INDEX = 0;
    private static final int AB_INDEX = 1;
    private static final int BB_INDEX = 2;

    private final String sample;
    private final double[] GLs;
    private final Allele A, B;
    private final int depth;

    /**
     * Create a new object for sample with given alleles and genotype likelihoods
     *
     * @param sample              sample name
     * @param A                   allele A
     * @param B                   allele B
     * @param log10AALikelihoods  AA likelihoods
     * @param log10ABLikelihoods  AB likelihoods
     * @param log10BBLikelihoods  BB likelihoods
     * @param depth               the read depth used in creating the likelihoods
     */
    public BiallelicGenotypeLikelihoods(String sample,
                                        Allele A,
                                        Allele B,
                                        double log10AALikelihoods,
                                        double log10ABLikelihoods,
                                        double log10BBLikelihoods,
                                        int depth) {
        final double[] likelihoods = new double[3];
        likelihoods[AA_INDEX] = log10AALikelihoods;
        likelihoods[AB_INDEX] = log10ABLikelihoods;
        likelihoods[BB_INDEX] = log10BBLikelihoods;

        this.sample = sample;
        this.A = A;
        this.B = B;
        this.GLs = likelihoods;
        this.depth = depth;
    }

    /** @return the sample name given at construction */
    public String getSample() {
        return sample;
    }

    /** @return the AA likelihood value */
    public double getAALikelihoods() {
        return GLs[AA_INDEX];
    }

    /** @return the AB likelihood value */
    public double getABLikelihoods() {
        return GLs[AB_INDEX];
    }

    /** @return the BB likelihood value */
    public double getBBLikelihoods() {
        return GLs[BB_INDEX];
    }

    /** @return the internal three-element array (AA, AB, BB order); not a copy */
    public double[] getLikelihoods() {
        return GLs;
    }

    /** @return allele A */
    public Allele getAlleleA() {
        return A;
    }

    /** @return allele B */
    public Allele getAlleleB() {
        return B;
    }

    /** @return the read depth given at construction */
    public int getDepth() {
        return depth;
    }
}

View File

@ -27,13 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: Aug 4, 2009
* Time: 6:46:09 PM
* To change this template use File | Settings | File Templates.
*/
public enum DiploidGenotype {
AA ('A', 'A'),
AC ('A', 'C'),
@ -110,6 +103,20 @@ public enum DiploidGenotype {
return conversionMatrix[index1][index2];
}
/**
 * Looks up the diploid genotype for a pair of base indexes. The two indexes may be
 * given in either order.
 *
 * @param baseIndex1 index of the first base
 * @param baseIndex2 index of the second base
 * @return the diploid genotype found in the conversion matrix for the two indexes
 * @throws IllegalArgumentException if either index is -1; the first offending index is reported
 */
public static DiploidGenotype createDiploidGenotype(int baseIndex1, int baseIndex2) {
    final boolean firstIsInvalid = baseIndex1 == -1;
    if ( firstIsInvalid || baseIndex2 == -1 ) {
        // report baseIndex1 when it is the culprit, otherwise baseIndex2
        final int offender = firstIsInvalid ? baseIndex1 : baseIndex2;
        throw new IllegalArgumentException(offender + " does not represent a valid base character");
    }
    return conversionMatrix[baseIndex1][baseIndex2];
}
private static final DiploidGenotype[][] conversionMatrix = {
{ DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT },
{ DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT },

View File

@ -56,7 +56,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); // TODO -- initialize with size of GLs
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
@ -364,7 +364,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
else {
// all possible likelihoods for a given cell from which to choose the max
final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
final double[] log10ConformationLikelihoods = new double[numPaths];
final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
final double[] gl = genotypeLikelihoods.get(j);
@ -372,6 +372,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// initialize
for ( int i = 0; i < numPaths; i++ )
// TODO -- Arrays.fill?
// todo -- is this even necessary? Why not have as else below?
log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY;
// deal with the AA case first
@ -417,6 +419,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
// todo -- aren't there a small number of fixed values that this function can adopt?
// todo -- at a minimum it'd be good to partially compute some of these in ACCounts for performance
// todo -- need to cache PLIndex -> two alleles, compute looping over each PLIndex. Note all other operations are efficient
// todo -- this can be computed once at the start of the all operations
// the closed form representation generalized for multiple alleles is as follows:
// AA: (2j - totalK) * (2j - totalK - 1)

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Map;
@ -79,19 +80,17 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
* @param contexts stratified alignment contexts
* @param contextType stratified context type
* @param priors priors to use for GLs
* @param GLs hash of sample->GL to fill in
* @param alternateAlleleToUse the alternate allele to use, null if not set
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
* @return genotype likelihoods per sample for AA, AB, BB
* @return variant context where genotypes are no-called but with GLs
*/
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse,
boolean useBAQedPileup);
public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Allele alternateAlleleToUse,
boolean useBAQedPileup);
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -41,8 +42,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
@ -243,7 +243,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// get deletion length
int dLen = Integer.valueOf(bestAltAllele.substring(1));
// get ref bases of accurate deletion
int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart());
int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
//System.out.println(new String(ref.getBases()));
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
@ -270,19 +270,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
public Allele getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
if ( tracker == null )
return null;
GenomeLoc loc = ref.getLocus();
Allele refAllele, altAllele;
VariantContext vc = null;
@ -368,10 +366,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
ref, hsize, numPrefBases);
// start making the VariantContext
final int endLoc = calculateEndPos(alleleList, refAllele, loc);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
// For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
// initialize the GenotypeLikelihoods
GLs.clear();
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
@ -384,11 +389,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (pileup != null ) {
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
alleleList,
genotypeLikelihoods,
getFilteredDepth(pileup)));
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
if (DEBUG) {
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
@ -399,9 +405,25 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
}
return refAllele;
return builder.genotypes(genotypes).make();
}
/**
 * Computes the end position to use for the variant context being built.
 *
 * The result is {@code loc.getStart() + refAllele.length()} when at least one allele in
 * {@code alleles} reports {@code isNull()}, and one less than that otherwise.
 *
 * @param alleles   the alleles to scan for a null allele
 * @param refAllele the reference allele, whose length determines the span
 * @param loc       the locus supplying the start position
 * @return the computed end position
 */
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
    // scan until the first null allele is seen (or the alleles run out)
    boolean sawNullAllele = false;
    final Iterator<Allele> alleleIterator = alleles.iterator();
    while ( !sawNullAllele && alleleIterator.hasNext() )
        sawNullAllele = alleleIterator.next().isNull();

    // for indels, stop location is one more than ref allele length
    final int spanEnd = loc.getStart() + refAllele.length();
    return sawNullAllele ? spanEnd : spanEnd - 1;
}
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
return indelLikelihoodMap.get();

View File

@ -1,52 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.ArrayList;
import java.util.List;
/**
 * Value holder for one sample's likelihood vector over a list of alleles, together
 * with the read depth supplied at construction.
 *
 * @author delangel
 * @since 6/1/11
 */
public class MultiallelicGenotypeLikelihoods {
    private final String sample;
    private final double[] GLs;
    private final List<Allele> alleleList;
    private final int depth;

    /**
     * Creates the holder after checking that the likelihood vector matches the allele count.
     *
     * @param sample            sample name
     * @param A                 the list of alleles
     * @param log10Likelihoods  likelihood vector; must hold n*(n+1)/2 entries for n alleles
     * @param depth             the depth to report for this sample
     * @throws StingException if the vector length is inconsistent with the number of alleles
     */
    public MultiallelicGenotypeLikelihoods(String sample,
                                           List<Allele> A,
                                           double[] log10Likelihoods, int depth) {
        // n alleles give n*(n+1)/2 unordered allele pairs, one vector entry per pair
        final int alleleCount = A.size();
        final int expectedLength = alleleCount * (alleleCount + 1) / 2;
        if ( expectedLength != log10Likelihoods.length )
            throw new StingException("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!");

        this.sample = sample;
        this.alleleList = A;
        this.GLs = log10Likelihoods;
        this.depth = depth;
    }

    /** @return the sample name given at construction */
    public String getSample() {
        return sample;
    }

    /** @return the likelihood array passed to the constructor; not a copy */
    public double[] getLikelihoods() {
        return GLs;
    }

    /** @return the allele list passed to the constructor; not a copy */
    public List<Allele> getAlleles() {
        return alleleList;
    }

    /** @return the depth given at construction */
    public int getDepth() {
        return depth;
    }
}

View File

@ -31,107 +31,147 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.*;
public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
// the alternate allele with the largest sum of quality scores
protected Byte bestAlternateAllele = null;
private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50;
private boolean ALLOW_MULTIPLE_ALLELES;
private final boolean useAlleleFromVCF;
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC;
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
}
public Allele getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
if ( !(priors instanceof DiploidSNPGenotypePriors) )
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
byte refBase = ref.getBase();
Allele refAllele = Allele.create(refBase, true);
final boolean[] basesToUse = new boolean[4];
final byte refBase = ref.getBase();
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
// find the alternate allele with the largest sum of quality scores
// start making the VariantContext
final GenomeLoc loc = ref.getLocus();
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(Allele.create(refBase, true));
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);
// find the alternate allele(s) that we should be using
if ( alternateAlleleToUse != null ) {
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true;
} else if ( useAlleleFromVCF ) {
VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
// ignore places where we don't have a variant
if ( vc == null )
// ignore places where we don't have a SNP
if ( vc == null || !vc.isSNP() )
return null;
if ( !vc.isBiallelic() ) {
// for multi-allelic sites go back to the reads and find the most likely alternate allele
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
} else {
bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0];
}
for ( Allele allele : vc.getAlternateAlleles() )
basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true;
} else {
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup);
// how many alternate alleles are we using?
int alleleCounter = countSetBits(basesToUse);
// if there are no non-ref alleles...
if ( alleleCounter == 0 ) {
// if we only want variants, then we don't need to calculate genotype likelihoods
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
return builder.make();
// otherwise, choose any alternate allele (it doesn't really matter)
basesToUse[indexOfRefBase == 0 ? 1 : 0] = true;
}
}
// if there are no non-ref bases...
if ( bestAlternateAllele == null ) {
// if we only want variants, then we don't need to calculate genotype likelihoods
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
return refAllele;
// otherwise, choose any alternate allele (it doesn't really matter)
bestAlternateAllele = (byte)(refBase != 'A' ? 'A' : 'C');
// create the alternate alleles and the allele ordering (the ordering is crucial for the GLs)
final int numAltAlleles = countSetBits(basesToUse);
final int[] alleleOrdering = new int[numAltAlleles + 1];
alleleOrdering[0] = indexOfRefBase;
int alleleOrderingIndex = 1;
int numLikelihoods = 1;
for ( int i = 0; i < 4; i++ ) {
if ( i != indexOfRefBase && basesToUse[i] ) {
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
alleleOrdering[alleleOrderingIndex++] = i;
numLikelihoods += alleleOrderingIndex;
}
}
builder.alleles(alleles);
Allele altAllele = Allele.create(bestAlternateAllele, false);
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); }
if ( useBAQedPileup )
pileup = createBAQedPileup( pileup );
// create the GenotypeLikelihoods object
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
if ( nGoodBases == 0 )
continue;
double[] likelihoods = GL.getLikelihoods();
final double[] allLikelihoods = GL.getLikelihoods();
final double[] myLikelihoods = new double[numLikelihoods];
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase);
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele);
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele);
ArrayList<Allele> aList = new ArrayList<Allele>();
aList.add(refAllele);
aList.add(altAllele);
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
int myLikelihoodsIndex = 0;
for ( int i = 0; i <= numAltAlleles; i++ ) {
for ( int j = i; j <= numAltAlleles; j++ ) {
myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()];
}
}
// normalize in log space so that max element is zero.
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
}
return refAllele;
return builder.genotypes(genotypes).make();
}
protected void initializeBestAlternateAllele(byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
/**
 * Counts how many entries of the given flag array are {@code true}.
 *
 * @param array the flags to inspect
 * @return the number of {@code true} entries in {@code array}
 */
private int countSetBits(boolean[] array) {
    int total = 0;
    for ( final boolean flag : array ) {
        if ( flag )
            total++;
    }
    return total;
}
// fills in the allelesToUse array
protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
int[] qualCounts = new int[4];
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
@ -139,7 +179,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup();
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() || (! p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE ))
if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) )
continue;
final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
@ -149,17 +189,31 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
}
}
// set the non-ref base with maximum quality score sum
int maxCount = 0;
bestAlternateAllele = null;
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( qualCounts[index] > maxCount ) {
maxCount = qualCounts[index];
bestAlternateAllele = altAllele;
if ( ALLOW_MULTIPLE_ALLELES ) {
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) {
allelesToUse[index] = true;
}
}
} else {
// set the non-ref base which has the maximum quality score sum
int maxCount = 0;
int indexOfMax = 0;
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( qualCounts[index] > maxCount ) {
maxCount = qualCounts[index];
indexOfMax = index;
}
}
if ( maxCount > 0 )
allelesToUse[indexOfMax] = true;
}
}

View File

@ -219,14 +219,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
Map<String, MultiallelicGenotypeLikelihoods> GLs = new HashMap<String, MultiallelicGenotypeLikelihoods>();
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
if ( refAllele != null )
return createVariantContextFromLikelihoods(refContext, refAllele, GLs);
else
return null;
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
}
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -261,40 +254,6 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vc, false);
}
/**
 * Assembles a "UG_call" VariantContext from per-sample genotype likelihoods.
 * <p>
 * Every sample is emitted as a no-call genotype that carries its depth and its genotype
 * likelihoods as attributes. The allele set starts with {@code refAllele} and is extended with
 * the alleles reported by the first likelihoods entry encountered.
 *
 * @param refContext reference context supplying the locus and the reference base
 * @param refAllele  the reference allele
 * @param GLs        likelihoods to convert; the map values are iterated, the keys are not used
 * @return the assembled VariantContext
 */
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
    // every sample is a no-call for now
    List<Allele> noCallAlleles = new ArrayList<Allele>();
    noCallAlleles.add(Allele.NO_CALL);

    // insertion-ordered set: the reference allele first, then the alternates
    Set<Allele> siteAlleles = new LinkedHashSet<Allele>();
    siteAlleles.add(refAllele);

    boolean altAllelesPending = true;
    GenotypesContext sampleGenotypes = GenotypesContext.create();
    for ( MultiallelicGenotypeLikelihoods sampleGL : GLs.values() ) {
        if ( altAllelesPending ) {
            altAllelesPending = false;
            // only the first entry contributes alleles; ordering is important to maintain consistency
            for ( Allele allele : sampleGL.getAlleles() ) {
                siteAlleles.add(allele);
            }
        }

        HashMap<String, Object> sampleAttributes = new HashMap<String, Object>();
        GenotypeLikelihoods sampleLikelihoods = GenotypeLikelihoods.fromLog10Likelihoods(sampleGL.getLikelihoods());
        sampleAttributes.put(VCFConstants.DEPTH_KEY, sampleGL.getDepth());
        sampleAttributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, sampleLikelihoods);

        sampleGenotypes.add(new Genotype(sampleGL.getSample(), noCallAlleles, Genotype.NO_LOG10_PERROR, null, sampleAttributes, false));
    }

    GenomeLoc locus = refContext.getLocus();
    int stop = calculateEndPos(siteAlleles, refAllele, locus);

    VariantContextBuilder callBuilder = new VariantContextBuilder("UG_call", locus.getContig(), locus.getStart(), stop, siteAlleles);
    return callBuilder.genotypes(sampleGenotypes).referenceBaseForIndel(refContext.getBase()).make();
}
/**
 * Convenience overload that delegates to the six-argument {@code calculateGenotypes},
 * passing {@code null} for each of the four leading arguments.
 *
 * @param vc    forwarded unchanged to the six-argument overload
 * @param model forwarded unchanged to the six-argument overload
 * @return whatever the six-argument overload returns
 */
public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
return calculateGenotypes(null, null, null, null, vc, model);
}
@ -412,8 +371,11 @@ public class UnifiedGenotyperEngine {
builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) )
builder.filters(filter);
if ( !limitedContext )
if ( limitedContext ) {
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
} else {
builder.referenceBaseForIndel(refContext.getBase());
}
// create the genotypes
GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse);
@ -494,42 +456,6 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
}
/**
 * Computes the stop coordinate for an event that starts at {@code loc}.
 * <p>
 * When every allele is exactly one base long the site is treated as a SNP and the stop equals
 * the start. Otherwise the stop is the start plus the reference allele length, reduced by one
 * when none of the alleles is null.
 * <p>
 * TODO - temp fix until we can deal with extended events properly
 *
 * @param alleles   the alleles to inspect
 * @param refAllele the reference allele; its length sets the extent of non-SNP events
 * @param loc       the locus supplying the start position
 * @return the stop position
 */
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
    // the site only counts as a SNP if every allele is a single base
    boolean allSingleBase = true;
    for ( final Allele allele : alleles ) {
        if ( allele.length() != 1 ) {
            allSingleBase = false;
            break;
        }
    }

    boolean sawNullAllele = false;
    for ( final Allele allele : alleles ) {
        if ( allele.isNull() ) {
            sawNullAllele = true;
            break;
        }
    }

    // standard deletion: ref allele length = del length, endLoc = startLoc + refAllele.length(), alt allele = null
    // standard insertion: ref allele length = 0, endLoc = startLoc
    // mixed: want endLoc = startLoc for case {A*,AT,T}, but for {ATG*,A,T} want endLoc = startLoc + refAllele.length()
    // So, in general, endLoc = startLoc + refAllele.length(), except in complex substitutions where it's one less
    //
    // todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler,
    // the whole vc processing infrastructure seems too brittle and riddled with special case handling
    final int start = loc.getStart();
    if ( allSingleBase )
        return start;

    final int stop = start + refAllele.length();
    return sawNullAllele ? stop : stop - 1;
}
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
Map<String, AlignmentContext> stratifiedContexts = null;

View File

@ -320,17 +320,17 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
if(transmissionProb != NO_TRANSMISSION_PROB)
phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb));
//Handle null, missing and unavailable genotypes
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
//genotype so it is safe to return the original genotype in this case.
//In addition, if the phasing confidence is 0, then return the unphased, original genotypes.
if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType()))
return genotype;
//Handle null, missing and unavailable genotypes
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
//genotype so it is safe to return the original genotype in this case.
//In addition, if the phasing confidence is 0, then return the unphased, original genotypes.
if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType()))
return genotype;
//Add the transmission probability
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
genotypeAttributes.putAll(genotype.getAttributes());
if(transmissionProb>NO_TRANSMISSION_PROB)
//Add the transmission probability
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
genotypeAttributes.putAll(genotype.getAttributes());
if(transmissionProb>NO_TRANSMISSION_PROB)
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
ArrayList<Allele> phasedAlleles = new ArrayList<Allele>(2);

View File

@ -164,13 +164,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
protected double MIN_PHASE_QUALITY = 10.0;
/**
* This argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined.
*/
@Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations", required=false)
protected String FAMILY_STRUCTURE;
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation. Default is 50.", required=false)
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
@ -561,8 +555,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; }
public String getFamilyStructure() { return FAMILY_STRUCTURE; }
public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; }
public TreeSet<VariantStratifier> getStratificationObjects() { return stratificationObjects; }

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -7,9 +9,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
/**
* Mendelian violation detection and counting
@ -40,12 +44,25 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description = "Number of mendelian variants found")
@DataPoint(description = "Number of variants found with at least one family having genotypes")
long nVariants;
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall")
long nSkipped;
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)")
long nFamCalled;
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.")
long nVarFamCalled;
@DataPoint(description="Number of variants x families discarded as low quality")
long nLowQual;
@DataPoint(description="Number of variants x families discarded as no call")
long nNoCall;
@DataPoint(description="Number of loci with mendelian violations")
long nLociViolations;
@DataPoint(description = "Number of mendelian violations found")
long nViolations;
@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
long KidHomRef_ParentHomVar;
@DataPoint(description = "number of child het calls where the parent was hom ref")
long KidHet_ParentsHomRef;
@ -53,11 +70,65 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
long KidHet_ParentsHomVar;
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
long KidHomVar_ParentHomRef;
*/
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR")
long mvRefRef_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET")
long mvRefRef_Het;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR")
long mvRefHet_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR")
long mvRefVar_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF")
long mvRefVar_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF")
long mvVarHet_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF")
long mvVarVar_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET")
long mvVarVar_Het;
/*@DataPoint(description ="Number of inherited var alleles from het parents")
long nInheritedVar;
@DataPoint(description ="Number of inherited ref alleles from het parents")
long nInheritedRef;*/
@DataPoint(description="Number of HomRef/HomRef/HomRef trios")
long HomRefHomRef_HomRef;
@DataPoint(description="Number of Het/Het/Het trios")
long HetHet_Het;
@DataPoint(description="Number of Het/Het/HomRef trios")
long HetHet_HomRef;
@DataPoint(description="Number of Het/Het/HomVar trios")
long HetHet_HomVar;
@DataPoint(description="Number of HomVar/HomVar/HomVar trios")
long HomVarHomVar_HomVar;
@DataPoint(description="Number of HomRef/HomVar/Het trios")
long HomRefHomVAR_Het;
@DataPoint(description="Number of ref alleles inherited from het/het parents")
long HetHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from het/het parents")
long HetHet_inheritedVar;
@DataPoint(description="Number of ref alleles inherited from homRef/het parents")
long HomRefHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from homRef/het parents")
long HomRefHet_inheritedVar;
@DataPoint(description="Number of ref alleles inherited from homVar/het parents")
long HomVarHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from homVar/het parents")
long HomVarHet_inheritedVar;
MendelianViolation mv;
PrintStream mvFile;
Map<String,Set<Sample>> families;
public void initialize(VariantEvalWalker walker) {
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
//Changed by Laurent Francioli - 2011-06-07
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
families = walker.getSampleDB().getFamilies();
}
public boolean enabled() {
@ -75,110 +146,48 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
if (mv.setAlleles(vc)) {
if(mv.countViolations(families,vc)>0){
nLociViolations++;
nViolations += mv.getViolationsCount();
mvRefRef_Var += mv.getParentsRefRefChildVar();
mvRefRef_Het += mv.getParentsRefRefChildHet();
mvRefHet_Var += mv.getParentsRefHetChildVar();
mvRefVar_Var += mv.getParentsRefVarChildVar();
mvRefVar_Ref += mv.getParentsRefVarChildRef();
mvVarHet_Ref += mv.getParentsVarHetChildRef();
mvVarVar_Ref += mv.getParentsVarVarChildRef();
mvVarVar_Het += mv.getParentsVarVarChildHet();
}
HomRefHomRef_HomRef += mv.getRefRefRef();
HetHet_Het += mv.getHetHetHet();
HetHet_HomRef += mv.getHetHetHomRef();
HetHet_HomVar += mv.getHetHetHomVar();
HomVarHomVar_HomVar += mv.getVarVarVar();
HomRefHomVAR_Het += mv.getRefVarHet();
HetHet_inheritedRef += mv.getParentsHetHetInheritedRef();
HetHet_inheritedVar += mv.getParentsHetHetInheritedVar();
HomRefHet_inheritedRef += mv.getParentsRefHetInheritedRef();
HomRefHet_inheritedVar += mv.getParentsRefHetInheritedVar();
HomVarHet_inheritedRef += mv.getParentsVarHetInheritedRef();
HomVarHet_inheritedVar += mv.getParentsVarHetInheritedVar();
if(mv.getFamilyCalledCount()>0){
nVariants++;
Genotype momG = vc.getGenotype(mv.getSampleMom());
Genotype dadG = vc.getGenotype(mv.getSampleDad());
Genotype childG = vc.getGenotype(mv.getSampleChild());
if (mv.isViolation()) {
nViolations++;
String label;
if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) {
label = "KidHomRef_ParentHomVar";
KidHomRef_ParentHomVar++;
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) {
label = "KidHet_ParentsHomRef";
KidHet_ParentsHomRef++;
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) {
label = "KidHet_ParentsHomVar";
KidHet_ParentsHomVar++;
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
label = "KidHomVar_ParentHomRef";
KidHomVar_ParentHomRef++;
} else {
throw new ReviewedStingException("BUG: unexpected child genotype class " + childG);
}
return "MendelViolation=" + label;
}
nFamCalled += mv.getFamilyCalledCount();
nLowQual += mv.getFamilyLowQualsCount();
nNoCall += mv.getFamilyNoCallCount();
nVarFamCalled += mv.getVarFamilyCalledCount();
}
}
return null; // we don't capture any intersting sites
}
/*
private double getQThreshold() {
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
//return 0.0;
}
TrioStructure trio;
double mendelianViolationQualThreshold;
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public static class TrioStructure {
public String mom, dad, child;
}
public static TrioStructure parseTrioDescription(String family) {
Matcher m = FAMILY_PATTERN.matcher(family);
if (m.matches()) {
TrioStructure trio = new TrioStructure();
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
trio.mom = m.group(1);
trio.dad = m.group(2);
trio.child = m.group(3);
return trio;
} else {
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
}
}
public void initialize(VariantEvalWalker walker) {
trio = parseTrioDescription(walker.getFamilyStructure());
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
}
private boolean includeGenotype(Genotype g) {
return g.getLog10PError() > getQThreshold() && g.isCalled();
}
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) {
return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles());
}
public static boolean isViolation(VariantContext vc, TrioStructure trio ) {
return isViolation(vc, vc.getGenotype(trio.mom), vc.getGenotype(trio.dad), vc.getGenotype(trio.child) );
}
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA) {
//VariantContext momVC = vc.subContextFromGenotypes(momG);
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
int i = 0;
Genotype childG = new Genotype("kidG", childA);
for (Allele momAllele : momA) {
for (Allele dadAllele : dadA) {
if (momAllele.isCalled() && dadAllele.isCalled()) {
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
if (childG.sameGenotype(possibleChild)) {
return false;
}
}
else{
nSkipped++;
}
return null;
}
return true;
return null; // we don't capture any interesting sites
}
*/
}

View File

@ -26,9 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -41,7 +42,6 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*;
@ -180,7 +180,7 @@ import java.util.*;
* </pre>
*
*/
public class SelectVariants extends RodWalker<Integer, Integer> {
public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/**
@ -282,6 +282,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
private double fractionRandom = 0;
@Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false)
private double fractionGenotypes = 0;
/**
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
* When specified one or more times, a particular type of variant is selected.
@ -325,7 +328,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private boolean DISCORDANCE_ONLY = false;
private boolean CONCORDANCE_ONLY = false;
private Set<MendelianViolation> mvSet = new HashSet<MendelianViolation>();
private MendelianViolation mv;
/* variables used by the SELECT RANDOM modules */
@ -344,6 +347,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private PrintStream outMVFileStream = null;
//Random number generator for the genotypes to remove
private Random randomGenotypes = new Random();
/**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
@ -380,8 +385,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
for ( String sample : samples )
logger.info("Including sample '" + sample + "'");
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
if (TYPES_TO_INCLUDE.isEmpty()) {
@ -421,29 +424,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
if (MENDELIAN_VIOLATIONS) {
if ( FAMILY_STRUCTURE_FILE != null) {
try {
for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) {
MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom()))
mvSet.add(mv);
}
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
}
if (outMVFile != null)
try {
outMVFileStream = new PrintStream(outMVFile);
}
catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
}
else
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
}
else if (!FAMILY_STRUCTURE.isEmpty()) {
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
MENDELIAN_VIOLATIONS = true;
mv = new MendelianViolation(MENDELIAN_VIOLATION_QUAL_THRESHOLD,false,true);
}
SELECT_RANDOM_NUMBER = numRandom > 0;
@ -479,26 +460,26 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
}
for (VariantContext vc : vcs) {
if (MENDELIAN_VIOLATIONS) {
boolean foundMV = false;
for (MendelianViolation mv : mvSet) {
if (mv.isViolation(vc)) {
foundMV = true;
//System.out.println(vc.toString());
if (outMVFile != null)
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
break;
if (outMVFile != null){
for( String familyId : mv.getViolationFamilies()){
for(Sample sample : this.getSampleDB().getFamily(familyId)){
if(sample.getParents().size() > 0){
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(),
vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() );
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
}
}
}
if (!foundMV)
break;
}
if (DISCORDANCE_ONLY) {
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
if (!isDiscordant(vc, compVCs))
@ -629,6 +610,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
/**
 * Folds one value into the running total by adding the two together.
 *
 * @param value the value to add
 * @param sum   the total accumulated so far
 * @return {@code value + sum}
 */
@Override
public Integer reduce(Integer value, Integer sum) { return value + sum; }
/**
 * Merges two partial results by adding them together.
 *
 * @param lhs the first partial result
 * @param rhs the second partial result
 * @return {@code lhs + rhs}
 */
@Override
public Integer treeReduce(Integer lhs, Integer rhs) {
return lhs + rhs;
}
public void onTraversalDone(Integer result) {
logger.info(result + " records processed.");
@ -657,9 +643,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
VariantContextBuilder builder = new VariantContextBuilder(sub);
GenotypesContext newGC = sub.getGenotypes();
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
if ( vc.getAlleles().size() != sub.getAlleles().size() )
builder.genotypes(VariantContextUtils.stripPLs(vc.getGenotypes()));
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
//Remove a fraction of the genotypes if needed
if(fractionGenotypes>0){
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
alleles.add(Allele.create((byte)'.'));
alleles.add(Allele.create((byte)'.'));
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
}
else{
genotypes.add(genotype);
}
}
newGC = GenotypesContext.create(genotypes);
}
builder.genotypes(newGC);
int depth = 0;
for (String sample : sub.getSampleNames()) {

View File

@ -1,147 +1,394 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;
/**
* User: carneiro
* User: carneiro / lfran
* Date: 3/9/11
* Time: 12:38 PM
*
* Class for the identification and tracking of mendelian violation. It can be used in 2 distinct ways:
* - Either using an instance of the MendelianViolation class to track mendelian violations for each of the families while
* walking over the variants
* - Or using the static methods to directly get information about mendelian violation in a family at a given locus
*
*/
public class MendelianViolation {
String sampleMom;
String sampleDad;
String sampleChild;
//List of families with violations
private List<String> violationFamilies;
List allelesMom;
List allelesDad;
List allelesChild;
//Call information
private int nocall = 0;
private int familyCalled = 0;
private int varFamilyCalled = 0;
private int lowQual = 0;
double minGenotypeQuality;
private boolean allCalledOnly = true;
//Stores occurrences of inheritance
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
private int violations_total=0;
private double minGenotypeQuality;
private boolean abortOnSampleNotFound;
/**
 * @return the number of families counted as called at the most recently evaluated variant,
 *         i.e. those tallied neither as no-call nor as low quality
 */
public int getFamilyCalledCount(){
return familyCalled;
}
/**
 * @return the number of called families at the most recently evaluated variant in which
 *         mother, father and child are not all HomRef
 */
public int getVarFamilyCalledCount(){
return varFamilyCalled;
}
/**
 * @return the number of families tallied as no-call at the most recently evaluated variant.
 *         When only fully called families are accepted, a family is a no-call if mother,
 *         father or child is uncalled; otherwise it is a no-call if the child is uncalled
 *         or both parents are uncalled.
 */
public int getFamilyNoCallCount(){
return nocall;
}
/**
 * @return the number of families at the most recently evaluated variant in which the mother's,
 *         father's or child's phred-scaled genotype quality is below the minimum
 *         (only counted when that minimum is greater than 0)
 */
public int getFamilyLowQualsCount(){
return lowQual;
}
/**
 * @return the number of Mendelian violations found at the most recently evaluated variant
 */
public int getViolationsCount(){
return violations_total;
}
/**
 * Count of alt alleles inherited from het parents (no violation), summed over the
 * Het/Het, HomRef/Het and HomVar/Het parent combinations.
 */
public int getParentHetInheritedVar(){
    int inheritedVar = getParentsHetHetInheritedVar();
    inheritedVar += getParentsRefHetInheritedVar();
    inheritedVar += getParentsVarHetInheritedVar();
    return inheritedVar;
}
/**
 * Count of ref alleles inherited from het parents (no violation), summed over the
 * Het/Het, HomRef/Het and HomVar/Het parent combinations.
 */
public int getParentHetInheritedRef(){
    int inheritedRef = getParentsHetHetInheritedRef();
    inheritedRef += getParentsRefHetInheritedRef();
    inheritedRef += getParentsVarHetInheritedRef();
    return inheritedRef;
}
/** Count of trios in which mother, father and child are all HomRef. */
public int getRefRefRef(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    return inheritance.get(homRef).get(homRef).get(homRef);
}
/** Count of trios in which mother, father and child are all HomVar. */
public int getVarVarVar(){
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    return inheritance.get(homVar).get(homVar).get(homVar);
}
/** Count of trios with one HomRef parent, one HomVar parent and a Het child (either parent order). */
public int getRefVarHet(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    final Genotype.Type het = Genotype.Type.HET;
    final int momRefDadVar = inheritance.get(homRef).get(homVar).get(het);
    final int momVarDadRef = inheritance.get(homVar).get(homRef).get(het);
    return momRefDadVar + momVarDadRef;
}
/** Count of trios in which mother, father and child are all Het. */
public int getHetHetHet(){
    final Genotype.Type het = Genotype.Type.HET;
    return inheritance.get(het).get(het).get(het);
}
/** Count of trios with two Het parents and a HomRef child. */
public int getHetHetHomRef(){
    final Genotype.Type het = Genotype.Type.HET;
    return inheritance.get(het).get(het).get(Genotype.Type.HOM_REF);
}
/** Count of trios with two Het parents and a HomVar child. */
public int getHetHetHomVar(){
    final Genotype.Type het = Genotype.Type.HET;
    return inheritance.get(het).get(het).get(Genotype.Type.HOM_VAR);
}
/**
 * Count of ref alleles inherited from Het/Het parents (no violation):
 * one per Het child plus two per HomRef child.
 */
public int getParentsHetHetInheritedRef(){
    final EnumMap<Genotype.Type,Integer> hetHetChildren = inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET);
    final int fromHetChildren = hetHetChildren.get(Genotype.Type.HET);
    final int fromHomRefChildren = 2*hetHetChildren.get(Genotype.Type.HOM_REF);
    return fromHetChildren + fromHomRefChildren;
}
/**
 * Count of var alleles inherited from Het/Het parents (no violation):
 * one per Het child plus two per HomVar child.
 */
public int getParentsHetHetInheritedVar(){
    final EnumMap<Genotype.Type,Integer> hetHetChildren = inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET);
    final int fromHetChildren = hetHetChildren.get(Genotype.Type.HET);
    final int fromHomVarChildren = 2*hetHetChildren.get(Genotype.Type.HOM_VAR);
    return fromHetChildren + fromHomVarChildren;
}
/** Count of ref alleles inherited from HomRef/Het parents (no violation): HomRef children, either parent order. */
public int getParentsRefHetInheritedRef(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    final Genotype.Type het = Genotype.Type.HET;
    final int momRefDadHet = inheritance.get(homRef).get(het).get(homRef);
    final int momHetDadRef = inheritance.get(het).get(homRef).get(homRef);
    return momRefDadHet + momHetDadRef;
}
/** Count of var alleles inherited from HomRef/Het parents (no violation): Het children, either parent order. */
public int getParentsRefHetInheritedVar(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    final Genotype.Type het = Genotype.Type.HET;
    final int momRefDadHet = inheritance.get(homRef).get(het).get(het);
    final int momHetDadRef = inheritance.get(het).get(homRef).get(het);
    return momRefDadHet + momHetDadRef;
}
/** Count of ref alleles inherited from HomVar/Het parents (no violation): Het children, either parent order. */
public int getParentsVarHetInheritedRef(){
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    final Genotype.Type het = Genotype.Type.HET;
    final int momVarDadHet = inheritance.get(homVar).get(het).get(het);
    final int momHetDadVar = inheritance.get(het).get(homVar).get(het);
    return momVarDadHet + momHetDadVar;
}
/** Count of var alleles inherited from HomVar/Het parents (no violation): HomVar children, either parent order. */
public int getParentsVarHetInheritedVar(){
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    final Genotype.Type het = Genotype.Type.HET;
    final int momVarDadHet = inheritance.get(homVar).get(het).get(homVar);
    final int momHetDadVar = inheritance.get(het).get(homVar).get(homVar);
    return momVarDadHet + momHetDadVar;
}
/** Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR. */
public int getParentsRefRefChildVar(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    return inheritance.get(homRef).get(homRef).get(Genotype.Type.HOM_VAR);
}
/** Count of violations of the type HOM_REF/HOM_REF -> HET. */
public int getParentsRefRefChildHet(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    return inheritance.get(homRef).get(homRef).get(Genotype.Type.HET);
}
/** Count of violations of the type HOM_REF/HET -> HOM_VAR (either parent order). */
public int getParentsRefHetChildVar(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    final Genotype.Type het = Genotype.Type.HET;
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    final int momRefDadHet = inheritance.get(homRef).get(het).get(homVar);
    final int momHetDadRef = inheritance.get(het).get(homRef).get(homVar);
    return momRefDadHet + momHetDadRef;
}
/** Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR (either parent order). */
public int getParentsRefVarChildVar(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    final int momRefDadVar = inheritance.get(homRef).get(homVar).get(homVar);
    final int momVarDadRef = inheritance.get(homVar).get(homRef).get(homVar);
    return momRefDadVar + momVarDadRef;
}
/** Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF (either parent order). */
public int getParentsRefVarChildRef(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    final int momRefDadVar = inheritance.get(homRef).get(homVar).get(homRef);
    final int momVarDadRef = inheritance.get(homVar).get(homRef).get(homRef);
    return momRefDadVar + momVarDadRef;
}
/** Count of violations of the type HOM_VAR/HET -> HOM_REF (either parent order). */
public int getParentsVarHetChildRef(){
    final Genotype.Type homRef = Genotype.Type.HOM_REF;
    final Genotype.Type het = Genotype.Type.HET;
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    final int momHetDadVar = inheritance.get(het).get(homVar).get(homRef);
    final int momVarDadHet = inheritance.get(homVar).get(het).get(homRef);
    return momHetDadVar + momVarDadHet;
}
/** Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF. */
public int getParentsVarVarChildRef(){
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    return inheritance.get(homVar).get(homVar).get(Genotype.Type.HOM_REF);
}
/** Count of violations of the type HOM_VAR/HOM_VAR -> HET. */
public int getParentsVarVarChildHet(){
    final Genotype.Type homVar = Genotype.Type.HOM_VAR;
    return inheritance.get(homVar).get(homVar).get(Genotype.Type.HET);
}
/** Count of violations of the type HOM_VAR/? -> HOM_REF, summed over the other parent's genotype. */
public int getParentVarChildRef(){
    int violations = getParentsRefVarChildRef();
    violations += getParentsVarHetChildRef();
    violations += getParentsVarVarChildRef();
    return violations;
}
/** Count of violations of the type HOM_REF/? -> HOM_VAR, summed over the other parent's genotype. */
public int getParentRefChildVar(){
    int violations = getParentsRefVarChildVar();
    violations += getParentsRefHetChildVar();
    violations += getParentsRefRefChildVar();
    return violations;
}
/**
 * Returns the recorded violation entries joined with commas.
 *
 * Each entry is the family ID that was recorded when a violation was found, so the
 * result has the form "fam1,fam2,..." (a family ID appears once per violating child).
 *
 * @return the comma-separated family IDs, or the empty string when no violation was recorded
 */
public String getViolationFamiliesString(){
    final Iterator<String> families = violationFamilies.iterator();
    if(!families.hasNext())
        return "";

    final String first = families.next();
    // A single entry is returned as is; no builder is needed.
    if(!families.hasNext())
        return first;

    // Accumulate in a builder rather than re-copying the string on every iteration.
    final StringBuilder joined = new StringBuilder().append(first);
    while(families.hasNext()){
        joined.append(',').append(families.next());
    }
    return joined.toString();
}
/**
 * @return the family IDs recorded for violations at the most recently evaluated variant.
 *         This is the internal list itself, not a copy: it is cleared and refilled each
 *         time violations are counted.
 */
public List<String> getViolationFamilies(){
return violationFamilies;
}
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
/** @return the sample name of the mother */
public String getSampleMom() {
return sampleMom;
}
/** @return the sample name of the father */
public String getSampleDad() {
return sampleDad;
}
/** @return the sample name of the child */
public String getSampleChild() {
return sampleChild;
}
/** @return the minimum phred-scaled genotype quality required to assess a Mendelian violation */
public double getMinGenotypeQuality() {
return minGenotypeQuality;
}
/**
 * Creates a checker for one explicitly named trio.
 *
 * @param sampleMomP   sample name of mom
 * @param sampleDadP   sample name of dad
 * @param sampleChildP sample name of child
 */
public MendelianViolation (String sampleMomP, String sampleDadP, String sampleChildP) {
    this.sampleMom = sampleMomP;
    this.sampleDad = sampleDadP;
    this.sampleChild = sampleChildP;
}
/**
* Constructor
* @param family - the sample names string "mom+dad=child"
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
*/
public MendelianViolation(String family, double minGenotypeQualityP) {
    this.minGenotypeQuality = minGenotypeQualityP;

    // The family string must look like "mom+dad=child"; reject anything else up front.
    final Matcher familyMatcher = FAMILY_PATTERN.matcher(family);
    if (!familyMatcher.matches())
        throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");

    this.sampleMom = familyMatcher.group(1);
    this.sampleDad = familyMatcher.group(2);
    this.sampleChild = familyMatcher.group(3);
}
/**
* An alternative to the more general constructor if you want to get the Sample information from the engine yourself.
* @param sample - the sample object extracted from the sample metadata YAML file given to the engine.
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
*/
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
sampleMom = sample.getMother().getID();
sampleDad = sample.getFather().getID();
sampleChild = sample.getID();
minGenotypeQuality = minGenotypeQualityP;
}
/**
 * Loads the alleles of mom, dad and child from the variant context so that a following
 * call to isViolation() can evaluate them. isViolation(vc) calls this itself; call it
 * directly only to learn whether the site is a usable comparison site.
 *
 * @param vc the variant context to extract the genotypes and alleles for mom, dad and child
 * @return false if any member is a no-call, is below the minimum genotype quality, or has
 *         no alleles; true otherwise
 * @throws IllegalArgumentException if the context has no genotype for one of the members
 */
public boolean setAlleles (VariantContext vc)
{
    final Genotype momGenotype = vc.getGenotypes(sampleMom).get(sampleMom);
    final Genotype dadGenotype = vc.getGenotypes(sampleDad).get(sampleDad);
    final Genotype childGenotype = vc.getGenotypes(sampleChild).get(sampleChild);

    final boolean memberMissing = momGenotype == null || dadGenotype == null || childGenotype == null;
    if (memberMissing)
        throw new IllegalArgumentException(String.format("Variant %s:%d didn't contain genotypes for all family members: mom=%s dad=%s child=%s", vc.getChr(), vc.getStart(), sampleMom, sampleDad, sampleChild));

    // Qualities are only inspected once every member is known to be called.
    if (momGenotype.isNoCall() || dadGenotype.isNoCall() || childGenotype.isNoCall())
        return false;
    if (momGenotype.getPhredScaledQual() < minGenotypeQuality ||
            dadGenotype.getPhredScaledQual() < minGenotypeQuality ||
            childGenotype.getPhredScaledQual() < minGenotypeQuality)
        return false;

    allelesMom = momGenotype.getAlleles();
    allelesDad = dadGenotype.getAlleles();
    allelesChild = childGenotype.getAlleles();

    final boolean anyEmpty = allelesMom.isEmpty() || allelesDad.isEmpty() || allelesChild.isEmpty();
    return !anyEmpty;
}
/**
* Constructor that delegates to the two-argument constructor with abortOnSampleNotFound set to true.
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
*/
public MendelianViolation(double minGenotypeQualityP) {
this(minGenotypeQualityP,true);
}
/**
 * Constructor that accepts only families in which mother, father and child are all called.
 *
 * @param minGenotypeQualityP   the minimum phred scaled genotype quality score necessary to assess mendelian violation
 * @param abortOnSampleNotFound whether to stop execution if a family is passed but no relevant genotypes are found.
 *                              If false, then the family is ignored.
 */
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound) {
    // Same state as before: allCalledOnly keeps its default of true.
    this(minGenotypeQualityP, abortOnSampleNotFound, true);
}
/**
 * Constructor exposing every option.
 *
 * @param minGenotypeQualityP   the minimum phred scaled genotype quality score necessary to assess mendelian violation
 * @param abortOnSampleNotFound whether to stop execution if a family is passed but no relevant genotypes are found.
 *                              If false, then the family is ignored.
 * @param completeTriosOnly     whether only complete trios are considered or parent/child pairs are too
 */
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound, boolean completeTriosOnly) {
    this.minGenotypeQuality = minGenotypeQualityP;
    this.abortOnSampleNotFound = abortOnSampleNotFound;
    this.allCalledOnly = completeTriosOnly;
    this.violationFamilies = new ArrayList<String>();
    createInheritanceMap();
}
/**
* @param families the families to be checked for Mendelian violations
* @param vc the variant context to extract the genotypes and alleles for mom, dad and child.
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
*
*/
public boolean isViolation(VariantContext vc)
{
    // Without usable alleles for the whole trio nothing can be concluded.
    if (!setAlleles(vc))
        return false;
    return isViolation();
}
/**
* @return whether or not there is a mendelian violation at the site.
*/
public boolean isViolation() {
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0)))
return false;
return true;
/**
 * Resets all tallies, then evaluates every sample that has at least one parent, in each
 * of the given families, at the given variant.
 *
 * @param families family ID -> family members to check
 * @param vc       the variant context holding the genotypes
 * @return the number of Mendelian violations found at this variant
 */
public int countViolations(Map<String, Set<Sample>> families, VariantContext vc){
    //Reset counts
    violations_total=0;
    varFamilyCalled = 0;
    familyCalled = 0;
    lowQual = 0;
    nocall = 0;
    violationFamilies.clear();
    clearInheritanceMap();

    for(final Set<Sample> members : families.values()){
        for(final Sample member : members){
            // Samples without parents cannot be checked as a child.
            if(!(member.getParents().size() > 0))
                continue;
            updateViolations(member.getFamilyID(),member.getMaternalID(), member.getPaternalID(), member.getID() ,vc);
        }
    }
    return violations_total;
}
/**
 * Resets all tallies, then evaluates a single mother/father/child trio at the given variant.
 * The violation is recorded under the mother's family ID.
 *
 * @return true if the trio is a Mendelian violation at this variant
 */
public boolean isViolation(Sample mother, Sample father, Sample child, VariantContext vc){
    //Reset counts
    violations_total=0;
    varFamilyCalled = 0;
    familyCalled = 0;
    lowQual = 0;
    nocall = 0;
    violationFamilies.clear();
    clearInheritanceMap();

    updateViolations(mother.getFamilyID(),mother.getID(),father.getID(),child.getID(),vc);

    final boolean violationFound = violations_total>0;
    return violationFound;
}
/**
 * Evaluates one mother/father/child group at a variant and updates the tallies:
 * no-call, low quality, called, called-and-variant, violations and the
 * mother/father/child genotype-type table.
 *
 * @param familyId the ID recorded in the violation list when a violation is found
 * @param motherId sample name of the mother
 * @param fatherId sample name of the father
 * @param childId  sample name of the child
 * @param vc       the variant context holding the genotypes
 * @throws IllegalArgumentException if a genotype is missing and abortOnSampleNotFound is set
 */
private void updateViolations(String familyId, String motherId, String fatherId, String childId, VariantContext vc){
    final Genotype gMom = vc.getGenotype(motherId);
    final Genotype gDad = vc.getGenotype(fatherId);
    final Genotype gChild = vc.getGenotype(childId);

    if (gMom == null || gDad == null || gChild == null){
        if(abortOnSampleNotFound)
            // The last value is the child ID; it was previously mislabelled "family=".
            throw new IllegalArgumentException(String.format("Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s child=%s", vc.getChr(), vc.getStart(), familyId, motherId, fatherId, childId));
        // Otherwise the family is silently ignored.
        return;
    }

    //Count No calls
    if(allCalledOnly && (!gMom.isCalled() || !gDad.isCalled() || !gChild.isCalled())){
        nocall++;
    }
    // Even when parent/child pairs are accepted, a called child and at least one called parent are required.
    else if (!gMom.isCalled() && !gDad.isCalled() || !gChild.isCalled()){
        nocall++;
    }
    //Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned
    // NOTE(review): when pairs are accepted and one parent is uncalled, that parent's quality is
    // still compared against the threshold here - confirm this is intended.
    else if (minGenotypeQuality>0 && (gMom.getPhredScaledQual() < minGenotypeQuality ||
            gDad.getPhredScaledQual() < minGenotypeQuality ||
            gChild.getPhredScaledQual() < minGenotypeQuality )) {
        lowQual++;
    }
    else{
        //Count all families per loci called
        familyCalled++;
        //If the family is all homref, not too interesting
        if(!(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef()))
        {
            varFamilyCalled++;
            if(isViolation(gMom, gDad, gChild)){
                violationFamilies.add(familyId);
                violations_total++;
            }
        }
        // Record the observed mother/father/child genotype-type combination.
        final EnumMap<Genotype.Type,Integer> childCounts = inheritance.get(gMom.getType()).get(gDad.getType());
        final int count = childCounts.get(gChild.getType());
        childCounts.put(gChild.getType(),count+1);
    }
}
/**
 * Decides whether the child's genotype is inconsistent with the parents' genotypes.
 *
 * @return true if the genotypes constitute a Mendelian violation
 */
private boolean isViolation(Genotype gMom, Genotype gDad, Genotype gChild) {
    //One parent is a no-call: only opposite homozygotes in the other parent and the child are a violation
    final boolean momUncalled = !gMom.isCalled();
    if(momUncalled || !gDad.isCalled()){
        final Genotype otherParent = momUncalled ? gDad : gMom;
        return (otherParent.isHomRef() && gChild.isHomVar()) || (otherParent.isHomVar() && gChild.isHomRef());
    }

    //Both parents have genotype information: each child allele must come from a different parent
    final Object childFirst = gChild.getAlleles().get(0);
    final Object childSecond = gChild.getAlleles().get(1);
    final List<?> momAlleles = gMom.getAlleles();
    final List<?> dadAlleles = gDad.getAlleles();

    final boolean consistent =
            momAlleles.contains(childFirst) && dadAlleles.contains(childSecond) ||
            momAlleles.contains(childSecond) && dadAlleles.contains(childFirst);
    return !consistent;
}
/**
 * Allocates the mother -> father -> child genotype-type table with a zero count for
 * every combination of genotype types.
 */
private void createInheritanceMap(){
    final Genotype.Type[] allTypes = Genotype.Type.values();
    inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);

    for(final Genotype.Type momType : allTypes){
        final EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>> byDad =
                new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class);
        for(final Genotype.Type dadType : allTypes){
            final EnumMap<Genotype.Type,Integer> byChild = new EnumMap<Genotype.Type,Integer>(Genotype.Type.class);
            for(final Genotype.Type childType : allTypes){
                byChild.put(childType, 0);
            }
            byDad.put(dadType, byChild);
        }
        inheritance.put(momType, byDad);
    }
}
/**
 * Resets every count in the mother -> father -> child genotype-type table to zero.
 */
private void clearInheritanceMap(){
    final Genotype.Type[] allTypes = Genotype.Type.values();
    for(final Genotype.Type momType : allTypes){
        final EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>> byDad = inheritance.get(momType);
        for(final Genotype.Type dadType : allTypes){
            final EnumMap<Genotype.Type,Integer> byChild = byDad.get(dadType);
            for(final Genotype.Type childType : allTypes){
                byChild.put(childType, 0);
            }
        }
    }
}
/**
* @return the likelihood ratio for a mendelian violation
*/
public double violationLikelihoodRatio(VariantContext vc) {
public double violationLikelihoodRatio(VariantContext vc, String motherId, String fatherId, String childId) {
double[] logLikAssignments = new double[27];
// the matrix to set up is
// MOM DAD CHILD
@ -152,9 +399,9 @@ public class MendelianViolation {
// AA AB | AB
// |- BB
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
double[] momGL = vc.getGenotype(motherId).getLikelihoods().getAsVector();
double[] dadGL = vc.getGenotype(fatherId).getLikelihoods().getAsVector();
double[] childGL = vc.getGenotype(childId).getLikelihoods().getAsVector();
int offset = 0;
for ( int oMom = 0; oMom < 3; oMom++ ) {
for ( int oDad = 0; oDad < 3; oDad++ ) {

View File

@ -27,11 +27,42 @@ public class SampleDBUnitTest extends BaseTest {
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));
private static final Set<Sample> testPEDFamilyF2 = new HashSet<Sample>(Arrays.asList(
new Sample("s2", "fam2", "d2", "m2", Gender.FEMALE, Affection.AFFECTED),
new Sample("d2", "fam2", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("m2", "fam2", null, null, Gender.FEMALE, Affection.UNKNOWN)
));
private static final Set<Sample> testPEDFamilyF3 = new HashSet<Sample>(Arrays.asList(
new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED),
new Sample("d1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN),
new Sample("m1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN)
));
private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));
private static final HashMap<String, Set<Sample>> testGetFamilies = new HashMap<String,Set<Sample>>();
static {
testGetFamilies.put("fam1", testPEDSamples);
testGetFamilies.put("fam2", testPEDFamilyF2);
testGetFamilies.put("fam3", testPEDFamilyF3);
}
private static final Set<Sample> testKidsWithParentsFamilies2 = new HashSet<Sample>(Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("kid3", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED),
new Sample("kid2", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED)));
private static final HashSet<String> testGetPartialFamiliesIds = new HashSet<String>(Arrays.asList("kid","s1"));
private static final HashMap<String, Set<Sample>> testGetPartialFamilies = new HashMap<String,Set<Sample>>();
static {
testGetPartialFamilies.put("fam1", new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED))));
testGetPartialFamilies.put("fam3", new HashSet<Sample>(Arrays.asList(new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED))));
}
private static final String testPEDString =
String.format("%s%n%s%n%s",
"fam1 kid dad mom 1 2",
@ -46,6 +77,18 @@ public class SampleDBUnitTest extends BaseTest {
"fam3 s1 d1 m1 2 2",
"fam2 s2 d2 m2 2 2");
private static final String testPEDMultipleFamilies2 =
String.format("%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s",
"fam1 kid dad mom 1 2",
"fam1 dad 0 0 1 1",
"fam1 mom 0 0 2 2",
"fam4 kid4 dad4 0 1 2",
"fam4 dad4 0 0 1 1",
"fam5 kid2 dad2 mom2 1 2",
"fam5 kid3 dad2 mom2 1 2",
"fam5 dad2 0 0 1 1",
"fam5 mom2 0 0 2 2");
private static final String testPEDStringInconsistentGender =
"fam1 kid 0 0 2 2";
@ -138,6 +181,25 @@ public class SampleDBUnitTest extends BaseTest {
Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
}
/**
 * Checks that getFamilies() returns every family, that passing null is equivalent,
 * and that passing sample IDs keeps only those samples within their families.
 */
@Test()
public void getFamilies(){
    builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
    final SampleDB sampleDB = builder.getFinalSampleDB();

    final Map<String, Set<Sample>> allFamilies = sampleDB.getFamilies();
    Assert.assertEquals(allFamilies,testGetFamilies);

    final Map<String, Set<Sample>> allFamiliesFromNull = sampleDB.getFamilies(null);
    Assert.assertEquals(allFamiliesFromNull,testGetFamilies);

    final Map<String, Set<Sample>> partialFamilies = sampleDB.getFamilies(testGetPartialFamiliesIds);
    Assert.assertEquals(partialFamilies,testGetPartialFamilies);
}
/**
 * Checks getChildrenWithParents() with no argument, with false and with true
 * against the expected sets of children.
 */
@Test()
public void testGetChildrenWithParents()
{
    final HashSet<Sample> expectedWhenTrue = new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED)));

    builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies2));
    final SampleDB sampleDB = builder.getFinalSampleDB();

    Assert.assertEquals(sampleDB.getChildrenWithParents(), testKidsWithParentsFamilies2);
    Assert.assertEquals(sampleDB.getChildrenWithParents(false), testKidsWithParentsFamilies2);
    Assert.assertEquals(sampleDB.getChildrenWithParents(true), expectedWhenTrue);
}
@Test()
public void loadFamilyIDs() {
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));

View File

@ -291,6 +291,17 @@ public class VariantEvalIntegrationTest extends WalkerTest {
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
}
/**
 * Runs VariantEval with only the MendelianViolationEvaluator on the test VCF/PED pair
 * and compares the output against the expected MD5.
 */
@Test
public void testVEMendelianViolationEvaluator() {
    final String vcfFile = "/MendelianViolationEval.vcf";
    final String pedFile = "/MendelianViolationEval.ped";
    final String evalPath = variantEvalTestDataRoot + vcfFile;
    final String pedPath = variantEvalTestDataRoot + pedFile;

    final String command = "-T VariantEval -R "+b37KGReference+" --eval " + evalPath + " -ped "+ pedPath +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST";
    final WalkerTestSpec spec = new WalkerTestSpec(command,
            1,
            Arrays.asList("66e72c887124f40933d32254b2dd44a3"));
    executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec);
}
@Test
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";

View File

@ -115,4 +115,26 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testUsingDbsnpName--" + testFile, spec);
}
/**
 * Runs the same selection with 2 and then 4 threads; both runs must produce the same MD5.
 */
@Test
public void testParallelization() {
    final String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
    final String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
    final String sharedArgs = " -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile;

    for (final int threads : new int[]{2, 4}) {
        final WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sharedArgs + " -nt " + threads),
                1,
                Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
        );
        executeTest("testParallelization (" + threads + " threads)--" + testfile, spec);
    }
}
}

View File

@ -83,6 +83,10 @@ class DataProcessingPipeline extends QScript {
@Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false)
var defaultPlatform: String = ""
@Hidden
@Input(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false)
var testMode: Boolean = false
/****************************************************************************
* Global Variables
@ -335,6 +339,7 @@ class DataProcessingPipeline extends QScript {
this.known ++= qscript.indels
this.consensusDeterminationModel = cleanModelEnum
this.compress = 0
this.noPGTag = qscript.testMode;
this.scatterCount = nContigs
this.analysisName = queueLogDir + outBam + ".clean"
this.jobName = queueLogDir + outBam + ".clean"
@ -360,6 +365,7 @@ class DataProcessingPipeline extends QScript {
this.out = outBam
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
this.no_pg_tag = qscript.testMode
this.scatterCount = nContigs
this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration"

View File

@ -47,6 +47,10 @@ class PacbioProcessingPipeline extends QScript {
@Input(shortName="bwastring", required=false)
var bwastring: String = ""
@Hidden
@Input(shortName = "test", fullName = "test_mode", required = false)
var testMode: Boolean = false
val queueLogDir: String = ".qlog/"
def script = {
@ -170,6 +174,7 @@ class PacbioProcessingPipeline extends QScript {
this.input_file :+= inBam
this.recal_file = inRecalFile
this.out = outBam
this.no_pg_tag = testMode
this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration"
this.jobName = queueLogDir + outBam + ".recalibration"
@ -177,7 +182,6 @@ class PacbioProcessingPipeline extends QScript {
}
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
this.resources = R
this.recal_file = inRecalFile
this.output_dir = outPath
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"

View File

@ -0,0 +1,69 @@
package org.broadinstitute.sting.queue.pipeline
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.testng.annotations.Test
import org.broadinstitute.sting.BaseTest
class DataProcessingPipelineTest {
  /**
   * Runs the DataProcessingPipeline qscript in test mode on the example BAM and checks
   * the MD5 of the recalibrated output.
   *
   * @param projectName value passed to -p; also prefixes the expected output file name
   * @param extraArgs   arguments inserted between " -test " and " -p "
   * @param expectedMD5 expected MD5 of the output BAM
   */
  private def runPipeline(projectName: String, extraArgs: Seq[String], expectedMD5: String) {
    val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
    val commonArgs = Seq(
      " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
      " -R " + BaseTest.testDir + "exampleFASTA.fasta",
      " -i " + BaseTest.testDir + "exampleBAM.bam",
      " -D " + BaseTest.testDir + "exampleDBSNP.vcf",
      " -nv ",
      " -test ")
    val spec = new PipelineTestSpec
    spec.name = "DataProcessingPipeline"
    spec.args = (commonArgs ++ extraArgs ++ Seq(" -p " + projectName)).mkString
    spec.fileMD5s += testOut -> expectedMD5
    PipelineTest.executeTest(spec)
  }

  @Test
  def testSimpleBAM {
    runPipeline("test1", Nil, "1f85e76de760167a77ed1d9ab4da2936")
  }

  @Test
  def testBWAPEBAM {
    // NOTE(review): the bwa path is an absolute path under one user's home directory.
    runPipeline("test2", Seq(" -bwa /home/unix/carneiro/bin/bwa", " -bwape "), "57416a0abdf9524bc92834d466529708")
  }
}

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.queue.pipeline
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.testng.annotations.Test
import org.broadinstitute.sting.BaseTest
class PacbioProcessingPipelineTest {
  /** Runs the PacbioProcessingPipeline qscript in test mode and checks the MD5 of the recalibrated BAM. */
  @Test
  def testBAM {
    val qscriptArgs = List(
      " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala",
      " -R " + BaseTest.testDir + "exampleFASTA.fasta",
      " -i " + BaseTest.testDir + "exampleBAM.bam",
      " -blasr ",
      " -test ",
      " -D " + BaseTest.testDir + "exampleDBSNP.vcf")

    val spec = new PipelineTestSpec
    spec.name = "pacbioProcessingPipeline"
    spec.args = qscriptArgs.mkString
    spec.fileMD5s += "exampleBAM.recal.bam" -> "f0adce660b55cb91d5f987f9a145471e"
    PipelineTest.executeTest(spec)
  }
}

Binary file not shown.

Binary file not shown.

282
public/testdata/exampleDBSNP.vcf vendored 100644
View File

@ -0,0 +1,282 @@
##fileformat=VCFv4.1
##FILTER=<ID=NC,Description="Inconsistent Genotype Submission For At Least One Sample">
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=ASP,Number=0,Type=Flag,Description="Is Assembly specific. This is set if the variant only maps to one assembly">
##INFO=<ID=ASS,Number=0,Type=Flag,Description="In acceptor splice site FxnCode = 73">
##INFO=<ID=CDA,Number=0,Type=Flag,Description="Variation is interrogated in a clinical diagnostic assay">
##INFO=<ID=CFL,Number=0,Type=Flag,Description="Has Assembly conflict. This is for weight 1 and 2 variant that maps to different chromosomes on different assemblies.">
##INFO=<ID=CLN,Number=0,Type=Flag,Description="Variant is Clinical(LSDB,OMIM,TPA,Diagnostic)">
##INFO=<ID=DSS,Number=0,Type=Flag,Description="In donor splice-site FxnCode = 75">
##INFO=<ID=G5,Number=0,Type=Flag,Description=">5% minor allele frequency in 1+ populations">
##INFO=<ID=G5A,Number=0,Type=Flag,Description=">5% minor allele frequency in each and all populations">
##INFO=<ID=GCF,Number=0,Type=Flag,Description="Has Genotype Conflict Same (rs, ind), different genotype. N/N is not included.">
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:) and each pair is delimited by a vertical bar (|)">
##INFO=<ID=GMAF,Number=1,Type=Float,Description="Global Minor Allele Frequency [0, 0.5]; global population is 1000GenomesProject phase 1 genotype data from 629 individuals, released in the 08-04-2010 dataset">
##INFO=<ID=GNO,Number=0,Type=Flag,Description="Genotypes available. The variant has individual genotype (in SubInd table).">
##INFO=<ID=HD,Number=0,Type=Flag,Description="Marker is on high density genotyping kit (50K density or greater). The variant may have phenotype associations present in dbGaP.">
##INFO=<ID=INT,Number=0,Type=Flag,Description="In Intron FxnCode = 6">
##INFO=<ID=KGPROD,Number=0,Type=Flag,Description="1000 Genome production phase">
##INFO=<ID=KGPilot1,Number=0,Type=Flag,Description="1000 Genome discovery(pilot1) 2009">
##INFO=<ID=KGPilot123,Number=0,Type=Flag,Description="1000 Genome discovery all pilots 2010(1,2,3)">
##INFO=<ID=KGVAL,Number=0,Type=Flag,Description="1000 Genome validated by second method">
##INFO=<ID=LSD,Number=0,Type=Flag,Description="Submitted from a locus-specific database">
##INFO=<ID=MTP,Number=0,Type=Flag,Description="Microattribution/third-party annotation(TPA:GWAS,PAGE)">
##INFO=<ID=MUT,Number=0,Type=Flag,Description="Is mutation (journal citation, explicit fact): a low frequency variation that is cited in journal and other reputable sources">
##INFO=<ID=NOC,Number=0,Type=Flag,Description="Contig allele not present in variant allele list. The reference sequence allele at the mapped position is not present in the variant allele list, adjusted for orientation.">
##INFO=<ID=NOV,Number=0,Type=Flag,Description="Rs cluster has non-overlapping allele sets. True when rs set has more than 2 alleles from different submissions and these sets share no alleles in common.">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=NSF,Number=0,Type=Flag,Description="Has non-synonymous frameshift A coding region variation where one allele in the set changes all downstream amino acids. FxnClass = 44">
##INFO=<ID=NSM,Number=0,Type=Flag,Description="Has non-synonymous missense A coding region variation where one allele in the set changes protein peptide. FxnClass = 42">
##INFO=<ID=NSN,Number=0,Type=Flag,Description="Has non-synonymous nonsense A coding region variation where one allele in the set changes to STOP codon (TER). FxnClass = 41">
##INFO=<ID=OM,Number=0,Type=Flag,Description="Has OMIM/OMIA">
##INFO=<ID=OTH,Number=0,Type=Flag,Description="Has other variant with exactly the same set of mapped positions on NCBI refernce assembly.">
##INFO=<ID=PH1,Number=0,Type=Flag,Description="Phase 1 genotyped: filtered, non-redundant">
##INFO=<ID=PH2,Number=0,Type=Flag,Description="Phase 2 genotyped: filtered, non-redundant">
##INFO=<ID=PH3,Number=0,Type=Flag,Description="Phase 3 genotyped: filtered, non-redundant">
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
##INFO=<ID=PMC,Number=0,Type=Flag,Description="Links exist to PubMed Central article">
##INFO=<ID=R3,Number=0,Type=Flag,Description="In 3' gene region FxnCode = 13">
##INFO=<ID=R5,Number=0,Type=Flag,Description="In 5' gene region FxnCode = 15">
##INFO=<ID=REF,Number=0,Type=Flag,Description="Has reference A coding region variation where one allele in the set is identical to the reference sequence. FxnCode = 8">
##INFO=<ID=RSPOS,Number=1,Type=Integer,Description="Chr position reported in dbSNP">
##INFO=<ID=RV,Number=0,Type=Flag,Description="RS orientation is reversed">
##INFO=<ID=S3D,Number=0,Type=Flag,Description="Has 3D structure - SNP3D table">
##INFO=<ID=SAO,Number=1,Type=Integer,Description="Variant Allele Origin: 0 - unspecified, 1 - Germline, 2 - Somatic, 3 - Both">
##INFO=<ID=SCS,Number=1,Type=Integer,Description="Variant Clinical Significance, 0 - unknown, 1 - untested, 2 - non-pathogenic, 3 - probable-non-pathogenic, 4 - probable-pathogenic, 5 - pathogenic, 6 - drug-response, 7 - histocompatibility, 255 - other">
##INFO=<ID=SLO,Number=0,Type=Flag,Description="Has SubmitterLinkOut - From SNP->SubSNP->Batch.link_out">
##INFO=<ID=SSR,Number=1,Type=Integer,Description="Variant Suspect Reason Code, 0 - unspecified, 1 - Paralog, 2 - byEST, 3 - Para_EST, 4 - oldAlign, 5 - other">
##INFO=<ID=SYN,Number=0,Type=Flag,Description="Has synonymous A coding region variation where one allele in the set does not change the encoded amino acid. FxnCode = 3">
##INFO=<ID=TPA,Number=0,Type=Flag,Description="Provisional Third Party Annotation(TPA) (currently rs from PHARMGKB who will give phenotype data)">
##INFO=<ID=U3,Number=0,Type=Flag,Description="In 3' UTR Location is in an untranslated region (UTR). FxnCode = 53">
##INFO=<ID=U5,Number=0,Type=Flag,Description="In 5' UTR Location is in an untranslated region (UTR). FxnCode = 55">
##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
##INFO=<ID=VLD,Number=0,Type=Flag,Description="Is Validated. This bit is set if the variant has 2+ minor allele count based on frequency or genotype data.">
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property">
##INFO=<ID=WGT,Number=1,Type=Integer,Description="Weight, 00 - unmapped, 1 - weight 1, 2 - weight 2, 3 - weight 3 or more">
##INFO=<ID=WTD,Number=0,Type=Flag,Description="Is Withdrawn by submitter If one member ss is withdrawn by submitter, then this bit is set. If all member ss' are withdrawn, then the rs is deleted to SNPHistory">
##INFO=<ID=dbSNPBuildID,Number=1,Type=Integer,Description="First dbSNP Build for RS">
##LeftAlignVariants="analysis_type=LeftAlignVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=00-All.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub filter_mismatching_base_and_quals=false"
##contig=<ID=chr1,length=249250621,assembly=b37>
##phasing=partial
##reference=GRCh37.3
##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta
##source=dbSNP
##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 10144 rs144773400 TA T . PASS ASP;RSPOS=10145;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10228 rs143255646 TA T . PASS ASP;RSPOS=10229;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10234 rs145599635 C T . PASS ASP;RSPOS=10234;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10248 rs148908337 A T . PASS ASP;RSPOS=10248;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10254 rs140194106 TA T . PASS ASP;RSPOS=10255;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10291 rs145427775 C T . PASS ASP;RSPOS=10291;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10327 rs112750067 T C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10327;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 10329 rs150969722 AC A . PASS ASP;RSPOS=10330;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10351 rs145072688 CTA C,CA . PASS ASP;RSPOS=10352;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10382 rs147093981 AAC A,AC . PASS ASP;RSPOS=10383;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10433 rs56289060 A AC . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10433;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 10439 rs112766696 AC A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10440;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=132
chr1 10439 rs138941843 AC A . PASS ASP;RSPOS=10440;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10440 rs112155239 C A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10440;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 10492 rs55998931 C T . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.0617001828153565;RSPOS=10492;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040000000100;WGT=0;dbSNPBuildID=129
chr1 10519 rs62636508 G C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10519;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
chr1 10583 rs58108140 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.270566727605119;KGPilot123;RSPOS=10583;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=129
chr1 10611 rs189107123 C G . PASS KGPilot123;RSPOS=10611;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 10828 rs10218492 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10828;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
chr1 10904 rs10218493 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10904;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=119
chr1 10927 rs10218527 A G . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10927;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
chr1 10938 rs28853987 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10938;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11014 rs28484712 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11014;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11022 rs28775022 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11022;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11081 rs10218495 G T . PASS CFL;GENEINFO=LOC100652771:100652771;GNO;RSPOS=11081;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=119
chr1 11863 rs187669455 C A . PASS RSPOS=11863;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
chr1 13302 rs180734498 C T . PASS KGPilot123;RSPOS=13302;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 13327 rs144762171 G C . PASS ASP;KGPilot123;RSPOS=13327;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 13684 rs71260404 C T . PASS GENEINFO=LOC100652771:100652771;GNO;RSPOS=13684;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 13980 rs151276478 T C . PASS ASP;KGPilot123;RSPOS=13980;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 14889 rs142444908 G A . PASS ASP;RSPOS=14889;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 14907 rs79585140 A G . PASS GNO;RSPOS=14907;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 14930 rs75454623 A G . PASS GNO;RSPOS=14930;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 14976 rs71252251 G A . PASS ASP;GNO;RSPOS=14976;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
chr1 15061 rs71268703 T TG . PASS ASP;GNO;RSPOS=15061;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 15118 rs71252250 A G . PASS ASP;GNO;RSPOS=15118;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
chr1 15211 rs144718396 T G . PASS ASP;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 15211 rs78601809 T G . PASS ASP;GNO;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 16257 rs78588380 G C . PASS ASP;GNO;RSPOS=16257;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 16378 rs148220436 T C . PASS ASP;RSPOS=16378;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 16495 rs141130360 G C . PASS ASP;RSPOS=16495;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 16497 rs150723783 A G . PASS ASP;RSPOS=16497;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 17519 rs192890528 G T . PASS RSPOS=17519;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
chr1 19226 rs138930629 T A . PASS ASP;RSPOS=19226;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 20141 rs56336884 G A . PASS HD;RSPOS=20141;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000400000100;WGT=0;dbSNPBuildID=129
chr1 20144 rs143346096 G A . PASS ASP;RSPOS=20144;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 20206 rs71262675 C T . PASS GNO;RSPOS=20206;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 20245 rs71262674 G A . PASS GMAF=0.256398537477148;GNO;RSPOS=20245;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 20304 rs71262673 G C . PASS GMAF=0.338208409506399;GNO;RSPOS=20304;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 26999 rs147506580 A G . PASS ASP;RSPOS=26999;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 29436 rs2462493 G A . PASS GNO;RSPOS=29436;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=100
chr1 30923 rs140337953 G T . PASS ASP;KGPilot123;RSPOS=30923;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 33487 rs77459554 C T . PASS ASP;GNO;RSPOS=33487;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 33495 rs75468675 C T . PASS ASP;GNO;RSPOS=33495;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33505 rs75627161 T C . PASS ASP;GNO;RSPOS=33505;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33508 rs75609629 A T . PASS ASP;GNO;RSPOS=33508;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33521 rs76098219 T A . PASS GNO;RSPOS=33521;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 33593 rs557585 G A . PASS RSPOS=33593;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=83
chr1 33648 rs62028204 G T . PASS RSPOS=33648;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
chr1 33656 rs113821789 T C . PASS RSPOS=33656;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=132
chr1 51476 rs187298206 T C . PASS KGPilot123;RSPOS=51476;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51479 rs116400033 T A . PASS ASP;G5;G5A;GMAF=0.113802559414991;KGPilot123;RSPOS=51479;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070010000100;WGT=0;dbSNPBuildID=132
chr1 51803 rs62637812 T C . PASS GMAF=0.468921389396709;RSPOS=51803;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040000000100;WGT=0;dbSNPBuildID=129
chr1 51898 rs76402894 C A . PASS GMAF=0.0731261425959781;GNO;RSPOS=51898;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
chr1 51914 rs190452223 T G . PASS KGPilot123;RSPOS=51914;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51928 rs78732933 G A . PASS GNO;RSPOS=51928;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
chr1 51935 rs181754315 C T . PASS KGPilot123;RSPOS=51935;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51954 rs185832753 G C . PASS KGPilot123;RSPOS=51954;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 52058 rs62637813 G C . PASS GMAF=0.0342778793418647;KGPilot123;RSPOS=52058;SAO=0;SSR=1;VC=SNV;VLD;VP=050000000000040010000140;WGT=0;dbSNPBuildID=129
chr1 52144 rs190291950 T A . PASS KGPilot123;RSPOS=52144;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 52238 rs150021059 T G . PASS ASP;KGPilot123;RSPOS=52238;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54353 rs140052487 C A . PASS ASP;KGPilot123;RSPOS=54353;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54421 rs146477069 A G . PASS ASP;KGPilot123;RSPOS=54421;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54490 rs141149254 G A . PASS ASP;KGPilot123;RSPOS=54490;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54676 rs2462492 C T . PASS ASP;GMAF=0.191956124314442;GNO;HD;KGPilot123;RSPOS=54676;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040510000100;WGT=0;dbSNPBuildID=100
chr1 54753 rs143174675 T G . PASS ASP;KGPilot123;RSPOS=54753;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54788 rs59861892 CC C,CCT . PASS ASP;RSPOS=54789;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 54795 rs58014817 T A . PASS ASP;RSPOS=54795;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
chr1 55164 rs3091274 C A . PASS G5;G5A;GMAF=0.145338208409506;GNO;KGPilot123;RSPOS=55164;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030110000100;WGT=0;dbSNPBuildID=103
chr1 55299 rs10399749 C T . PASS G5;G5A;GMAF=0.278793418647166;GNO;KGPilot123;PH2;RSPOS=55299;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030112000100;WGT=0;dbSNPBuildID=119
chr1 55302 rs3091273 C T . PASS RSPOS=55302;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
chr1 55313 rs182462964 A T . PASS KGPilot123;RSPOS=55313;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55322 rs3107974 C T . PASS RSPOS=55322;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
chr1 55326 rs3107975 T C . PASS GNO;HD;KGPilot123;RSPOS=55326;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=103
chr1 55330 rs185215913 G A . PASS KGPilot123;RSPOS=55330;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55367 rs190850374 G A . PASS KGPilot123;RSPOS=55367;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55388 rs182711216 C T . PASS KGPilot123;RSPOS=55388;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55394 rs2949420 T A . PASS GNO;KGPilot123;PH2;RSPOS=55394;SAO=0;SSR=0;VC=SNV;VP=050000000000000112000100;WGT=0;dbSNPBuildID=101
chr1 55416 rs193242050 G A . PASS KGPilot123;RSPOS=55416;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55427 rs183189405 T C . PASS KGPilot123;RSPOS=55427;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55545 rs28396308 C T . PASS GNO;RSPOS=55545;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=125
chr1 55816 rs187434873 G A . PASS KGPilot123;RSPOS=55816;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55850 rs191890754 C G . PASS KGPilot123;RSPOS=55850;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55852 rs184233019 G C . PASS KGPilot123;RSPOS=55852;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 56644 rs143342222 A C . PASS ASP;KGPilot123;RSPOS=56644;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 57952 rs189727433 A C . PASS KGPilot123;RSPOS=57952;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 58771 rs140128481 T C . PASS ASP;RSPOS=58771;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 58814 rs114420996 G A . PASS ASP;G5;GMAF=0.0982632541133455;KGPilot123;RSPOS=58814;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
chr1 59040 rs149755937 T C . PASS ASP;KGPilot123;RSPOS=59040;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 60718 rs78395614 G A . PASS CFL;GNO;RSPOS=60718;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 60726 rs192328835 C A . PASS KGPilot123;RSPOS=60726;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 60791 rs76199781 A G . PASS CFL;GNO;RSPOS=60791;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61442 rs74970982 A G . PASS CFL;GMAF=0.076782449725777;GNO;KGPilot123;RSPOS=61442;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61462 rs56992750 T A . PASS CFL;G5;G5A;GMAF=0.0383912248628885;GNO;KGPilot123;RSPOS=61462;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008030110000100;WGT=0;dbSNPBuildID=129
chr1 61480 rs75526266 G C . PASS CFL;GNO;RSPOS=61480;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61499 rs75719746 G A . PASS CFL;GNO;RSPOS=61499;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61743 rs184286948 G C . PASS KGPilot123;RSPOS=61743;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 61920 rs62637820 G A . PASS CFL;GMAF=0.0255941499085923;RSPOS=61920;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=129
chr1 61987 rs76735897 A G . PASS CFL;GMAF=0.292961608775137;GNO;KGPilot123;RSPOS=61987;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61989 rs77573425 G C . PASS CFL;GMAF=0.309414990859232;GNO;KGPilot123;RSPOS=61989;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61993 rs190553843 C T . PASS KGPilot123;RSPOS=61993;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 62156 rs181864839 C T . PASS KGPilot123;RSPOS=62156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 62157 rs10399597 G A . PASS CFL;GMAF=0.00228519195612431;KGPilot123;RSPOS=62157;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=119
chr1 62162 rs140556834 G A . PASS ASP;KGPilot123;RSPOS=62162;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 62203 rs28402963 T C . PASS CFL;KGPilot123;RSPOS=62203;SAO=0;SSR=0;VC=SNV;VP=050000000008000010000100;WGT=0;dbSNPBuildID=125
chr1 62271 rs28599927 A G . PASS CFL;GMAF=0.138482632541133;RSPOS=62271;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=125
chr1 63268 rs75478250 T C . PASS CFL;GNO;RSPOS=63268;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 63276 rs185977555 G A . PASS KGPilot123;RSPOS=63276;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 63297 rs188886746 G A . PASS KGPilot123;RSPOS=63297;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 63671 rs116440577 G A . PASS ASP;G5;GMAF=0.170018281535649;KGPilot123;RSPOS=63671;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
chr1 63737 rs77426996 TACT T,TCTA . PASS CFL;RSPOS=63738;SAO=0;SSR=0;VC=DIV;VP=050000000008000000000200;WGT=0;dbSNPBuildID=131
chr1 64649 rs181431124 A C . PASS KGPilot123;RSPOS=64649;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66008 rs2691286 C G . PASS CFL;GNO;RSPOS=66008;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=100
chr1 66162 rs62639105 A T . PASS CFL;GMAF=0.320383912248629;GNO;KGPilot123;RSPOS=66162;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000110000100;WGT=0;dbSNPBuildID=129
chr1 66176 rs28552463 T A . PASS CFL;GMAF=0.0484460694698355;KGPilot123;RSPOS=66176;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=125
chr1 66219 rs181028663 A T . PASS KGPilot123;RSPOS=66219;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66238 rs113961546 T A . PASS CFL;GNO;RSPOS=66238;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=132
chr1 66314 rs28534012 T A . PASS CFL;RSPOS=66314;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
chr1 66331 rs186063952 A C . PASS KGPilot123;RSPOS=66331;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66334 rs28464214 T A . PASS CFL;RSPOS=66334;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
chr1 66442 rs192044252 T A . PASS KGPilot123;RSPOS=66442;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66457 rs13328655 T A . PASS CFL;GMAF=0.0795246800731261;KGPilot123;RSPOS=66457;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=121
chr1 66503 rs112350669 T A . PASS CFL;RSPOS=66503;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=132
chr1 66507 rs12401368 T A . PASS CFL;GMAF=0.479890310786106;KGPilot123;RSPOS=66507;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=120
chr1 66651 rs2257270 A T . PASS CFL;GNO;RSPOS=66651;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=100
chr1 67179 rs149952626 C G . PASS ASP;KGPilot123;RSPOS=67179;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 67181 rs77662731 A G . PASS ASP;G5;G5A;GENEINFO=OR4F5:79501;GMAF=0.0470749542961609;GNO;KGPilot123;RSPOS=67181;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070110000100;WGT=0;dbSNPBuildID=131
chr1 67223 rs78676975 C A . PASS ASP;GENEINFO=OR4F5:79501;GNO;RSPOS=67223;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 69428 rs140739101 T G . PASS ASP;RSPOS=69428;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69453 rs142004627 G A . PASS ASP;RSPOS=69453;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 69476 rs148502021 T C . PASS ASP;RSPOS=69476;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69496 rs150690004 G A . PASS ASP;RSPOS=69496;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69511 rs75062661 A G . PASS GENEINFO=OR4F5:79501;GMAF=0.193784277879342;GNO;KGPilot123;RSPOS=69511;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000110000100;WGT=0;dbSNPBuildID=131
chr1 69534 rs190717287 T C . PASS KGPilot123;RSPOS=69534;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000010000100;WGT=0;dbSNPBuildID=135
chr1 69552 rs55874132 G C . PASS GENEINFO=OR4F5:79501;HD;RSPOS=69552;S3D;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050300000000040400000100;WGT=0;dbSNPBuildID=129
chr1 69590 rs141776804 T A . PASS ASP;RSPOS=69590;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 69594 rs144967600 T C . PASS ASP;RSPOS=69594;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 72148 rs182862337 C T . PASS KGPilot123;RSPOS=72148;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 73841 rs143773730 C T . PASS ASP;KGPilot123;RSPOS=73841;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 74651 rs62641291 G A . PASS RSPOS=74651;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
chr1 74681 rs13328683 G T . PASS CFL;GMAF=0.286106032906764;RSPOS=74681;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=121
chr1 74709 rs62641292 T A . PASS CFL;RSPOS=74709;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=129
chr1 74771 rs13328675 A G . PASS CFL;RSPOS=74771;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 74790 rs13328700 C G . PASS CFL;RSPOS=74790;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 74792 rs13328684 G A . PASS CFL;RSPOS=74792;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 77462 rs188023513 G A . PASS KGPilot123;RSPOS=77462;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77470 rs192898053 T C . PASS KGPilot123;RSPOS=77470;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77874 rs184538873 G A . PASS KGPilot123;RSPOS=77874;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77961 rs78385339 G A . PASS GMAF=0.125685557586837;KGPilot123;RSPOS=77961;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040010000100;WGT=0;dbSNPBuildID=131
chr1 79033 rs62641298 A G . PASS GMAF=0.438299817184644;GNO;HD;KGPilot123;RSPOS=79033;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=129
chr1 79050 rs62641299 G T . PASS GMAF=0.224405850091408;GNO;KGPilot123;RSPOS=79050;SAO=0;SSR=0;VC=SNV;VP=050000000000000110000100;WGT=0;dbSNPBuildID=129
chr1 79137 rs143777184 A T . PASS ASP;KGPilot123;RSPOS=79137;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 79417 rs184768190 C T . PASS KGPilot123;RSPOS=79417;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 79418 rs2691296 G C . PASS GMAF=0.0178244972577697;RSPOS=79418;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=100
chr1 79538 rs2691295 C T . PASS RSPOS=79538;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=100
chr1 79772 rs147215883 C G . PASS ASP;KGPilot123;RSPOS=79772;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 79872 rs189224661 T G . PASS KGPilot123;RSPOS=79872;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 80323 rs3942603 G C . PASS CFL;GNO;RSPOS=80323;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=108
chr1 80386 rs3878915 C A . PASS GMAF=0.0118829981718464;RSPOS=80386;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=108
chr1 80454 rs144226842 G C . PASS ASP;KGPilot123;RSPOS=80454;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 81836 rs2259560 A T . PASS ASP;GNO;RSPOS=81836;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=100
chr1 81949 rs181567186 T C . PASS KGPilot123;RSPOS=81949;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 81962 rs4030308 T TAA . PASS ASP;RSPOS=81962;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 82102 rs4030307 C T . PASS ASP;RSPOS=82102;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82103 rs2020400 T C . PASS ASP;RSPOS=82103;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
chr1 82126 rs1815133 C T . PASS ASP;RSPOS=82126;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
chr1 82133 rs4030306 CA C,CAAAAAAAAAAAAAAA . PASS ASP;RSPOS=82136;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 82154 rs4477212 A G . PASS ASP;HD;RSPOS=82154;SAO=0;SSR=0;VC=SNV;VP=050000000004000400000100;WGT=0;dbSNPBuildID=111
chr1 82162 rs1815132 C A . PASS ASP;GMAF=0.0351919561243144;GNO;RSPOS=82162;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=92
chr1 82163 rs139113303 G A . PASS ASP;KGPilot123;RSPOS=82163;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 82196 rs112844054 A T . PASS ASP;RSPOS=82196;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 82249 rs1851945 A G . PASS ASP;GMAF=0.0452468007312614;KGPilot123;RSPOS=82249;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=92
chr1 82282 rs3871775 G A . PASS ASP;RSPOS=82282;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82303 rs3871776 T C . PASS ASP;RSPOS=82303;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82316 rs4030305 A C . PASS ASP;GNO;RSPOS=82316;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=108
chr1 82609 rs149189449 C G . PASS ASP;KGPilot123;RSPOS=82609;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 82676 rs185237834 T G . PASS KGPilot123;RSPOS=82676;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 82734 rs4030331 T C . PASS ASP;GMAF=0.261882998171846;KGPilot123;RSPOS=82734;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=108
chr1 82957 rs189774606 C T . PASS KGPilot123;RSPOS=82957;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83084 rs181193408 T A . PASS KGPilot123;RSPOS=83084;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83088 rs186081601 G C . PASS KGPilot123;RSPOS=83088;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83107 rs4405097 G C . PASS ASP;RSPOS=83107;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=111
chr1 83119 rs4030324 AA A,ATAAC . PASS ASP;RSPOS=83120;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 83771 rs189906733 T G . PASS KGPilot123;RSPOS=83771;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83786 rs58520670 T TA . PASS ASP;RSPOS=83794;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83815 rs58857344 GAGAA G . PASS ASP;RSPOS=83827;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83826 rs71281475 AAAGA A,AAA . PASS ASP;GNO;RSPOS=83827;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 83855 rs59596480 GAA G . PASS ASP;RSPOS=83857;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83872 rs59556914 AA A,AAGA . PASS ASP;RSPOS=83873;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83884 rs59586754 GAAA G . PASS ASP;RSPOS=83885;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83897 rs61330047 GAA G . PASS ASP;RSPOS=83899;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83901 rs58254183 GAAAGAA G . PASS ASP;RSPOS=83903;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83921 rs61338823 GAA G . PASS ASP;RSPOS=83923;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83930 rs71281474 AG A,AGA . PASS ASP;GNO;RSPOS=83931;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 83934 rs59235392 AG A,AGAAA . PASS ASP;RSPOS=83935;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83977 rs180759811 A G . PASS KGPilot123;RSPOS=83977;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84002 rs28850140 G A . PASS ASP;GMAF=0.138939670932358;KGPilot123;RSPOS=84002;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=125
chr1 84010 rs186443818 G A . PASS KGPilot123;RSPOS=84010;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84018 rs61352176 GAA G . PASS ASP;RSPOS=84020;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 84079 rs190867312 T C . PASS KGPilot123;RSPOS=84079;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84139 rs183605470 A T . PASS KGPilot123;RSPOS=84139;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84156 rs188652299 A C . PASS KGPilot123;RSPOS=84156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84244 rs191297051 A C . PASS KGPilot123;RSPOS=84244;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84295 rs183209871 G A . PASS KGPilot123;RSPOS=84295;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84346 rs187855973 T C . PASS KGPilot123;RSPOS=84346;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84453 rs191379015 C G . PASS KGPilot123;RSPOS=84453;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84705 rs183470350 T G . PASS KGPilot123;RSPOS=84705;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135

View File

@ -0,0 +1 @@
100000 1 0

View File

@ -0,0 +1,3 @@
100000 1 11
0 chr1 (null)
0 100000 0

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.