Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mark DePristo 2011-12-13 18:19:41 -05:00
commit 7dd5c74591
37 changed files with 1367 additions and 673 deletions

View File

@ -711,6 +711,8 @@ public class SAMDataSource {
* @param validationStringency validation stringency. * @param validationStringency validation stringency.
*/ */
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) { public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
for(SAMReaderID readerID: readerIDs) { for(SAMReaderID readerID: readerIDs) {
File indexFile = findIndexFile(readerID.samFile); File indexFile = findIndexFile(readerID.samFile);
@ -728,8 +730,7 @@ public class SAMDataSource {
reader.enableFileSource(true); reader.enableFileSource(true);
reader.setValidationStringency(validationStringency); reader.setValidationStringency(validationStringency);
final SAMFileHeader header = reader.getFileHeader(); logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
readers.put(readerID,reader); readers.put(readerID,reader);
} }

View File

@ -142,20 +142,75 @@ public class SampleDB {
* @return * @return
*/ */
public final Map<String, Set<Sample>> getFamilies() { public final Map<String, Set<Sample>> getFamilies() {
return getFamilies(null);
}
/**
* Returns a map from family ID -> set of family members for all samples in sampleIds with
* non-null family ids
*
* @param sampleIds - all samples to include. If null is passed then all samples are returned.
* @return
*/
public final Map<String, Set<Sample>> getFamilies(Collection<String> sampleIds) {
final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>(); final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
for ( final Sample sample : samples.values() ) { for ( final Sample sample : samples.values() ) {
final String famID = sample.getFamilyID(); if(sampleIds == null || sampleIds.contains(sample.getID())){
if ( famID != null ) { final String famID = sample.getFamilyID();
if ( ! families.containsKey(famID) ) if ( famID != null ) {
families.put(famID, new TreeSet<Sample>()); if ( ! families.containsKey(famID) )
families.get(famID).add(sample); families.put(famID, new TreeSet<Sample>());
families.get(famID).add(sample);
}
} }
} }
return families; return families;
} }
/**
* Returns the set of all children that have both of their parents.
* Note that if a family is composed of more than 1 child, each child is
* returned.
* @return - all the children that have both of their parents
*/
public final Set<Sample> getChildrenWithParents(){
return getChildrenWithParents(false);
}
/**
* Returns the set of all children that have both of their parents.
* Note that if triosOnly = false, a family is composed of more than 1 child, each child is
* returned.
*
* This method can be used wherever trios are needed
*
* @param triosOnly - if set to true, only strict trios are returned
* @return - all the children that have both of their parents
*/
public final Set<Sample> getChildrenWithParents(boolean triosOnly) {
Map<String, Set<Sample>> families = getFamilies();
final Set<Sample> childrenWithParents = new HashSet<Sample>();
Iterator<Sample> sampleIterator;
for ( Set<Sample> familyMembers: families.values() ) {
if(triosOnly && familyMembers.size() != 3)
continue;
sampleIterator = familyMembers.iterator();
Sample sample;
while(sampleIterator.hasNext()){
sample = sampleIterator.next();
if(sample.getParents().size() == 2 && familyMembers.containsAll(sample.getParents()))
childrenWithParents.add(sample);
}
}
return childrenWithParents;
}
/** /**
* Return all samples with a given family ID * Return all samples with a given family ID
* @param familyId * @param familyId

View File

@ -88,7 +88,7 @@ public abstract class Walker<MapType, ReduceType> {
return getToolkit().getMasterSequenceDictionary(); return getToolkit().getMasterSequenceDictionary();
} }
protected SampleDB getSampleDB() { public SampleDB getSampleDB() {
return getToolkit().getSampleDB(); return getToolkit().getSampleDB();
} }

View File

@ -3,22 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays; import java.util.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -30,23 +26,26 @@ import java.util.Map;
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation { public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
private MendelianViolation mendelianViolation = null; private MendelianViolation mendelianViolation = null;
private String motherId;
private String fatherId;
private String childId;
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( mendelianViolation == null ) { if ( mendelianViolation == null ) {
if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) { if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
} }
else { else {
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line."); throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
} }
} }
Map<String,Object> toRet = new HashMap<String,Object>(1); Map<String,Object> toRet = new HashMap<String,Object>(1);
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() && boolean hasAppropriateGenotypes = vc.hasGenotype(motherId) && vc.getGenotype(motherId).hasLikelihoods() &&
vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() && vc.hasGenotype(fatherId) && vc.getGenotype(fatherId).hasLikelihoods() &&
vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods(); vc.hasGenotype(childId) && vc.getGenotype(childId).hasLikelihoods();
if ( hasAppropriateGenotypes ) if ( hasAppropriateGenotypes )
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc,motherId,fatherId,childId));
return toRet; return toRet;
} }
@ -55,4 +54,27 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
public List<String> getKeyNames() { return Arrays.asList("MVLR"); } public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
private boolean checkAndSetSamples(SampleDB db){
Set<String> families = db.getFamilyIDs();
if(families.size() != 1)
return false;
Set<Sample> family = db.getFamily(families.iterator().next());
if(family.size() != 3)
return false;
Iterator<Sample> sampleIter = family.iterator();
Sample sample;
for(sample = sampleIter.next();sampleIter.hasNext();sample=sampleIter.next()){
if(sample.getParents().size()==2){
motherId = sample.getMaternalID();
fatherId = sample.getPaternalID();
childId = sample.getID();
return true;
}
}
return false;
}
} }

View File

@ -12,10 +12,8 @@ import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
/** /**
@ -26,42 +24,33 @@ import java.util.*;
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation { public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation {
private Set<MendelianViolation> fullMVSet = null; private Set<Sample> trios = null;
private final static int REF = 0; private final static int REF = 0;
private final static int HET = 1; private final static int HET = 1;
private final static int HOM = 2; private final static int HOM = 2;
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( fullMVSet == null ) { if ( trios == null ) {
fullMVSet = new HashSet<MendelianViolation>();
if ( walker instanceof VariantAnnotator ) { if ( walker instanceof VariantAnnotator ) {
final Map<String,Set<Sample>> families = ((VariantAnnotator) walker).getSampleDB().getFamilies(); trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
for( final Set<Sample> family : families.values() ) {
for( final Sample sample : family ) {
if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now
fullMVSet.add( new MendelianViolation(sample, 0.0) );
}
}
}
} else { } else {
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in."); throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
} }
} }
final Map<String,Object> toRet = new HashMap<String,Object>(1); final Map<String,Object> toRet = new HashMap<String,Object>(1);
final HashSet<MendelianViolation> mvsToTest = new HashSet<MendelianViolation>(); final HashSet<Sample> triosToTest = new HashSet<Sample>();
for( final MendelianViolation mv : fullMVSet ) { for( final Sample child : trios) {
final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() && final boolean hasAppropriateGenotypes = vc.hasGenotype(child.getID()) && vc.getGenotype(child.getID()).hasLikelihoods() &&
vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() && vc.hasGenotype(child.getPaternalID()) && vc.getGenotype(child.getPaternalID()).hasLikelihoods() &&
vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods(); vc.hasGenotype(child.getMaternalID()) && vc.getGenotype(child.getMaternalID()).hasLikelihoods();
if ( hasAppropriateGenotypes ) { if ( hasAppropriateGenotypes ) {
mvsToTest.add(mv); triosToTest.add(child);
} }
} }
toRet.put("TDT", calculateTDT( vc, mvsToTest )); toRet.put("TDT", calculateTDT( vc, triosToTest ));
return toRet; return toRet;
} }
@ -72,27 +61,27 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
private double calculateTDT( final VariantContext vc, final Set<MendelianViolation> mvsToTest ) { private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM); final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM);
final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM); final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM);
final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET); final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET);
final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET); final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET);
final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET); final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET);
final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET); final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET);
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
return (numer * numer) / denom; return (numer * numer) / denom;
} }
private double calculateNChildren( final VariantContext vc, final Set<MendelianViolation> mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) { private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
final double likelihoodVector[] = new double[mvsToTest.size() * 2]; final double likelihoodVector[] = new double[triosToTest.size() * 2];
int iii = 0; int iii = 0;
for( final MendelianViolation mv : mvsToTest ) { for( final Sample child : triosToTest ) {
final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector(); final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector(); final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector(); final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
} }

View File

@ -167,9 +167,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false; protected boolean indelsOnly = false;
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
public String familyStr = null;
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio") @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
public double minGenotypeQualityP = 0.0; public double minGenotypeQualityP = 0.0;

View File

@ -1,94 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.variantcontext.Allele;
public class BiallelicGenotypeLikelihoods {
private String sample;
private double[] GLs;
private Allele A, B;
private int depth;
/**
* Create a new object for sample with given alleles and genotype likelihoods
*
* @param sample sample name
* @param A allele A
* @param B allele B
* @param log10AALikelihoods AA likelihoods
* @param log10ABLikelihoods AB likelihoods
* @param log10BBLikelihoods BB likelihoods
* @param depth the read depth used in creating the likelihoods
*/
public BiallelicGenotypeLikelihoods(String sample,
Allele A,
Allele B,
double log10AALikelihoods,
double log10ABLikelihoods,
double log10BBLikelihoods,
int depth) {
this.sample = sample;
this.A = A;
this.B = B;
this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods};
this.depth = depth;
}
public String getSample() {
return sample;
}
public double getAALikelihoods() {
return GLs[0];
}
public double getABLikelihoods() {
return GLs[1];
}
public double getBBLikelihoods() {
return GLs[2];
}
public double[] getLikelihoods() {
return GLs;
}
public Allele getAlleleA() {
return A;
}
public Allele getAlleleB() {
return B;
}
public int getDepth() {
return depth;
}
}

View File

@ -27,13 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: Aug 4, 2009
* Time: 6:46:09 PM
* To change this template use File | Settings | File Templates.
*/
public enum DiploidGenotype { public enum DiploidGenotype {
AA ('A', 'A'), AA ('A', 'A'),
AC ('A', 'C'), AC ('A', 'C'),
@ -110,6 +103,20 @@ public enum DiploidGenotype {
return conversionMatrix[index1][index2]; return conversionMatrix[index1][index2];
} }
/**
* create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly
* @param baseIndex1 base1
* @param baseIndex2 base2
* @return the diploid genotype
*/
public static DiploidGenotype createDiploidGenotype(int baseIndex1, int baseIndex2) {
if ( baseIndex1 == -1 )
throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character");
if ( baseIndex2 == -1 )
throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character");
return conversionMatrix[baseIndex1][baseIndex2];
}
private static final DiploidGenotype[][] conversionMatrix = { private static final DiploidGenotype[][] conversionMatrix = {
{ DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT }, { DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT },
{ DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT }, { DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT },

View File

@ -56,7 +56,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) { private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); // TODO -- initialize with size of GLs
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) { for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
@ -364,7 +364,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
else { else {
// all possible likelihoods for a given cell from which to choose the max // all possible likelihoods for a given cell from which to choose the max
final int numPaths = set.ACsetIndexToPLIndex.size() + 1; final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
final double[] log10ConformationLikelihoods = new double[numPaths]; final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
final double[] gl = genotypeLikelihoods.get(j); final double[] gl = genotypeLikelihoods.get(j);
@ -372,6 +372,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// initialize // initialize
for ( int i = 0; i < numPaths; i++ ) for ( int i = 0; i < numPaths; i++ )
// TODO -- Arrays.fill?
// todo -- is this even necessary? Why not have as else below?
log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY; log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY;
// deal with the AA case first // deal with the AA case first
@ -417,6 +419,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) { private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
// todo -- arent' there a small number of fixed values that this function can adopt?
// todo -- at a minimum it'd be good to partially compute some of these in ACCounts for performance
// todo -- need to cache PLIndex -> two alleles, compute looping over each PLIndex. Note all other operations are efficient
// todo -- this can be computed once at the start of the all operations
// the closed form representation generalized for multiple alleles is as follows: // the closed form representation generalized for multiple alleles is as follows:
// AA: (2j - totalK) * (2j - totalK - 1) // AA: (2j - totalK) * (2j - totalK - 1)

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Map; import java.util.Map;
@ -79,19 +80,17 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
* @param contexts stratified alignment contexts * @param contexts stratified alignment contexts
* @param contextType stratified context type * @param contextType stratified context type
* @param priors priors to use for GLs * @param priors priors to use for GLs
* @param GLs hash of sample->GL to fill in
* @param alternateAlleleToUse the alternate allele to use, null if not set * @param alternateAlleleToUse the alternate allele to use, null if not set
* @param useBAQedPileup should we use the BAQed pileup or the raw one? * @param useBAQedPileup should we use the BAQed pileup or the raw one?
* @return genotype likelihoods per sample for AA, AB, BB * @return variant context where genotypes are no-called but with GLs
*/ */
public abstract Allele getLikelihoods(RefMetaDataTracker tracker, public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref, ReferenceContext ref,
Map<String, AlignmentContext> contexts, Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType, AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors, GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs, Allele alternateAlleleToUse,
Allele alternateAlleleToUse, boolean useBAQedPileup);
boolean useBAQedPileup);
protected int getFilteredDepth(ReadBackedPileup pileup) { protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0; int count = 0;

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -41,8 +42,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
@ -243,7 +243,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// get deletion length // get deletion length
int dLen = Integer.valueOf(bestAltAllele.substring(1)); int dLen = Integer.valueOf(bestAltAllele.substring(1));
// get ref bases of accurate deletion // get ref bases of accurate deletion
int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart()); int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
//System.out.println(new String(ref.getBases())); //System.out.println(new String(ref.getBases()));
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen); byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
@ -270,19 +270,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED); private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
public Allele getLikelihoods(RefMetaDataTracker tracker, public VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref, ReferenceContext ref,
Map<String, AlignmentContext> contexts, Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType, AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors, GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs, Allele alternateAlleleToUse,
Allele alternateAlleleToUse, boolean useBAQedPileup) {
boolean useBAQedPileup) {
if ( tracker == null ) if ( tracker == null )
return null; return null;
GenomeLoc loc = ref.getLocus(); GenomeLoc loc = ref.getLocus();
Allele refAllele, altAllele; Allele refAllele, altAllele;
VariantContext vc = null; VariantContext vc = null;
@ -368,10 +366,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(), haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
ref, hsize, numPrefBases); ref, hsize, numPrefBases);
// start making the VariantContext
final int endLoc = calculateEndPos(alleleList, refAllele, loc);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
// For each sample, get genotype likelihoods based on pileup // For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
// initialize the GenotypeLikelihoods
GLs.clear();
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) { for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
@ -384,11 +389,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (pileup != null ) { if (pileup != null ) {
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap()); final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), HashMap<String, Object> attributes = new HashMap<String, Object>();
alleleList, attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
genotypeLikelihoods, attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
getFilteredDepth(pileup))); genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
if (DEBUG) { if (DEBUG) {
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString()); System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
@ -399,9 +405,25 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} }
} }
return refAllele; return builder.genotypes(genotypes).make();
} }
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
// for indels, stop location is one more than ref allele length
boolean hasNullAltAllele = false;
for ( Allele a : alleles ) {
if ( a.isNull() ) {
hasNullAltAllele = true;
break;
}
}
int endLoc = loc.getStart() + refAllele.length();
if( !hasNullAltAllele )
endLoc--;
return endLoc;
}
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() { public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
return indelLikelihoodMap.get(); return indelLikelihoodMap.get();

View File

@ -1,52 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: 6/1/11
* Time: 10:38 AM
* To change this template use File | Settings | File Templates.
*/
public class MultiallelicGenotypeLikelihoods {
private String sample;
private double[] GLs;
private List<Allele> alleleList;
private int depth;
public MultiallelicGenotypeLikelihoods(String sample,
List<Allele> A,
double[] log10Likelihoods, int depth) {
/* Check for consistency between likelihood vector and number of alleles */
int numAlleles = A.size();
if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2)
throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!"));
this.sample = sample;
this.alleleList = A;
this.GLs = log10Likelihoods;
this.depth = depth;
}
public String getSample() {
return sample;
}
public double[] getLikelihoods() {
return GLs;
}
public List<Allele> getAlleles() {
return alleleList;
}
public int getDepth() {
return depth;
}
}

View File

@ -31,107 +31,147 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.*;
import java.util.List;
import java.util.Map;
public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
// the alternate allele with the largest sum of quality scores private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50;
protected Byte bestAlternateAllele = null;
private boolean ALLOW_MULTIPLE_ALLELES;
private final boolean useAlleleFromVCF; private final boolean useAlleleFromVCF;
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger); super(UAC, logger);
ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC;
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
} }
public Allele getLikelihoods(RefMetaDataTracker tracker, public VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref, ReferenceContext ref,
Map<String, AlignmentContext> contexts, Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType, AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors, GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs, Allele alternateAlleleToUse,
Allele alternateAlleleToUse, boolean useBAQedPileup) {
boolean useBAQedPileup) {
if ( !(priors instanceof DiploidSNPGenotypePriors) ) if ( !(priors instanceof DiploidSNPGenotypePriors) )
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model"); throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
byte refBase = ref.getBase(); final boolean[] basesToUse = new boolean[4];
Allele refAllele = Allele.create(refBase, true); final byte refBase = ref.getBase();
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
// find the alternate allele with the largest sum of quality scores // start making the VariantContext
final GenomeLoc loc = ref.getLocus();
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(Allele.create(refBase, true));
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);
// find the alternate allele(s) that we should be using
if ( alternateAlleleToUse != null ) { if ( alternateAlleleToUse != null ) {
bestAlternateAllele = alternateAlleleToUse.getBases()[0]; basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true;
} else if ( useAlleleFromVCF ) { } else if ( useAlleleFromVCF ) {
VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles); final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
// ignore places where we don't have a variant // ignore places where we don't have a SNP
if ( vc == null ) if ( vc == null || !vc.isSNP() )
return null; return null;
if ( !vc.isBiallelic() ) { for ( Allele allele : vc.getAlternateAlleles() )
// for multi-allelic sites go back to the reads and find the most likely alternate allele basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true;
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
} else {
bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0];
}
} else { } else {
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup);
// how many alternate alleles are we using?
int alleleCounter = countSetBits(basesToUse);
// if there are no non-ref alleles...
if ( alleleCounter == 0 ) {
// if we only want variants, then we don't need to calculate genotype likelihoods
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
return builder.make();
// otherwise, choose any alternate allele (it doesn't really matter)
basesToUse[indexOfRefBase == 0 ? 1 : 0] = true;
}
} }
// if there are no non-ref bases... // create the alternate alleles and the allele ordering (the ordering is crucial for the GLs)
if ( bestAlternateAllele == null ) { final int numAltAlleles = countSetBits(basesToUse);
// if we only want variants, then we don't need to calculate genotype likelihoods final int[] alleleOrdering = new int[numAltAlleles + 1];
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) alleleOrdering[0] = indexOfRefBase;
return refAllele; int alleleOrderingIndex = 1;
int numLikelihoods = 1;
// otherwise, choose any alternate allele (it doesn't really matter) for ( int i = 0; i < 4; i++ ) {
bestAlternateAllele = (byte)(refBase != 'A' ? 'A' : 'C'); if ( i != indexOfRefBase && basesToUse[i] ) {
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
alleleOrdering[alleleOrderingIndex++] = i;
numLikelihoods += alleleOrderingIndex;
}
} }
builder.alleles(alleles);
Allele altAllele = Allele.create(bestAlternateAllele, false); // create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) { for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); } if ( useBAQedPileup )
pileup = createBAQedPileup( pileup );
// create the GenotypeLikelihoods object // create the GenotypeLikelihoods object
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error); final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE); final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
if ( nGoodBases == 0 ) if ( nGoodBases == 0 )
continue; continue;
double[] likelihoods = GL.getLikelihoods(); final double[] allLikelihoods = GL.getLikelihoods();
final double[] myLikelihoods = new double[numLikelihoods];
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase); int myLikelihoodsIndex = 0;
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele); for ( int i = 0; i <= numAltAlleles; i++ ) {
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele); for ( int j = i; j <= numAltAlleles; j++ ) {
ArrayList<Allele> aList = new ArrayList<Allele>(); myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()];
aList.add(refAllele); }
aList.add(altAllele); }
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
// normalize in log space so that max element is zero. // normalize in log space so that max element is zero.
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
} }
return refAllele; return builder.genotypes(genotypes).make();
} }
protected void initializeBestAlternateAllele(byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) { private int countSetBits(boolean[] array) {
int counter = 0;
for ( int i = 0; i < array.length; i++ ) {
if ( array[i] )
counter++;
}
return counter;
}
// fills in the allelesToUse array
protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
int[] qualCounts = new int[4]; int[] qualCounts = new int[4];
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) { for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
@ -139,7 +179,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup(); ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup();
for ( PileupElement p : pileup ) { for ( PileupElement p : pileup ) {
// ignore deletions // ignore deletions
if ( p.isDeletion() || (! p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE )) if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) )
continue; continue;
final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase()); final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
@ -149,17 +189,31 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
} }
} }
// set the non-ref base with maximum quality score sum if ( ALLOW_MULTIPLE_ALLELES ) {
int maxCount = 0; for ( byte altAllele : BaseUtils.BASES ) {
bestAlternateAllele = null; if ( altAllele == ref )
for ( byte altAllele : BaseUtils.BASES ) { continue;
if ( altAllele == ref ) int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
continue; if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) {
int index = BaseUtils.simpleBaseToBaseIndex(altAllele); allelesToUse[index] = true;
if ( qualCounts[index] > maxCount ) { }
maxCount = qualCounts[index];
bestAlternateAllele = altAllele;
} }
} else {
// set the non-ref base which has the maximum quality score sum
int maxCount = 0;
int indexOfMax = 0;
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( qualCounts[index] > maxCount ) {
maxCount = qualCounts[index];
indexOfMax = index;
}
}
if ( maxCount > 0 )
allelesToUse[indexOfMax] = true;
} }
} }

View File

@ -219,14 +219,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
} }
Map<String, MultiallelicGenotypeLikelihoods> GLs = new HashMap<String, MultiallelicGenotypeLikelihoods>(); return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
if ( refAllele != null )
return createVariantContextFromLikelihoods(refContext, refAllele, GLs);
else
return null;
} }
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) { private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -261,40 +254,6 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vc, false); return new VariantCallContext(vc, false);
} }
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
// no-call everyone for now
List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
Set<Allele> alleles = new LinkedHashSet<Allele>();
alleles.add(refAllele);
boolean addedAltAlleles = false;
GenotypesContext genotypes = GenotypesContext.create();
for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) {
if ( !addedAltAlleles ) {
addedAltAlleles = true;
// ordering important to maintain consistency
for (Allele a: GL.getAlleles()) {
alleles.add(a);
}
}
HashMap<String, Object> attributes = new HashMap<String, Object>();
//GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(GL.getLikelihoods());
attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth());
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
}
GenomeLoc loc = refContext.getLocus();
int endLoc = calculateEndPos(alleles, refAllele, loc);
return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make();
}
public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
return calculateGenotypes(null, null, null, null, vc, model); return calculateGenotypes(null, null, null, null, vc, model);
} }
@ -412,8 +371,11 @@ public class UnifiedGenotyperEngine {
builder.log10PError(phredScaledConfidence/-10.0); builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) ) if ( ! passesCallThreshold(phredScaledConfidence) )
builder.filters(filter); builder.filters(filter);
if ( !limitedContext ) if ( limitedContext ) {
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
} else {
builder.referenceBaseForIndel(refContext.getBase()); builder.referenceBaseForIndel(refContext.getBase());
}
// create the genotypes // create the genotypes
GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse); GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse);
@ -494,42 +456,6 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
} }
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
// TODO - temp fix until we can deal with extended events properly
// for indels, stop location is one more than ref allele length
boolean isSNP = true, hasNullAltAllele = false;
for (Allele a : alleles){
if (a.length() != 1) {
isSNP = false;
break;
}
}
for (Allele a : alleles){
if (a.isNull()) {
hasNullAltAllele = true;
break;
}
}
// standard deletion: ref allele length = del length. endLoc = startLoc + refAllele.length(), alt allele = null
// standard insertion: ref allele length = 0, endLos = startLoc
// mixed: want end loc = start Loc for case {A*,AT,T} but say {ATG*,A,T} : want then end loc = start loc + refAllele.length
// So, in general, end loc = startLoc + refAllele.length, except in complex substitutions where it's one less
//
// todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler,
// the whole vc processing infrastructure seems too brittle and riddled with special case handling
int endLoc = loc.getStart();
if ( !isSNP) {
endLoc += refAllele.length();
if(!hasNullAltAllele)
endLoc--;
}
return endLoc;
}
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) { private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
Map<String, AlignmentContext> stratifiedContexts = null; Map<String, AlignmentContext> stratifiedContexts = null;

View File

@ -320,17 +320,17 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
if(transmissionProb != NO_TRANSMISSION_PROB) if(transmissionProb != NO_TRANSMISSION_PROB)
phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb)); phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb));
//Handle null, missing and unavailable genotypes //Handle null, missing and unavailable genotypes
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable //Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
//genotype so it is safe to return the original genotype in this case. //genotype so it is safe to return the original genotype in this case.
//In addition, if the phasing confidence is 0, then return the unphased, original genotypes. //In addition, if the phasing confidence is 0, then return the unphased, original genotypes.
if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType())) if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType()))
return genotype; return genotype;
//Add the transmission probability //Add the transmission probability
Map<String, Object> genotypeAttributes = new HashMap<String, Object>(); Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
genotypeAttributes.putAll(genotype.getAttributes()); genotypeAttributes.putAll(genotype.getAttributes());
if(transmissionProb>NO_TRANSMISSION_PROB) if(transmissionProb>NO_TRANSMISSION_PROB)
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission); genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
ArrayList<Allele> phasedAlleles = new ArrayList<Allele>(2); ArrayList<Allele> phasedAlleles = new ArrayList<Allele>(2);

View File

@ -164,13 +164,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false) @Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
protected double MIN_PHASE_QUALITY = 10.0; protected double MIN_PHASE_QUALITY = 10.0;
/** @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation. Default is 50.", required=false)
* This argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined.
*/
@Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations", required=false)
protected String FAMILY_STRUCTURE;
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
@ -561,8 +555,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; } public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; }
public String getFamilyStructure() { return FAMILY_STRUCTURE; }
public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; } public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; }
public TreeSet<VariantStratifier> getStratificationObjects() { return stratificationObjects; } public TreeSet<VariantStratifier> getStratificationObjects() { return stratificationObjects; }

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -7,9 +9,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.io.PrintStream;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
/** /**
* Mendelian violation detection and counting * Mendelian violation detection and counting
@ -40,12 +44,25 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator") @Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
public class MendelianViolationEvaluator extends VariantEvaluator { public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description = "Number of mendelian variants found") @DataPoint(description = "Number of variants found with at least one family having genotypes")
long nVariants; long nVariants;
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall")
long nSkipped;
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)")
long nFamCalled;
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.")
long nVarFamCalled;
@DataPoint(description="Number of variants x families discarded as low quality")
long nLowQual;
@DataPoint(description="Number of variants x families discarded as no call")
long nNoCall;
@DataPoint(description="Number of loci with mendelian violations")
long nLociViolations;
@DataPoint(description = "Number of mendelian violations found") @DataPoint(description = "Number of mendelian violations found")
long nViolations; long nViolations;
@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
long KidHomRef_ParentHomVar; long KidHomRef_ParentHomVar;
@DataPoint(description = "number of child het calls where the parent was hom ref") @DataPoint(description = "number of child het calls where the parent was hom ref")
long KidHet_ParentsHomRef; long KidHet_ParentsHomRef;
@ -53,11 +70,65 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
long KidHet_ParentsHomVar; long KidHet_ParentsHomVar;
@DataPoint(description = "number of child hom variant calls where the parent was hom ref") @DataPoint(description = "number of child hom variant calls where the parent was hom ref")
long KidHomVar_ParentHomRef; long KidHomVar_ParentHomRef;
*/
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR")
long mvRefRef_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET")
long mvRefRef_Het;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR")
long mvRefHet_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR")
long mvRefVar_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF")
long mvRefVar_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF")
long mvVarHet_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF")
long mvVarVar_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET")
long mvVarVar_Het;
/*@DataPoint(description ="Number of inherited var alleles from het parents")
long nInheritedVar;
@DataPoint(description ="Number of inherited ref alleles from het parents")
long nInheritedRef;*/
@DataPoint(description="Number of HomRef/HomRef/HomRef trios")
long HomRefHomRef_HomRef;
@DataPoint(description="Number of Het/Het/Het trios")
long HetHet_Het;
@DataPoint(description="Number of Het/Het/HomRef trios")
long HetHet_HomRef;
@DataPoint(description="Number of Het/Het/HomVar trios")
long HetHet_HomVar;
@DataPoint(description="Number of HomVar/HomVar/HomVar trios")
long HomVarHomVar_HomVar;
@DataPoint(description="Number of HomRef/HomVar/Het trios")
long HomRefHomVAR_Het;
@DataPoint(description="Number of ref alleles inherited from het/het parents")
long HetHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from het/het parents")
long HetHet_inheritedVar;
@DataPoint(description="Number of ref alleles inherited from homRef/het parents")
long HomRefHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from homRef/het parents")
long HomRefHet_inheritedVar;
@DataPoint(description="Number of ref alleles inherited from homVar/het parents")
long HomVarHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from homVar/het parents")
long HomVarHet_inheritedVar;
MendelianViolation mv; MendelianViolation mv;
PrintStream mvFile;
Map<String,Set<Sample>> families;
public void initialize(VariantEvalWalker walker) { public void initialize(VariantEvalWalker walker) {
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold()); //Changed by Laurent Francioli - 2011-06-07
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
families = walker.getSampleDB().getFamilies();
} }
public boolean enabled() { public boolean enabled() {
@ -75,110 +146,48 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
if (mv.setAlleles(vc)) {
if(mv.countViolations(families,vc)>0){
nLociViolations++;
nViolations += mv.getViolationsCount();
mvRefRef_Var += mv.getParentsRefRefChildVar();
mvRefRef_Het += mv.getParentsRefRefChildHet();
mvRefHet_Var += mv.getParentsRefHetChildVar();
mvRefVar_Var += mv.getParentsRefVarChildVar();
mvRefVar_Ref += mv.getParentsRefVarChildRef();
mvVarHet_Ref += mv.getParentsVarHetChildRef();
mvVarVar_Ref += mv.getParentsVarVarChildRef();
mvVarVar_Het += mv.getParentsVarVarChildHet();
}
HomRefHomRef_HomRef += mv.getRefRefRef();
HetHet_Het += mv.getHetHetHet();
HetHet_HomRef += mv.getHetHetHomRef();
HetHet_HomVar += mv.getHetHetHomVar();
HomVarHomVar_HomVar += mv.getVarVarVar();
HomRefHomVAR_Het += mv.getRefVarHet();
HetHet_inheritedRef += mv.getParentsHetHetInheritedRef();
HetHet_inheritedVar += mv.getParentsHetHetInheritedVar();
HomRefHet_inheritedRef += mv.getParentsRefHetInheritedRef();
HomRefHet_inheritedVar += mv.getParentsRefHetInheritedVar();
HomVarHet_inheritedRef += mv.getParentsVarHetInheritedRef();
HomVarHet_inheritedVar += mv.getParentsVarHetInheritedVar();
if(mv.getFamilyCalledCount()>0){
nVariants++; nVariants++;
nFamCalled += mv.getFamilyCalledCount();
Genotype momG = vc.getGenotype(mv.getSampleMom()); nLowQual += mv.getFamilyLowQualsCount();
Genotype dadG = vc.getGenotype(mv.getSampleDad()); nNoCall += mv.getFamilyNoCallCount();
Genotype childG = vc.getGenotype(mv.getSampleChild()); nVarFamCalled += mv.getVarFamilyCalledCount();
if (mv.isViolation()) {
nViolations++;
String label;
if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) {
label = "KidHomRef_ParentHomVar";
KidHomRef_ParentHomVar++;
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) {
label = "KidHet_ParentsHomRef";
KidHet_ParentsHomRef++;
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) {
label = "KidHet_ParentsHomVar";
KidHet_ParentsHomVar++;
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
label = "KidHomVar_ParentHomRef";
KidHomVar_ParentHomRef++;
} else {
throw new ReviewedStingException("BUG: unexpected child genotype class " + childG);
}
return "MendelViolation=" + label;
}
} }
} else{
nSkipped++;
return null; // we don't capture any intersting sites
}
/*
private double getQThreshold() {
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
//return 0.0;
}
TrioStructure trio;
double mendelianViolationQualThreshold;
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public static class TrioStructure {
public String mom, dad, child;
}
public static TrioStructure parseTrioDescription(String family) {
Matcher m = FAMILY_PATTERN.matcher(family);
if (m.matches()) {
TrioStructure trio = new TrioStructure();
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
trio.mom = m.group(1);
trio.dad = m.group(2);
trio.child = m.group(3);
return trio;
} else {
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
}
}
public void initialize(VariantEvalWalker walker) {
trio = parseTrioDescription(walker.getFamilyStructure());
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
}
private boolean includeGenotype(Genotype g) {
return g.getLog10PError() > getQThreshold() && g.isCalled();
}
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) {
return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles());
}
public static boolean isViolation(VariantContext vc, TrioStructure trio ) {
return isViolation(vc, vc.getGenotype(trio.mom), vc.getGenotype(trio.dad), vc.getGenotype(trio.child) );
}
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA) {
//VariantContext momVC = vc.subContextFromGenotypes(momG);
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
int i = 0;
Genotype childG = new Genotype("kidG", childA);
for (Allele momAllele : momA) {
for (Allele dadAllele : dadA) {
if (momAllele.isCalled() && dadAllele.isCalled()) {
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
if (childG.sameGenotype(possibleChild)) {
return false;
}
}
} }
return null;
} }
return true; return null; // we don't capture any interesting sites
} }
*/
} }

View File

@ -26,9 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -41,7 +42,6 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
@ -180,7 +180,7 @@ import java.util.*;
* </pre> * </pre>
* *
*/ */
public class SelectVariants extends RodWalker<Integer, Integer> { public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/** /**
@ -282,6 +282,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false) @Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
private double fractionRandom = 0; private double fractionRandom = 0;
@Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false)
private double fractionGenotypes = 0;
/** /**
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria. * This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
* When specified one or more times, a particular type of variant is selected. * When specified one or more times, a particular type of variant is selected.
@ -325,7 +328,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private boolean DISCORDANCE_ONLY = false; private boolean DISCORDANCE_ONLY = false;
private boolean CONCORDANCE_ONLY = false; private boolean CONCORDANCE_ONLY = false;
private Set<MendelianViolation> mvSet = new HashSet<MendelianViolation>(); private MendelianViolation mv;
/* variables used by the SELECT RANDOM modules */ /* variables used by the SELECT RANDOM modules */
@ -344,6 +347,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private PrintStream outMVFileStream = null; private PrintStream outMVFileStream = null;
//Random number generator for the genotypes to remove
private Random randomGenotypes = new Random();
/** /**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
@ -380,8 +385,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
for ( String sample : samples ) for ( String sample : samples )
logger.info("Including sample '" + sample + "'"); logger.info("Including sample '" + sample + "'");
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include // if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
if (TYPES_TO_INCLUDE.isEmpty()) { if (TYPES_TO_INCLUDE.isEmpty()) {
@ -421,29 +424,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName()); if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
if (MENDELIAN_VIOLATIONS) { if (MENDELIAN_VIOLATIONS) {
if ( FAMILY_STRUCTURE_FILE != null) { mv = new MendelianViolation(MENDELIAN_VIOLATION_QUAL_THRESHOLD,false,true);
try {
for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) {
MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom()))
mvSet.add(mv);
}
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
}
if (outMVFile != null)
try {
outMVFileStream = new PrintStream(outMVFile);
}
catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
}
else
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
}
else if (!FAMILY_STRUCTURE.isEmpty()) {
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
MENDELIAN_VIOLATIONS = true;
} }
SELECT_RANDOM_NUMBER = numRandom > 0; SELECT_RANDOM_NUMBER = numRandom > 0;
@ -479,26 +460,26 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
} }
for (VariantContext vc : vcs) { for (VariantContext vc : vcs) {
if (MENDELIAN_VIOLATIONS) { if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
boolean foundMV = false; break;
for (MendelianViolation mv : mvSet) {
if (mv.isViolation(vc)) { if (outMVFile != null){
foundMV = true; for( String familyId : mv.getViolationFamilies()){
//System.out.println(vc.toString()); for(Sample sample : this.getSampleDB().getFamily(familyId)){
if (outMVFile != null) if(sample.getParents().size() > 0){
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)), vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() ); vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
}
} }
} }
if (!foundMV)
break;
} }
if (DISCORDANCE_ONLY) { if (DISCORDANCE_ONLY) {
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation()); Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
if (!isDiscordant(vc, compVCs)) if (!isDiscordant(vc, compVCs))
@ -629,6 +610,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Override @Override
public Integer reduce(Integer value, Integer sum) { return value + sum; } public Integer reduce(Integer value, Integer sum) { return value + sum; }
@Override
public Integer treeReduce(Integer lhs, Integer rhs) {
return lhs + rhs;
}
public void onTraversalDone(Integer result) { public void onTraversalDone(Integer result) {
logger.info(result + " records processed."); logger.info(result + " records processed.");
@ -657,9 +643,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles()); final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
VariantContextBuilder builder = new VariantContextBuilder(sub); VariantContextBuilder builder = new VariantContextBuilder(sub);
GenotypesContext newGC = sub.getGenotypes();
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate) // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
if ( vc.getAlleles().size() != sub.getAlleles().size() ) if ( vc.getAlleles().size() != sub.getAlleles().size() )
builder.genotypes(VariantContextUtils.stripPLs(vc.getGenotypes())); newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
//Remove a fraction of the genotypes if needed
if(fractionGenotypes>0){
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
alleles.add(Allele.create((byte)'.'));
alleles.add(Allele.create((byte)'.'));
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
}
else{
genotypes.add(genotype);
}
}
newGC = GenotypesContext.create(genotypes);
}
builder.genotypes(newGC);
int depth = 0; int depth = 0;
for (String sample : sub.getSampleNames()) { for (String sample : sub.getSampleNames()) {

View File

@ -1,147 +1,394 @@
package org.broadinstitute.sting.utils; package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection; import java.util.*;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** /**
* User: carneiro * User: carneiro / lfran
* Date: 3/9/11 * Date: 3/9/11
* Time: 12:38 PM * Time: 12:38 PM
*
* Class for the identification and tracking of mendelian violation. It can be used in 2 distinct ways:
* - Either using an instance of the MendelianViolation class to track mendelian violations for each of the families while
* walking over the variants
* - Or using the static methods to directly get information about mendelian violation in a family at a given locus
*
*/ */
public class MendelianViolation { public class MendelianViolation {
String sampleMom; //List of families with violations
String sampleDad; private List<String> violationFamilies;
String sampleChild;
List allelesMom; //Call information
List allelesDad; private int nocall = 0;
List allelesChild; private int familyCalled = 0;
private int varFamilyCalled = 0;
private int lowQual = 0;
double minGenotypeQuality; private boolean allCalledOnly = true;
//Stores occurrences of inheritance
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
private int violations_total=0;
private double minGenotypeQuality;
private boolean abortOnSampleNotFound;
//Number of families with genotype information for all members
public int getFamilyCalledCount(){
return familyCalled;
}
//Number of families with genotype information for all members
public int getVarFamilyCalledCount(){
return varFamilyCalled;
}
//Number of families missing genotypes for one or more of their members
public int getFamilyNoCallCount(){
return nocall;
}
//Number of families with genotypes below the set quality threshold
public int getFamilyLowQualsCount(){
return lowQual;
}
public int getViolationsCount(){
return violations_total;
}
//Count of alt alleles inherited from het parents (no violation)
public int getParentHetInheritedVar(){
return getParentsHetHetInheritedVar() + getParentsRefHetInheritedVar() + getParentsVarHetInheritedVar();
}
//Count of ref alleles inherited from het parents (no violation)
public int getParentHetInheritedRef(){
return getParentsHetHetInheritedRef() + getParentsRefHetInheritedRef() + getParentsVarHetInheritedRef();
}
//Count of HomRef/HomRef/HomRef trios
public int getRefRefRef(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
}
//Count of HomVar/HomVar/HomVar trios
public int getVarVarVar(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
}
//Count of HomRef/HomVar/Het trios
public int getRefVarHet(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
}
//Count of Het/Het/Het trios
public int getHetHetHet(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
}
//Count of Het/Het/HomRef trios
public int getHetHetHomRef(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
}
//Count of Het/Het/HomVar trios
public int getHetHetHomVar(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
}
//Count of ref alleles inherited from Het/Het parents (no violation)
public int getParentsHetHetInheritedRef(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
//return parentsHetHet_childRef;
}
//Count of var alleles inherited from Het/Het parents (no violation)
public int getParentsHetHetInheritedVar(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
//return parentsHetHet_childVar;
}
//Count of ref alleles inherited from HomRef/Het parents (no violation)
public int getParentsRefHetInheritedRef(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
//return parentsHomRefHet_childRef;
}
//Count of var alleles inherited from HomRef/Het parents (no violation)
public int getParentsRefHetInheritedVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
//return parentsHomRefHet_childVar;
}
//Count of ref alleles inherited from HomVar/Het parents (no violation)
public int getParentsVarHetInheritedRef(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
//return parentsHomVarHet_childRef;
}
//Count of var alleles inherited from HomVar/Het parents (no violation)
public int getParentsVarHetInheritedVar(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
//return parentsHomVarHet_childVar;
}
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
public int getParentsRefRefChildVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
}
//Count of violations of the type HOM_REF/HOM_REF -> HET
public int getParentsRefRefChildHet(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
}
//Count of violations of the type HOM_REF/HET -> HOM_VAR
public int getParentsRefHetChildVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
}
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
public int getParentsRefVarChildVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
}
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
public int getParentsRefVarChildRef(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
}
//Count of violations of the type HOM_VAR/HET -> HOM_REF
public int getParentsVarHetChildRef(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
}
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
public int getParentsVarVarChildRef(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
}
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
public int getParentsVarVarChildHet(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
}
//Count of violations of the type HOM_VAR/? -> HOM_REF
public int getParentVarChildRef(){
return getParentsRefVarChildRef() + getParentsVarHetChildRef() +getParentsVarVarChildRef();
}
//Count of violations of the type HOM_REF/? -> HOM_VAR
public int getParentRefChildVar(){
return getParentsRefVarChildVar() + getParentsRefHetChildVar() +getParentsRefRefChildVar();
}
//Returns a String containing all trios where a Mendelian violation was observed.
//The String is formatted "mom1+dad1=child1,mom2+dad2=child2,..."
public String getViolationFamiliesString(){
if(violationFamilies.isEmpty())
return "";
Iterator<String> it = violationFamilies.iterator();
String violationFams = it.next();
while(it.hasNext()){
violationFams += ","+it.next();
}
return violationFams;
}
public List<String> getViolationFamilies(){
return violationFamilies;
}
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public String getSampleMom() {
return sampleMom;
}
public String getSampleDad() {
return sampleDad;
}
public String getSampleChild() {
return sampleChild;
}
public double getMinGenotypeQuality() { public double getMinGenotypeQuality() {
return minGenotypeQuality; return minGenotypeQuality;
} }
/** /**
* * Constructor
* @param sampleMomP - sample name of mom
* @param sampleDadP - sample name of dad
* @param sampleChildP - sample name of child
*/
public MendelianViolation (String sampleMomP, String sampleDadP, String sampleChildP) {
sampleMom = sampleMomP;
sampleDad = sampleDadP;
sampleChild = sampleChildP;
}
/**
*
* @param family - the sample names string "mom+dad=child"
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
*/
public MendelianViolation(String family, double minGenotypeQualityP) {
minGenotypeQuality = minGenotypeQualityP;
Matcher m = FAMILY_PATTERN.matcher(family);
if (m.matches()) {
sampleMom = m.group(1);
sampleDad = m.group(2);
sampleChild = m.group(3);
}
else
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
}
/**
* An alternative to the more general constructor if you want to get the Sample information from the engine yourself.
* @param sample - the sample object extracted from the sample metadata YAML file given to the engine.
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
*/
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
sampleMom = sample.getMother().getID();
sampleDad = sample.getFather().getID();
sampleChild = sample.getID();
minGenotypeQuality = minGenotypeQualityP;
}
/**
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
* before evaluating it for mendelian violation, you can call setAlleles and then isViolation().
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
*/
public boolean setAlleles (VariantContext vc)
{
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
Genotype gChild = vc.getGenotypes(sampleChild).get(sampleChild);
if (gMom == null || gDad == null || gChild == null)
throw new IllegalArgumentException(String.format("Variant %s:%d didn't contain genotypes for all family members: mom=%s dad=%s child=%s", vc.getChr(), vc.getStart(), sampleMom, sampleDad, sampleChild));
if (gMom.isNoCall() || gDad.isNoCall() || gChild.isNoCall() ||
gMom.getPhredScaledQual() < minGenotypeQuality ||
gDad.getPhredScaledQual() < minGenotypeQuality ||
gChild.getPhredScaledQual() < minGenotypeQuality ) {
return false;
}
allelesMom = gMom.getAlleles();
allelesDad = gDad.getAlleles();
allelesChild = gChild.getAlleles();
return !allelesMom.isEmpty() && !allelesDad.isEmpty() && !allelesChild.isEmpty();
}
/**
* *
*/
public MendelianViolation(double minGenotypeQualityP) {
this(minGenotypeQualityP,true);
}
/**
* Constructor
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
*/
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound) {
minGenotypeQuality = minGenotypeQualityP;
this.abortOnSampleNotFound = abortOnSampleNotFound;
violationFamilies = new ArrayList<String>();
createInheritanceMap();
}
/**
* Constructor
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
* @param completeTriosOnly - whether only complete trios are considered or parent/child pairs are too.
*/
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound, boolean completeTriosOnly) {
minGenotypeQuality = minGenotypeQualityP;
this.abortOnSampleNotFound = abortOnSampleNotFound;
violationFamilies = new ArrayList<String>();
createInheritanceMap();
allCalledOnly = completeTriosOnly;
}
/**
* @param families the families to be checked for Mendelian violations
* @param vc the variant context to extract the genotypes and alleles for mom, dad and child. * @param vc the variant context to extract the genotypes and alleles for mom, dad and child.
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
*
*/
public boolean isViolation(VariantContext vc)
{
return setAlleles(vc) && isViolation();
}
/**
* @return whether or not there is a mendelian violation at the site. * @return whether or not there is a mendelian violation at the site.
*/ */
public boolean isViolation() { public int countViolations(Map<String, Set<Sample>> families, VariantContext vc){
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0))) //Reset counts
return false; nocall = 0;
return true; lowQual = 0;
familyCalled = 0;
varFamilyCalled = 0;
violations_total=0;
violationFamilies.clear();
clearInheritanceMap();
for(Set<Sample> family : families.values()){
Iterator<Sample> sampleIterator = family.iterator();
Sample sample;
while(sampleIterator.hasNext()){
sample = sampleIterator.next();
if(sample.getParents().size() > 0)
updateViolations(sample.getFamilyID(),sample.getMaternalID(), sample.getPaternalID(), sample.getID() ,vc);
}
}
return violations_total;
}
public boolean isViolation(Sample mother, Sample father, Sample child, VariantContext vc){
//Reset counts
nocall = 0;
lowQual = 0;
familyCalled = 0;
varFamilyCalled = 0;
violations_total=0;
violationFamilies.clear();
clearInheritanceMap();
updateViolations(mother.getFamilyID(),mother.getID(),father.getID(),child.getID(),vc);
return violations_total>0;
}
private void updateViolations(String familyId, String motherId, String fatherId, String childId, VariantContext vc){
int count;
Genotype gMom = vc.getGenotype(motherId);
Genotype gDad = vc.getGenotype(fatherId);
Genotype gChild = vc.getGenotype(childId);
if (gMom == null || gDad == null || gChild == null){
if(abortOnSampleNotFound)
throw new IllegalArgumentException(String.format("Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s family=%s", vc.getChr(), vc.getStart(), familyId, motherId, fatherId, childId));
else
return;
}
//Count No calls
if(allCalledOnly && (!gMom.isCalled() || !gDad.isCalled() || !gChild.isCalled())){
nocall++;
}
else if (!gMom.isCalled() && !gDad.isCalled() || !gChild.isCalled()){
nocall++;
}
//Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned
else if (minGenotypeQuality>0 && (gMom.getPhredScaledQual() < minGenotypeQuality ||
gDad.getPhredScaledQual() < minGenotypeQuality ||
gChild.getPhredScaledQual() < minGenotypeQuality )) {
lowQual++;
}
else{
//Count all families per loci called
familyCalled++;
//If the family is all homref, not too interesting
if(!(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef()))
{
varFamilyCalled++;
if(isViolation(gMom, gDad, gChild)){
violationFamilies.add(familyId);
violations_total++;
}
}
count = inheritance.get(gMom.getType()).get(gDad.getType()).get(gChild.getType());
inheritance.get(gMom.getType()).get(gDad.getType()).put(gChild.getType(),count+1);
}
}
private boolean isViolation(Genotype gMom, Genotype gDad, Genotype gChild) {
//1 parent is no "call
if(!gMom.isCalled()){
return (gDad.isHomRef() && gChild.isHomVar()) || (gDad.isHomVar() && gChild.isHomRef());
}
else if(!gDad.isCalled()){
return (gMom.isHomRef() && gChild.isHomVar()) || (gMom.isHomVar() && gChild.isHomRef());
}
//Both parents have genotype information
return !(gMom.getAlleles().contains(gChild.getAlleles().get(0)) && gDad.getAlleles().contains(gChild.getAlleles().get(1)) ||
gMom.getAlleles().contains(gChild.getAlleles().get(1)) && gDad.getAlleles().contains(gChild.getAlleles().get(0)));
}
private void createInheritanceMap(){
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
for(Genotype.Type mType : Genotype.Type.values()){
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
for(Genotype.Type dType : Genotype.Type.values()){
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
for(Genotype.Type cType : Genotype.Type.values()){
inheritance.get(mType).get(dType).put(cType, 0);
}
}
}
}
private void clearInheritanceMap(){
for(Genotype.Type mType : Genotype.Type.values()){
for(Genotype.Type dType : Genotype.Type.values()){
for(Genotype.Type cType : Genotype.Type.values()){
inheritance.get(mType).get(dType).put(cType, 0);
}
}
}
} }
/** /**
* @return the likelihood ratio for a mendelian violation * @return the likelihood ratio for a mendelian violation
*/ */
public double violationLikelihoodRatio(VariantContext vc) { public double violationLikelihoodRatio(VariantContext vc, String motherId, String fatherId, String childId) {
double[] logLikAssignments = new double[27]; double[] logLikAssignments = new double[27];
// the matrix to set up is // the matrix to set up is
// MOM DAD CHILD // MOM DAD CHILD
@ -152,9 +399,9 @@ public class MendelianViolation {
// AA AB | AB // AA AB | AB
// |- BB // |- BB
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); double[] momGL = vc.getGenotype(motherId).getLikelihoods().getAsVector();
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); double[] dadGL = vc.getGenotype(fatherId).getLikelihoods().getAsVector();
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); double[] childGL = vc.getGenotype(childId).getLikelihoods().getAsVector();
int offset = 0; int offset = 0;
for ( int oMom = 0; oMom < 3; oMom++ ) { for ( int oMom = 0; oMom < 3; oMom++ ) {
for ( int oDad = 0; oDad < 3; oDad++ ) { for ( int oDad = 0; oDad < 3; oDad++ ) {

View File

@ -27,11 +27,42 @@ public class SampleDBUnitTest extends BaseTest {
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED))); new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));
private static final Set<Sample> testPEDFamilyF2 = new HashSet<Sample>(Arrays.asList(
new Sample("s2", "fam2", "d2", "m2", Gender.FEMALE, Affection.AFFECTED),
new Sample("d2", "fam2", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("m2", "fam2", null, null, Gender.FEMALE, Affection.UNKNOWN)
));
private static final Set<Sample> testPEDFamilyF3 = new HashSet<Sample>(Arrays.asList(
new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED),
new Sample("d1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN),
new Sample("m1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN)
));
private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList( private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN), new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN), new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN))); new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));
private static final HashMap<String, Set<Sample>> testGetFamilies = new HashMap<String,Set<Sample>>();
static {
testGetFamilies.put("fam1", testPEDSamples);
testGetFamilies.put("fam2", testPEDFamilyF2);
testGetFamilies.put("fam3", testPEDFamilyF3);
}
private static final Set<Sample> testKidsWithParentsFamilies2 = new HashSet<Sample>(Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("kid3", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED),
new Sample("kid2", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED)));
private static final HashSet<String> testGetPartialFamiliesIds = new HashSet<String>(Arrays.asList("kid","s1"));
private static final HashMap<String, Set<Sample>> testGetPartialFamilies = new HashMap<String,Set<Sample>>();
static {
testGetPartialFamilies.put("fam1", new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED))));
testGetPartialFamilies.put("fam3", new HashSet<Sample>(Arrays.asList(new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED))));
}
private static final String testPEDString = private static final String testPEDString =
String.format("%s%n%s%n%s", String.format("%s%n%s%n%s",
"fam1 kid dad mom 1 2", "fam1 kid dad mom 1 2",
@ -46,6 +77,18 @@ public class SampleDBUnitTest extends BaseTest {
"fam3 s1 d1 m1 2 2", "fam3 s1 d1 m1 2 2",
"fam2 s2 d2 m2 2 2"); "fam2 s2 d2 m2 2 2");
private static final String testPEDMultipleFamilies2 =
String.format("%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s",
"fam1 kid dad mom 1 2",
"fam1 dad 0 0 1 1",
"fam1 mom 0 0 2 2",
"fam4 kid4 dad4 0 1 2",
"fam4 dad4 0 0 1 1",
"fam5 kid2 dad2 mom2 1 2",
"fam5 kid3 dad2 mom2 1 2",
"fam5 dad2 0 0 1 1",
"fam5 mom2 0 0 2 2");
private static final String testPEDStringInconsistentGender = private static final String testPEDStringInconsistentGender =
"fam1 kid 0 0 2 2"; "fam1 kid 0 0 2 2";
@ -138,6 +181,25 @@ public class SampleDBUnitTest extends BaseTest {
Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet); Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
} }
@Test()
public void getFamilies(){
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
SampleDB db = builder.getFinalSampleDB();
Assert.assertEquals(db.getFamilies(),testGetFamilies);
Assert.assertEquals(db.getFamilies(null),testGetFamilies);
Assert.assertEquals(db.getFamilies(testGetPartialFamiliesIds),testGetPartialFamilies);
}
@Test()
public void testGetChildrenWithParents()
{
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies2));
SampleDB db = builder.getFinalSampleDB();
Assert.assertEquals(db.getChildrenWithParents(), testKidsWithParentsFamilies2);
Assert.assertEquals(db.getChildrenWithParents(false), testKidsWithParentsFamilies2);
Assert.assertEquals(db.getChildrenWithParents(true), new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED))));
}
@Test() @Test()
public void loadFamilyIDs() { public void loadFamilyIDs() {
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies)); builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));

View File

@ -291,6 +291,17 @@ public class VariantEvalIntegrationTest extends WalkerTest {
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
} }
@Test
public void testVEMendelianViolationEvaluator() {
String vcfFile = "/MendelianViolationEval.vcf";
String pedFile = "/MendelianViolationEval.ped";
WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST",
1,
Arrays.asList("66e72c887124f40933d32254b2dd44a3"));
executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec);
}
@Test @Test
public void testCompVsEvalAC() { public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";

View File

@ -115,4 +115,26 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testUsingDbsnpName--" + testFile, spec); executeTest("testUsingDbsnpName--" + testFile, spec);
} }
@Test
public void testParallelization() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec;
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
1,
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
);
executeTest("testParallelization (2 threads)--" + testfile, spec);
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
1,
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
);
executeTest("testParallelization (4 threads)--" + testfile, spec);
}
} }

View File

@ -83,6 +83,10 @@ class DataProcessingPipeline extends QScript {
@Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false) @Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false)
var defaultPlatform: String = "" var defaultPlatform: String = ""
@Hidden
@Input(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false)
var testMode: Boolean = false
/**************************************************************************** /****************************************************************************
* Global Variables * Global Variables
@ -335,6 +339,7 @@ class DataProcessingPipeline extends QScript {
this.known ++= qscript.indels this.known ++= qscript.indels
this.consensusDeterminationModel = cleanModelEnum this.consensusDeterminationModel = cleanModelEnum
this.compress = 0 this.compress = 0
this.noPGTag = qscript.testMode;
this.scatterCount = nContigs this.scatterCount = nContigs
this.analysisName = queueLogDir + outBam + ".clean" this.analysisName = queueLogDir + outBam + ".clean"
this.jobName = queueLogDir + outBam + ".clean" this.jobName = queueLogDir + outBam + ".clean"
@ -360,6 +365,7 @@ class DataProcessingPipeline extends QScript {
this.out = outBam this.out = outBam
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString) if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
else if (qscript.intervals != null) this.intervals :+= qscript.intervals else if (qscript.intervals != null) this.intervals :+= qscript.intervals
this.no_pg_tag = qscript.testMode
this.scatterCount = nContigs this.scatterCount = nContigs
this.isIntermediate = false this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration" this.analysisName = queueLogDir + outBam + ".recalibration"

View File

@ -47,6 +47,10 @@ class PacbioProcessingPipeline extends QScript {
@Input(shortName="bwastring", required=false) @Input(shortName="bwastring", required=false)
var bwastring: String = "" var bwastring: String = ""
@Hidden
@Input(shortName = "test", fullName = "test_mode", required = false)
var testMode: Boolean = false
val queueLogDir: String = ".qlog/" val queueLogDir: String = ".qlog/"
def script = { def script = {
@ -170,6 +174,7 @@ class PacbioProcessingPipeline extends QScript {
this.input_file :+= inBam this.input_file :+= inBam
this.recal_file = inRecalFile this.recal_file = inRecalFile
this.out = outBam this.out = outBam
this.no_pg_tag = testMode
this.isIntermediate = false this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration" this.analysisName = queueLogDir + outBam + ".recalibration"
this.jobName = queueLogDir + outBam + ".recalibration" this.jobName = queueLogDir + outBam + ".recalibration"
@ -177,7 +182,6 @@ class PacbioProcessingPipeline extends QScript {
} }
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
this.resources = R
this.recal_file = inRecalFile this.recal_file = inRecalFile
this.output_dir = outPath this.output_dir = outPath
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"

View File

@ -0,0 +1,69 @@
package org.broadinstitute.sting.queue.pipeline
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.testng.annotations.Test
import org.broadinstitute.sting.BaseTest
class DataProcessingPipelineTest {
@Test
def testSimpleBAM {
val projectName = "test1"
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
val spec = new PipelineTestSpec
spec.name = "DataProcessingPipeline"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -i " + BaseTest.testDir + "exampleBAM.bam",
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
" -nv ",
" -test ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "1f85e76de760167a77ed1d9ab4da2936"
PipelineTest.executeTest(spec)
}
@Test
def testBWAPEBAM {
val projectName = "test2"
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
val spec = new PipelineTestSpec
spec.name = "DataProcessingPipeline"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -i " + BaseTest.testDir + "exampleBAM.bam",
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
" -nv ",
" -test ",
" -bwa /home/unix/carneiro/bin/bwa",
" -bwape ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "57416a0abdf9524bc92834d466529708"
PipelineTest.executeTest(spec)
}
}

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.queue.pipeline
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.testng.annotations.Test
import org.broadinstitute.sting.BaseTest
class PacbioProcessingPipelineTest {
@Test
def testBAM {
val testOut = "exampleBAM.recal.bam"
val spec = new PipelineTestSpec
spec.name = "pacbioProcessingPipeline"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -i " + BaseTest.testDir + "exampleBAM.bam",
" -blasr ",
" -test ",
" -D " + BaseTest.testDir + "exampleDBSNP.vcf").mkString
spec.fileMD5s += testOut -> "f0adce660b55cb91d5f987f9a145471e"
PipelineTest.executeTest(spec)
}
}

Binary file not shown.

Binary file not shown.

282
public/testdata/exampleDBSNP.vcf vendored 100644
View File

@ -0,0 +1,282 @@
##fileformat=VCFv4.1
##FILTER=<ID=NC,Description="Inconsistent Genotype Submission For At Least One Sample">
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=ASP,Number=0,Type=Flag,Description="Is Assembly specific. This is set if the variant only maps to one assembly">
##INFO=<ID=ASS,Number=0,Type=Flag,Description="In acceptor splice site FxnCode = 73">
##INFO=<ID=CDA,Number=0,Type=Flag,Description="Variation is interrogated in a clinical diagnostic assay">
##INFO=<ID=CFL,Number=0,Type=Flag,Description="Has Assembly conflict. This is for weight 1 and 2 variant that maps to different chromosomes on different assemblies.">
##INFO=<ID=CLN,Number=0,Type=Flag,Description="Variant is Clinical(LSDB,OMIM,TPA,Diagnostic)">
##INFO=<ID=DSS,Number=0,Type=Flag,Description="In donor splice-site FxnCode = 75">
##INFO=<ID=G5,Number=0,Type=Flag,Description=">5% minor allele frequency in 1+ populations">
##INFO=<ID=G5A,Number=0,Type=Flag,Description=">5% minor allele frequency in each and all populations">
##INFO=<ID=GCF,Number=0,Type=Flag,Description="Has Genotype Conflict Same (rs, ind), different genotype. N/N is not included.">
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:) and each pair is delimited by a vertical bar (|)">
##INFO=<ID=GMAF,Number=1,Type=Float,Description="Global Minor Allele Frequency [0, 0.5]; global population is 1000GenomesProject phase 1 genotype data from 629 individuals, released in the 08-04-2010 dataset">
##INFO=<ID=GNO,Number=0,Type=Flag,Description="Genotypes available. The variant has individual genotype (in SubInd table).">
##INFO=<ID=HD,Number=0,Type=Flag,Description="Marker is on high density genotyping kit (50K density or greater). The variant may have phenotype associations present in dbGaP.">
##INFO=<ID=INT,Number=0,Type=Flag,Description="In Intron FxnCode = 6">
##INFO=<ID=KGPROD,Number=0,Type=Flag,Description="1000 Genome production phase">
##INFO=<ID=KGPilot1,Number=0,Type=Flag,Description="1000 Genome discovery(pilot1) 2009">
##INFO=<ID=KGPilot123,Number=0,Type=Flag,Description="1000 Genome discovery all pilots 2010(1,2,3)">
##INFO=<ID=KGVAL,Number=0,Type=Flag,Description="1000 Genome validated by second method">
##INFO=<ID=LSD,Number=0,Type=Flag,Description="Submitted from a locus-specific database">
##INFO=<ID=MTP,Number=0,Type=Flag,Description="Microattribution/third-party annotation(TPA:GWAS,PAGE)">
##INFO=<ID=MUT,Number=0,Type=Flag,Description="Is mutation (journal citation, explicit fact): a low frequency variation that is cited in journal and other reputable sources">
##INFO=<ID=NOC,Number=0,Type=Flag,Description="Contig allele not present in variant allele list. The reference sequence allele at the mapped position is not present in the variant allele list, adjusted for orientation.">
##INFO=<ID=NOV,Number=0,Type=Flag,Description="Rs cluster has non-overlapping allele sets. True when rs set has more than 2 alleles from different submissions and these sets share no alleles in common.">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=NSF,Number=0,Type=Flag,Description="Has non-synonymous frameshift A coding region variation where one allele in the set changes all downstream amino acids. FxnClass = 44">
##INFO=<ID=NSM,Number=0,Type=Flag,Description="Has non-synonymous missense A coding region variation where one allele in the set changes protein peptide. FxnClass = 42">
##INFO=<ID=NSN,Number=0,Type=Flag,Description="Has non-synonymous nonsense A coding region variation where one allele in the set changes to STOP codon (TER). FxnClass = 41">
##INFO=<ID=OM,Number=0,Type=Flag,Description="Has OMIM/OMIA">
##INFO=<ID=OTH,Number=0,Type=Flag,Description="Has other variant with exactly the same set of mapped positions on NCBI refernce assembly.">
##INFO=<ID=PH1,Number=0,Type=Flag,Description="Phase 1 genotyped: filtered, non-redundant">
##INFO=<ID=PH2,Number=0,Type=Flag,Description="Phase 2 genotyped: filtered, non-redundant">
##INFO=<ID=PH3,Number=0,Type=Flag,Description="Phase 3 genotyped: filtered, non-redundant">
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
##INFO=<ID=PMC,Number=0,Type=Flag,Description="Links exist to PubMed Central article">
##INFO=<ID=R3,Number=0,Type=Flag,Description="In 3' gene region FxnCode = 13">
##INFO=<ID=R5,Number=0,Type=Flag,Description="In 5' gene region FxnCode = 15">
##INFO=<ID=REF,Number=0,Type=Flag,Description="Has reference A coding region variation where one allele in the set is identical to the reference sequence. FxnCode = 8">
##INFO=<ID=RSPOS,Number=1,Type=Integer,Description="Chr position reported in dbSNP">
##INFO=<ID=RV,Number=0,Type=Flag,Description="RS orientation is reversed">
##INFO=<ID=S3D,Number=0,Type=Flag,Description="Has 3D structure - SNP3D table">
##INFO=<ID=SAO,Number=1,Type=Integer,Description="Variant Allele Origin: 0 - unspecified, 1 - Germline, 2 - Somatic, 3 - Both">
##INFO=<ID=SCS,Number=1,Type=Integer,Description="Variant Clinical Significance, 0 - unknown, 1 - untested, 2 - non-pathogenic, 3 - probable-non-pathogenic, 4 - probable-pathogenic, 5 - pathogenic, 6 - drug-response, 7 - histocompatibility, 255 - other">
##INFO=<ID=SLO,Number=0,Type=Flag,Description="Has SubmitterLinkOut - From SNP->SubSNP->Batch.link_out">
##INFO=<ID=SSR,Number=1,Type=Integer,Description="Variant Suspect Reason Code, 0 - unspecified, 1 - Paralog, 2 - byEST, 3 - Para_EST, 4 - oldAlign, 5 - other">
##INFO=<ID=SYN,Number=0,Type=Flag,Description="Has synonymous A coding region variation where one allele in the set does not change the encoded amino acid. FxnCode = 3">
##INFO=<ID=TPA,Number=0,Type=Flag,Description="Provisional Third Party Annotation(TPA) (currently rs from PHARMGKB who will give phenotype data)">
##INFO=<ID=U3,Number=0,Type=Flag,Description="In 3' UTR Location is in an untranslated region (UTR). FxnCode = 53">
##INFO=<ID=U5,Number=0,Type=Flag,Description="In 5' UTR Location is in an untranslated region (UTR). FxnCode = 55">
##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
##INFO=<ID=VLD,Number=0,Type=Flag,Description="Is Validated. This bit is set if the variant has 2+ minor allele count based on frequency or genotype data.">
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property">
##INFO=<ID=WGT,Number=1,Type=Integer,Description="Weight, 00 - unmapped, 1 - weight 1, 2 - weight 2, 3 - weight 3 or more">
##INFO=<ID=WTD,Number=0,Type=Flag,Description="Is Withdrawn by submitter If one member ss is withdrawn by submitter, then this bit is set. If all member ss' are withdrawn, then the rs is deleted to SNPHistory">
##INFO=<ID=dbSNPBuildID,Number=1,Type=Integer,Description="First dbSNP Build for RS">
##LeftAlignVariants="analysis_type=LeftAlignVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=00-All.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub filter_mismatching_base_and_quals=false"
##contig=<ID=chr1,length=249250621,assembly=b37>
##phasing=partial
##reference=GRCh37.3
##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta
##source=dbSNP
##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 10144 rs144773400 TA T . PASS ASP;RSPOS=10145;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10228 rs143255646 TA T . PASS ASP;RSPOS=10229;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10234 rs145599635 C T . PASS ASP;RSPOS=10234;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10248 rs148908337 A T . PASS ASP;RSPOS=10248;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10254 rs140194106 TA T . PASS ASP;RSPOS=10255;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10291 rs145427775 C T . PASS ASP;RSPOS=10291;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10327 rs112750067 T C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10327;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 10329 rs150969722 AC A . PASS ASP;RSPOS=10330;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10351 rs145072688 CTA C,CA . PASS ASP;RSPOS=10352;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10382 rs147093981 AAC A,AC . PASS ASP;RSPOS=10383;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10433 rs56289060 A AC . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10433;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 10439 rs112766696 AC A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10440;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=132
chr1 10439 rs138941843 AC A . PASS ASP;RSPOS=10440;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10440 rs112155239 C A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10440;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 10492 rs55998931 C T . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.0617001828153565;RSPOS=10492;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040000000100;WGT=0;dbSNPBuildID=129
chr1 10519 rs62636508 G C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10519;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
chr1 10583 rs58108140 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.270566727605119;KGPilot123;RSPOS=10583;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=129
chr1 10611 rs189107123 C G . PASS KGPilot123;RSPOS=10611;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 10828 rs10218492 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10828;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
chr1 10904 rs10218493 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10904;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=119
chr1 10927 rs10218527 A G . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10927;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
chr1 10938 rs28853987 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10938;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11014 rs28484712 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11014;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11022 rs28775022 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11022;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11081 rs10218495 G T . PASS CFL;GENEINFO=LOC100652771:100652771;GNO;RSPOS=11081;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=119
chr1 11863 rs187669455 C A . PASS RSPOS=11863;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
chr1 13302 rs180734498 C T . PASS KGPilot123;RSPOS=13302;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 13327 rs144762171 G C . PASS ASP;KGPilot123;RSPOS=13327;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 13684 rs71260404 C T . PASS GENEINFO=LOC100652771:100652771;GNO;RSPOS=13684;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 13980 rs151276478 T C . PASS ASP;KGPilot123;RSPOS=13980;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 14889 rs142444908 G A . PASS ASP;RSPOS=14889;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 14907 rs79585140 A G . PASS GNO;RSPOS=14907;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 14930 rs75454623 A G . PASS GNO;RSPOS=14930;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 14976 rs71252251 G A . PASS ASP;GNO;RSPOS=14976;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
chr1 15061 rs71268703 T TG . PASS ASP;GNO;RSPOS=15061;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 15118 rs71252250 A G . PASS ASP;GNO;RSPOS=15118;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
chr1 15211 rs144718396 T G . PASS ASP;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 15211 rs78601809 T G . PASS ASP;GNO;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 16257 rs78588380 G C . PASS ASP;GNO;RSPOS=16257;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 16378 rs148220436 T C . PASS ASP;RSPOS=16378;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 16495 rs141130360 G C . PASS ASP;RSPOS=16495;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 16497 rs150723783 A G . PASS ASP;RSPOS=16497;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 17519 rs192890528 G T . PASS RSPOS=17519;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
chr1 19226 rs138930629 T A . PASS ASP;RSPOS=19226;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 20141 rs56336884 G A . PASS HD;RSPOS=20141;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000400000100;WGT=0;dbSNPBuildID=129
chr1 20144 rs143346096 G A . PASS ASP;RSPOS=20144;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 20206 rs71262675 C T . PASS GNO;RSPOS=20206;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 20245 rs71262674 G A . PASS GMAF=0.256398537477148;GNO;RSPOS=20245;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 20304 rs71262673 G C . PASS GMAF=0.338208409506399;GNO;RSPOS=20304;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 26999 rs147506580 A G . PASS ASP;RSPOS=26999;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 29436 rs2462493 G A . PASS GNO;RSPOS=29436;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=100
chr1 30923 rs140337953 G T . PASS ASP;KGPilot123;RSPOS=30923;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 33487 rs77459554 C T . PASS ASP;GNO;RSPOS=33487;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 33495 rs75468675 C T . PASS ASP;GNO;RSPOS=33495;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33505 rs75627161 T C . PASS ASP;GNO;RSPOS=33505;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33508 rs75609629 A T . PASS ASP;GNO;RSPOS=33508;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33521 rs76098219 T A . PASS GNO;RSPOS=33521;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 33593 rs557585 G A . PASS RSPOS=33593;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=83
chr1 33648 rs62028204 G T . PASS RSPOS=33648;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
chr1 33656 rs113821789 T C . PASS RSPOS=33656;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=132
chr1 51476 rs187298206 T C . PASS KGPilot123;RSPOS=51476;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51479 rs116400033 T A . PASS ASP;G5;G5A;GMAF=0.113802559414991;KGPilot123;RSPOS=51479;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070010000100;WGT=0;dbSNPBuildID=132
chr1 51803 rs62637812 T C . PASS GMAF=0.468921389396709;RSPOS=51803;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040000000100;WGT=0;dbSNPBuildID=129
chr1 51898 rs76402894 C A . PASS GMAF=0.0731261425959781;GNO;RSPOS=51898;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
chr1 51914 rs190452223 T G . PASS KGPilot123;RSPOS=51914;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51928 rs78732933 G A . PASS GNO;RSPOS=51928;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
chr1 51935 rs181754315 C T . PASS KGPilot123;RSPOS=51935;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51954 rs185832753 G C . PASS KGPilot123;RSPOS=51954;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 52058 rs62637813 G C . PASS GMAF=0.0342778793418647;KGPilot123;RSPOS=52058;SAO=0;SSR=1;VC=SNV;VLD;VP=050000000000040010000140;WGT=0;dbSNPBuildID=129
chr1 52144 rs190291950 T A . PASS KGPilot123;RSPOS=52144;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 52238 rs150021059 T G . PASS ASP;KGPilot123;RSPOS=52238;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54353 rs140052487 C A . PASS ASP;KGPilot123;RSPOS=54353;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54421 rs146477069 A G . PASS ASP;KGPilot123;RSPOS=54421;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54490 rs141149254 G A . PASS ASP;KGPilot123;RSPOS=54490;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54676 rs2462492 C T . PASS ASP;GMAF=0.191956124314442;GNO;HD;KGPilot123;RSPOS=54676;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040510000100;WGT=0;dbSNPBuildID=100
chr1 54753 rs143174675 T G . PASS ASP;KGPilot123;RSPOS=54753;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54788 rs59861892 CC C,CCT . PASS ASP;RSPOS=54789;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 54795 rs58014817 T A . PASS ASP;RSPOS=54795;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
chr1 55164 rs3091274 C A . PASS G5;G5A;GMAF=0.145338208409506;GNO;KGPilot123;RSPOS=55164;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030110000100;WGT=0;dbSNPBuildID=103
chr1 55299 rs10399749 C T . PASS G5;G5A;GMAF=0.278793418647166;GNO;KGPilot123;PH2;RSPOS=55299;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030112000100;WGT=0;dbSNPBuildID=119
chr1 55302 rs3091273 C T . PASS RSPOS=55302;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
chr1 55313 rs182462964 A T . PASS KGPilot123;RSPOS=55313;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55322 rs3107974 C T . PASS RSPOS=55322;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
chr1 55326 rs3107975 T C . PASS GNO;HD;KGPilot123;RSPOS=55326;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=103
chr1 55330 rs185215913 G A . PASS KGPilot123;RSPOS=55330;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55367 rs190850374 G A . PASS KGPilot123;RSPOS=55367;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55388 rs182711216 C T . PASS KGPilot123;RSPOS=55388;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55394 rs2949420 T A . PASS GNO;KGPilot123;PH2;RSPOS=55394;SAO=0;SSR=0;VC=SNV;VP=050000000000000112000100;WGT=0;dbSNPBuildID=101
chr1 55416 rs193242050 G A . PASS KGPilot123;RSPOS=55416;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55427 rs183189405 T C . PASS KGPilot123;RSPOS=55427;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55545 rs28396308 C T . PASS GNO;RSPOS=55545;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=125
chr1 55816 rs187434873 G A . PASS KGPilot123;RSPOS=55816;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55850 rs191890754 C G . PASS KGPilot123;RSPOS=55850;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55852 rs184233019 G C . PASS KGPilot123;RSPOS=55852;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 56644 rs143342222 A C . PASS ASP;KGPilot123;RSPOS=56644;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 57952 rs189727433 A C . PASS KGPilot123;RSPOS=57952;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 58771 rs140128481 T C . PASS ASP;RSPOS=58771;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 58814 rs114420996 G A . PASS ASP;G5;GMAF=0.0982632541133455;KGPilot123;RSPOS=58814;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
chr1 59040 rs149755937 T C . PASS ASP;KGPilot123;RSPOS=59040;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 60718 rs78395614 G A . PASS CFL;GNO;RSPOS=60718;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 60726 rs192328835 C A . PASS KGPilot123;RSPOS=60726;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 60791 rs76199781 A G . PASS CFL;GNO;RSPOS=60791;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61442 rs74970982 A G . PASS CFL;GMAF=0.076782449725777;GNO;KGPilot123;RSPOS=61442;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61462 rs56992750 T A . PASS CFL;G5;G5A;GMAF=0.0383912248628885;GNO;KGPilot123;RSPOS=61462;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008030110000100;WGT=0;dbSNPBuildID=129
chr1 61480 rs75526266 G C . PASS CFL;GNO;RSPOS=61480;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61499 rs75719746 G A . PASS CFL;GNO;RSPOS=61499;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61743 rs184286948 G C . PASS KGPilot123;RSPOS=61743;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 61920 rs62637820 G A . PASS CFL;GMAF=0.0255941499085923;RSPOS=61920;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=129
chr1 61987 rs76735897 A G . PASS CFL;GMAF=0.292961608775137;GNO;KGPilot123;RSPOS=61987;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61989 rs77573425 G C . PASS CFL;GMAF=0.309414990859232;GNO;KGPilot123;RSPOS=61989;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61993 rs190553843 C T . PASS KGPilot123;RSPOS=61993;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 62156 rs181864839 C T . PASS KGPilot123;RSPOS=62156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 62157 rs10399597 G A . PASS CFL;GMAF=0.00228519195612431;KGPilot123;RSPOS=62157;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=119
chr1 62162 rs140556834 G A . PASS ASP;KGPilot123;RSPOS=62162;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 62203 rs28402963 T C . PASS CFL;KGPilot123;RSPOS=62203;SAO=0;SSR=0;VC=SNV;VP=050000000008000010000100;WGT=0;dbSNPBuildID=125
chr1 62271 rs28599927 A G . PASS CFL;GMAF=0.138482632541133;RSPOS=62271;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=125
chr1 63268 rs75478250 T C . PASS CFL;GNO;RSPOS=63268;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 63276 rs185977555 G A . PASS KGPilot123;RSPOS=63276;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 63297 rs188886746 G A . PASS KGPilot123;RSPOS=63297;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 63671 rs116440577 G A . PASS ASP;G5;GMAF=0.170018281535649;KGPilot123;RSPOS=63671;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
chr1 63737 rs77426996 TACT T,TCTA . PASS CFL;RSPOS=63738;SAO=0;SSR=0;VC=DIV;VP=050000000008000000000200;WGT=0;dbSNPBuildID=131
chr1 64649 rs181431124 A C . PASS KGPilot123;RSPOS=64649;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66008 rs2691286 C G . PASS CFL;GNO;RSPOS=66008;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=100
chr1 66162 rs62639105 A T . PASS CFL;GMAF=0.320383912248629;GNO;KGPilot123;RSPOS=66162;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000110000100;WGT=0;dbSNPBuildID=129
chr1 66176 rs28552463 T A . PASS CFL;GMAF=0.0484460694698355;KGPilot123;RSPOS=66176;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=125
chr1 66219 rs181028663 A T . PASS KGPilot123;RSPOS=66219;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66238 rs113961546 T A . PASS CFL;GNO;RSPOS=66238;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=132
chr1 66314 rs28534012 T A . PASS CFL;RSPOS=66314;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
chr1 66331 rs186063952 A C . PASS KGPilot123;RSPOS=66331;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66334 rs28464214 T A . PASS CFL;RSPOS=66334;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
chr1 66442 rs192044252 T A . PASS KGPilot123;RSPOS=66442;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66457 rs13328655 T A . PASS CFL;GMAF=0.0795246800731261;KGPilot123;RSPOS=66457;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=121
chr1 66503 rs112350669 T A . PASS CFL;RSPOS=66503;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=132
chr1 66507 rs12401368 T A . PASS CFL;GMAF=0.479890310786106;KGPilot123;RSPOS=66507;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=120
chr1 66651 rs2257270 A T . PASS CFL;GNO;RSPOS=66651;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=100
chr1 67179 rs149952626 C G . PASS ASP;KGPilot123;RSPOS=67179;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 67181 rs77662731 A G . PASS ASP;G5;G5A;GENEINFO=OR4F5:79501;GMAF=0.0470749542961609;GNO;KGPilot123;RSPOS=67181;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070110000100;WGT=0;dbSNPBuildID=131
chr1 67223 rs78676975 C A . PASS ASP;GENEINFO=OR4F5:79501;GNO;RSPOS=67223;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 69428 rs140739101 T G . PASS ASP;RSPOS=69428;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69453 rs142004627 G A . PASS ASP;RSPOS=69453;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 69476 rs148502021 T C . PASS ASP;RSPOS=69476;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69496 rs150690004 G A . PASS ASP;RSPOS=69496;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69511 rs75062661 A G . PASS GENEINFO=OR4F5:79501;GMAF=0.193784277879342;GNO;KGPilot123;RSPOS=69511;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000110000100;WGT=0;dbSNPBuildID=131
chr1 69534 rs190717287 T C . PASS KGPilot123;RSPOS=69534;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000010000100;WGT=0;dbSNPBuildID=135
chr1 69552 rs55874132 G C . PASS GENEINFO=OR4F5:79501;HD;RSPOS=69552;S3D;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050300000000040400000100;WGT=0;dbSNPBuildID=129
chr1 69590 rs141776804 T A . PASS ASP;RSPOS=69590;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 69594 rs144967600 T C . PASS ASP;RSPOS=69594;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 72148 rs182862337 C T . PASS KGPilot123;RSPOS=72148;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 73841 rs143773730 C T . PASS ASP;KGPilot123;RSPOS=73841;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 74651 rs62641291 G A . PASS RSPOS=74651;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
chr1 74681 rs13328683 G T . PASS CFL;GMAF=0.286106032906764;RSPOS=74681;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=121
chr1 74709 rs62641292 T A . PASS CFL;RSPOS=74709;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=129
chr1 74771 rs13328675 A G . PASS CFL;RSPOS=74771;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 74790 rs13328700 C G . PASS CFL;RSPOS=74790;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 74792 rs13328684 G A . PASS CFL;RSPOS=74792;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 77462 rs188023513 G A . PASS KGPilot123;RSPOS=77462;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77470 rs192898053 T C . PASS KGPilot123;RSPOS=77470;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77874 rs184538873 G A . PASS KGPilot123;RSPOS=77874;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77961 rs78385339 G A . PASS GMAF=0.125685557586837;KGPilot123;RSPOS=77961;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040010000100;WGT=0;dbSNPBuildID=131
chr1 79033 rs62641298 A G . PASS GMAF=0.438299817184644;GNO;HD;KGPilot123;RSPOS=79033;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=129
chr1 79050 rs62641299 G T . PASS GMAF=0.224405850091408;GNO;KGPilot123;RSPOS=79050;SAO=0;SSR=0;VC=SNV;VP=050000000000000110000100;WGT=0;dbSNPBuildID=129
chr1 79137 rs143777184 A T . PASS ASP;KGPilot123;RSPOS=79137;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 79417 rs184768190 C T . PASS KGPilot123;RSPOS=79417;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 79418 rs2691296 G C . PASS GMAF=0.0178244972577697;RSPOS=79418;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=100
chr1 79538 rs2691295 C T . PASS RSPOS=79538;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=100
chr1 79772 rs147215883 C G . PASS ASP;KGPilot123;RSPOS=79772;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 79872 rs189224661 T G . PASS KGPilot123;RSPOS=79872;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 80323 rs3942603 G C . PASS CFL;GNO;RSPOS=80323;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=108
chr1 80386 rs3878915 C A . PASS GMAF=0.0118829981718464;RSPOS=80386;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=108
chr1 80454 rs144226842 G C . PASS ASP;KGPilot123;RSPOS=80454;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 81836 rs2259560 A T . PASS ASP;GNO;RSPOS=81836;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=100
chr1 81949 rs181567186 T C . PASS KGPilot123;RSPOS=81949;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 81962 rs4030308 T TAA . PASS ASP;RSPOS=81962;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 82102 rs4030307 C T . PASS ASP;RSPOS=82102;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82103 rs2020400 T C . PASS ASP;RSPOS=82103;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
chr1 82126 rs1815133 C T . PASS ASP;RSPOS=82126;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
chr1 82133 rs4030306 CA C,CAAAAAAAAAAAAAAA . PASS ASP;RSPOS=82136;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 82154 rs4477212 A G . PASS ASP;HD;RSPOS=82154;SAO=0;SSR=0;VC=SNV;VP=050000000004000400000100;WGT=0;dbSNPBuildID=111
chr1 82162 rs1815132 C A . PASS ASP;GMAF=0.0351919561243144;GNO;RSPOS=82162;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=92
chr1 82163 rs139113303 G A . PASS ASP;KGPilot123;RSPOS=82163;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 82196 rs112844054 A T . PASS ASP;RSPOS=82196;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 82249 rs1851945 A G . PASS ASP;GMAF=0.0452468007312614;KGPilot123;RSPOS=82249;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=92
chr1 82282 rs3871775 G A . PASS ASP;RSPOS=82282;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82303 rs3871776 T C . PASS ASP;RSPOS=82303;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82316 rs4030305 A C . PASS ASP;GNO;RSPOS=82316;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=108
chr1 82609 rs149189449 C G . PASS ASP;KGPilot123;RSPOS=82609;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 82676 rs185237834 T G . PASS KGPilot123;RSPOS=82676;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 82734 rs4030331 T C . PASS ASP;GMAF=0.261882998171846;KGPilot123;RSPOS=82734;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=108
chr1 82957 rs189774606 C T . PASS KGPilot123;RSPOS=82957;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83084 rs181193408 T A . PASS KGPilot123;RSPOS=83084;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83088 rs186081601 G C . PASS KGPilot123;RSPOS=83088;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83107 rs4405097 G C . PASS ASP;RSPOS=83107;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=111
chr1 83119 rs4030324 AA A,ATAAC . PASS ASP;RSPOS=83120;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 83771 rs189906733 T G . PASS KGPilot123;RSPOS=83771;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83786 rs58520670 T TA . PASS ASP;RSPOS=83794;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83815 rs58857344 GAGAA G . PASS ASP;RSPOS=83827;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83826 rs71281475 AAAGA A,AAA . PASS ASP;GNO;RSPOS=83827;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 83855 rs59596480 GAA G . PASS ASP;RSPOS=83857;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83872 rs59556914 AA A,AAGA . PASS ASP;RSPOS=83873;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83884 rs59586754 GAAA G . PASS ASP;RSPOS=83885;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83897 rs61330047 GAA G . PASS ASP;RSPOS=83899;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83901 rs58254183 GAAAGAA G . PASS ASP;RSPOS=83903;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83921 rs61338823 GAA G . PASS ASP;RSPOS=83923;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83930 rs71281474 AG A,AGA . PASS ASP;GNO;RSPOS=83931;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 83934 rs59235392 AG A,AGAAA . PASS ASP;RSPOS=83935;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83977 rs180759811 A G . PASS KGPilot123;RSPOS=83977;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84002 rs28850140 G A . PASS ASP;GMAF=0.138939670932358;KGPilot123;RSPOS=84002;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=125
chr1 84010 rs186443818 G A . PASS KGPilot123;RSPOS=84010;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84018 rs61352176 GAA G . PASS ASP;RSPOS=84020;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 84079 rs190867312 T C . PASS KGPilot123;RSPOS=84079;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84139 rs183605470 A T . PASS KGPilot123;RSPOS=84139;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84156 rs188652299 A C . PASS KGPilot123;RSPOS=84156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84244 rs191297051 A C . PASS KGPilot123;RSPOS=84244;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84295 rs183209871 G A . PASS KGPilot123;RSPOS=84295;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84346 rs187855973 T C . PASS KGPilot123;RSPOS=84346;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84453 rs191379015 C G . PASS KGPilot123;RSPOS=84453;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84705 rs183470350 T G . PASS KGPilot123;RSPOS=84705;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135

View File

@ -0,0 +1 @@
100000 1 0

View File

@ -0,0 +1,3 @@
100000 1 11
0 chr1 (null)
0 100000 0

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.