Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
7a386b45a5
|
|
@ -711,6 +711,8 @@ public class SAMDataSource {
|
|||
* @param validationStringency validation stringency.
|
||||
*/
|
||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
||||
int totalNumberOfFiles = readerIDs.size();
|
||||
int readerNumber = 1;
|
||||
for(SAMReaderID readerID: readerIDs) {
|
||||
File indexFile = findIndexFile(readerID.samFile);
|
||||
|
||||
|
|
@ -728,8 +730,7 @@ public class SAMDataSource {
|
|||
reader.enableFileSource(true);
|
||||
reader.setValidationStringency(validationStringency);
|
||||
|
||||
final SAMFileHeader header = reader.getFileHeader();
|
||||
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
||||
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
|
||||
|
||||
readers.put(readerID,reader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -142,20 +142,75 @@ public class SampleDB {
|
|||
* @return
|
||||
*/
|
||||
public final Map<String, Set<Sample>> getFamilies() {
|
||||
return getFamilies(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a map from family ID -> set of family members for all samples in sampleIds with
|
||||
* non-null family ids
|
||||
*
|
||||
* @param sampleIds - all samples to include. If null is passed then all samples are returned.
|
||||
* @return
|
||||
*/
|
||||
public final Map<String, Set<Sample>> getFamilies(Collection<String> sampleIds) {
|
||||
final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
|
||||
|
||||
for ( final Sample sample : samples.values() ) {
|
||||
final String famID = sample.getFamilyID();
|
||||
if ( famID != null ) {
|
||||
if ( ! families.containsKey(famID) )
|
||||
families.put(famID, new TreeSet<Sample>());
|
||||
families.get(famID).add(sample);
|
||||
if(sampleIds == null || sampleIds.contains(sample.getID())){
|
||||
final String famID = sample.getFamilyID();
|
||||
if ( famID != null ) {
|
||||
if ( ! families.containsKey(famID) )
|
||||
families.put(famID, new TreeSet<Sample>());
|
||||
families.get(famID).add(sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return families;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the set of all children that have both of their parents.
|
||||
* Note that if a family is composed of more than 1 child, each child is
|
||||
* returned.
|
||||
* @return - all the children that have both of their parents
|
||||
*/
|
||||
public final Set<Sample> getChildrenWithParents(){
|
||||
return getChildrenWithParents(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of all children that have both of their parents.
|
||||
* Note that if triosOnly = false, a family is composed of more than 1 child, each child is
|
||||
* returned.
|
||||
*
|
||||
* This method can be used wherever trios are needed
|
||||
*
|
||||
* @param triosOnly - if set to true, only strict trios are returned
|
||||
* @return - all the children that have both of their parents
|
||||
*/
|
||||
public final Set<Sample> getChildrenWithParents(boolean triosOnly) {
|
||||
|
||||
Map<String, Set<Sample>> families = getFamilies();
|
||||
final Set<Sample> childrenWithParents = new HashSet<Sample>();
|
||||
Iterator<Sample> sampleIterator;
|
||||
|
||||
for ( Set<Sample> familyMembers: families.values() ) {
|
||||
if(triosOnly && familyMembers.size() != 3)
|
||||
continue;
|
||||
|
||||
sampleIterator = familyMembers.iterator();
|
||||
Sample sample;
|
||||
while(sampleIterator.hasNext()){
|
||||
sample = sampleIterator.next();
|
||||
if(sample.getParents().size() == 2 && familyMembers.containsAll(sample.getParents()))
|
||||
childrenWithParents.add(sample);
|
||||
}
|
||||
|
||||
}
|
||||
return childrenWithParents;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all samples with a given family ID
|
||||
* @param familyId
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ public abstract class Walker<MapType, ReduceType> {
|
|||
return getToolkit().getMasterSequenceDictionary();
|
||||
}
|
||||
|
||||
protected SampleDB getSampleDB() {
|
||||
public SampleDB getSampleDB() {
|
||||
return getToolkit().getSampleDB();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,22 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -30,23 +26,26 @@ import java.util.Map;
|
|||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
private MendelianViolation mendelianViolation = null;
|
||||
private String motherId;
|
||||
private String fatherId;
|
||||
private String childId;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( mendelianViolation == null ) {
|
||||
if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
else {
|
||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line.");
|
||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
|
||||
}
|
||||
}
|
||||
|
||||
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() &&
|
||||
vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() &&
|
||||
vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods();
|
||||
boolean hasAppropriateGenotypes = vc.hasGenotype(motherId) && vc.getGenotype(motherId).hasLikelihoods() &&
|
||||
vc.hasGenotype(fatherId) && vc.getGenotype(fatherId).hasLikelihoods() &&
|
||||
vc.hasGenotype(childId) && vc.getGenotype(childId).hasLikelihoods();
|
||||
if ( hasAppropriateGenotypes )
|
||||
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));
|
||||
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc,motherId,fatherId,childId));
|
||||
|
||||
return toRet;
|
||||
}
|
||||
|
|
@ -55,4 +54,27 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||
|
||||
private boolean checkAndSetSamples(SampleDB db){
|
||||
Set<String> families = db.getFamilyIDs();
|
||||
if(families.size() != 1)
|
||||
return false;
|
||||
|
||||
Set<Sample> family = db.getFamily(families.iterator().next());
|
||||
if(family.size() != 3)
|
||||
return false;
|
||||
|
||||
Iterator<Sample> sampleIter = family.iterator();
|
||||
Sample sample;
|
||||
for(sample = sampleIter.next();sampleIter.hasNext();sample=sampleIter.next()){
|
||||
if(sample.getParents().size()==2){
|
||||
motherId = sample.getMaternalID();
|
||||
fatherId = sample.getPaternalID();
|
||||
childId = sample.getID();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
for ( final Genotype genotype : genotypes ) {
|
||||
|
||||
// we care only about variant calls with likelihoods
|
||||
if ( genotype.isHomRef() )
|
||||
if ( !genotype.isHet() && !genotype.isHomVar() )
|
||||
continue;
|
||||
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
|
|
|
|||
|
|
@ -12,10 +12,8 @@ import org.broadinstitute.sting.utils.MendelianViolation;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -26,42 +24,33 @@ import java.util.*;
|
|||
|
||||
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
private Set<MendelianViolation> fullMVSet = null;
|
||||
private Set<Sample> trios = null;
|
||||
private final static int REF = 0;
|
||||
private final static int HET = 1;
|
||||
private final static int HOM = 2;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( fullMVSet == null ) {
|
||||
fullMVSet = new HashSet<MendelianViolation>();
|
||||
|
||||
if ( trios == null ) {
|
||||
if ( walker instanceof VariantAnnotator ) {
|
||||
final Map<String,Set<Sample>> families = ((VariantAnnotator) walker).getSampleDB().getFamilies();
|
||||
for( final Set<Sample> family : families.values() ) {
|
||||
for( final Sample sample : family ) {
|
||||
if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now
|
||||
fullMVSet.add( new MendelianViolation(sample, 0.0) );
|
||||
}
|
||||
}
|
||||
}
|
||||
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
|
||||
} else {
|
||||
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
|
||||
}
|
||||
}
|
||||
|
||||
final Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||
final HashSet<MendelianViolation> mvsToTest = new HashSet<MendelianViolation>();
|
||||
final HashSet<Sample> triosToTest = new HashSet<Sample>();
|
||||
|
||||
for( final MendelianViolation mv : fullMVSet ) {
|
||||
final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() &&
|
||||
vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() &&
|
||||
vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods();
|
||||
for( final Sample child : trios) {
|
||||
final boolean hasAppropriateGenotypes = vc.hasGenotype(child.getID()) && vc.getGenotype(child.getID()).hasLikelihoods() &&
|
||||
vc.hasGenotype(child.getPaternalID()) && vc.getGenotype(child.getPaternalID()).hasLikelihoods() &&
|
||||
vc.hasGenotype(child.getMaternalID()) && vc.getGenotype(child.getMaternalID()).hasLikelihoods();
|
||||
if ( hasAppropriateGenotypes ) {
|
||||
mvsToTest.add(mv);
|
||||
triosToTest.add(child);
|
||||
}
|
||||
}
|
||||
|
||||
toRet.put("TDT", calculateTDT( vc, mvsToTest ));
|
||||
toRet.put("TDT", calculateTDT( vc, triosToTest ));
|
||||
|
||||
return toRet;
|
||||
}
|
||||
|
|
@ -72,27 +61,27 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
|||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
|
||||
|
||||
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
||||
private double calculateTDT( final VariantContext vc, final Set<MendelianViolation> mvsToTest ) {
|
||||
private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
|
||||
|
||||
final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM);
|
||||
final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM);
|
||||
final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET);
|
||||
final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET);
|
||||
final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET);
|
||||
final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET);
|
||||
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM);
|
||||
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM);
|
||||
final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET);
|
||||
final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET);
|
||||
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET);
|
||||
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET);
|
||||
|
||||
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
|
||||
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
|
||||
return (numer * numer) / denom;
|
||||
}
|
||||
|
||||
private double calculateNChildren( final VariantContext vc, final Set<MendelianViolation> mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) {
|
||||
final double likelihoodVector[] = new double[mvsToTest.size() * 2];
|
||||
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
|
||||
final double likelihoodVector[] = new double[triosToTest.size() * 2];
|
||||
int iii = 0;
|
||||
for( final MendelianViolation mv : mvsToTest ) {
|
||||
final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector();
|
||||
final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector();
|
||||
final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector();
|
||||
for( final Sample child : triosToTest ) {
|
||||
final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
|
||||
final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
|
||||
final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
|
||||
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
|
||||
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -167,9 +167,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||
protected boolean indelsOnly = false;
|
||||
|
||||
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
|
||||
public String familyStr = null;
|
||||
|
||||
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
|
||||
public double minGenotypeQualityP = 0.0;
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
|
||||
protected enum GenotypeType { AA, AB, BB }
|
||||
|
||||
protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE;
|
||||
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
|
||||
|
||||
protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
this.N = N;
|
||||
|
|
@ -62,24 +62,14 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
* @param GLs genotype likelihoods
|
||||
* @param Alleles Alleles corresponding to GLs
|
||||
* @param log10AlleleFrequencyPriors priors
|
||||
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
|
||||
* @param GLs genotype likelihoods
|
||||
* @param Alleles Alleles corresponding to GLs
|
||||
* @param log10AlleleFrequencyPriors priors
|
||||
* @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results
|
||||
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results
|
||||
*/
|
||||
protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors);
|
||||
|
||||
/**
|
||||
* Can be overridden by concrete subclasses
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @param log10AlleleFrequencyPosteriors allele frequency results
|
||||
* @param AFofMaxLikelihood allele frequency of max likelihood
|
||||
*
|
||||
* @return calls
|
||||
*/
|
||||
protected abstract GenotypesContext assignGenotypes(VariantContext vc,
|
||||
double[] log10AlleleFrequencyPosteriors,
|
||||
int AFofMaxLikelihood);
|
||||
double[][] log10AlleleFrequencyPriors,
|
||||
double[][] log10AlleleFrequencyLikelihoods,
|
||||
double[][] log10AlleleFrequencyPosteriors);
|
||||
}
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
public class BiallelicGenotypeLikelihoods {
|
||||
|
||||
private String sample;
|
||||
private double[] GLs;
|
||||
private Allele A, B;
|
||||
private int depth;
|
||||
|
||||
/**
|
||||
* Create a new object for sample with given alleles and genotype likelihoods
|
||||
*
|
||||
* @param sample sample name
|
||||
* @param A allele A
|
||||
* @param B allele B
|
||||
* @param log10AALikelihoods AA likelihoods
|
||||
* @param log10ABLikelihoods AB likelihoods
|
||||
* @param log10BBLikelihoods BB likelihoods
|
||||
* @param depth the read depth used in creating the likelihoods
|
||||
*/
|
||||
public BiallelicGenotypeLikelihoods(String sample,
|
||||
Allele A,
|
||||
Allele B,
|
||||
double log10AALikelihoods,
|
||||
double log10ABLikelihoods,
|
||||
double log10BBLikelihoods,
|
||||
int depth) {
|
||||
this.sample = sample;
|
||||
this.A = A;
|
||||
this.B = B;
|
||||
this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods};
|
||||
this.depth = depth;
|
||||
}
|
||||
|
||||
public String getSample() {
|
||||
return sample;
|
||||
}
|
||||
|
||||
public double getAALikelihoods() {
|
||||
return GLs[0];
|
||||
}
|
||||
|
||||
public double getABLikelihoods() {
|
||||
return GLs[1];
|
||||
}
|
||||
|
||||
public double getBBLikelihoods() {
|
||||
return GLs[2];
|
||||
}
|
||||
|
||||
public double[] getLikelihoods() {
|
||||
return GLs;
|
||||
}
|
||||
|
||||
public Allele getAlleleA() {
|
||||
return A;
|
||||
}
|
||||
|
||||
public Allele getAlleleB() {
|
||||
return B;
|
||||
}
|
||||
|
||||
public int getDepth() {
|
||||
return depth;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -27,13 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: Aug 4, 2009
|
||||
* Time: 6:46:09 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public enum DiploidGenotype {
|
||||
AA ('A', 'A'),
|
||||
AC ('A', 'C'),
|
||||
|
|
@ -110,6 +103,20 @@ public enum DiploidGenotype {
|
|||
return conversionMatrix[index1][index2];
|
||||
}
|
||||
|
||||
/**
|
||||
* create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly
|
||||
* @param baseIndex1 base1
|
||||
* @param baseIndex2 base2
|
||||
* @return the diploid genotype
|
||||
*/
|
||||
public static DiploidGenotype createDiploidGenotype(int baseIndex1, int baseIndex2) {
|
||||
if ( baseIndex1 == -1 )
|
||||
throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character");
|
||||
if ( baseIndex2 == -1 )
|
||||
throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character");
|
||||
return conversionMatrix[baseIndex1][baseIndex2];
|
||||
}
|
||||
|
||||
private static final DiploidGenotype[][] conversionMatrix = {
|
||||
{ DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT },
|
||||
{ DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT },
|
||||
|
|
|
|||
|
|
@ -27,80 +27,43 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||
//
|
||||
// code for testing purposes
|
||||
//
|
||||
|
||||
private final static boolean DEBUG = false;
|
||||
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
|
||||
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
private final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
||||
|
||||
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
public void getLog10PNonRef(GenotypesContext GLs, List<Allele> alleles,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors) {
|
||||
public void getLog10PNonRef(final GenotypesContext GLs,
|
||||
final List<Allele> alleles,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final double[][] log10AlleleFrequencyLikelihoods,
|
||||
final double[][] log10AlleleFrequencyPosteriors) {
|
||||
final int numAlleles = alleles.size();
|
||||
final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
|
||||
final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
|
||||
|
||||
int idxDiag = numAlleles;
|
||||
int incr = numAlleles - 1;
|
||||
for (int k=1; k < numAlleles; k++) {
|
||||
// multi-allelic approximation, part 1: Ideally
|
||||
// for each alt allele compute marginal (suboptimal) posteriors -
|
||||
// compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of
|
||||
// as a row-wise upper triangular matrix of likelihoods.
|
||||
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
|
||||
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
|
||||
|
||||
final int idxAA = 0;
|
||||
final int idxAB = k;
|
||||
// yy is always element on the diagonal.
|
||||
// 2 alleles: BBelement 2
|
||||
// 3 alleles: BB element 3. CC element 5
|
||||
// 4 alleles:
|
||||
final int idxBB = idxDiag;
|
||||
idxDiag += incr--;
|
||||
|
||||
final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
||||
|
||||
if (numAlleles > 2) {
|
||||
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
|
||||
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
|
||||
}
|
||||
}
|
||||
|
||||
if (numAlleles > 2) {
|
||||
// multiallelic approximation, part 2:
|
||||
// report posteriors for allele that has highest estimated AC
|
||||
int mostLikelyAlleleIdx = MathUtils.maxElementIndex(bestAFguess);
|
||||
for (int k=0; k < log10AlleleFrequencyPosteriors.length-1; k++)
|
||||
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
|
||||
|
||||
}
|
||||
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
|
||||
}
|
||||
|
||||
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
|
||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); // TODO -- initialize with size of GLs
|
||||
|
||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
||||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
|
||||
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
|
||||
genotypeLikelihoods.add(gls);
|
||||
}
|
||||
}
|
||||
|
|
@ -109,9 +72,401 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
|
||||
|
||||
final static double approximateLog10SumLog10(double[] vals) {
|
||||
if ( vals.length < 2 )
|
||||
throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10");
|
||||
|
||||
double approx = approximateLog10SumLog10(vals[0], vals[1]);
|
||||
for ( int i = 2; i < vals.length; i++ )
|
||||
approx = approximateLog10SumLog10(approx, vals[i]);
|
||||
return approx;
|
||||
}
|
||||
|
||||
final static double approximateLog10SumLog10(double small, double big) {
|
||||
// make sure small is really the smaller value
|
||||
if ( small > big ) {
|
||||
final double t = big;
|
||||
big = small;
|
||||
small = t;
|
||||
}
|
||||
|
||||
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
|
||||
return big;
|
||||
|
||||
if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
|
||||
return big;
|
||||
|
||||
// OK, so |y-x| < tol: we use the following identity then:
|
||||
// we need to compute log10(10^x + 10^y)
|
||||
// By Jacobian logarithm identity, this is equal to
|
||||
// max(x,y) + log10(1+10^-abs(x-y))
|
||||
// we compute the second term as a table lookup
|
||||
// with integer quantization
|
||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||
int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||
|
||||
//double z =Math.log10(1+Math.pow(10.0,-diff));
|
||||
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
||||
return big + MathUtils.jacobianLogTable[ind];
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Linearized, ~O(N), implementation.
|
||||
// Multi-allelic implementation.
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
private static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||
|
||||
// a wrapper around the int array so that we can make it hashable
|
||||
private static final class ExactACcounts {
|
||||
|
||||
private final int[] counts;
|
||||
private int hashcode = -1;
|
||||
|
||||
public ExactACcounts(final int[] counts) {
|
||||
this.counts = counts;
|
||||
}
|
||||
|
||||
public int[] getCounts() {
|
||||
return counts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if ( hashcode == -1 )
|
||||
hashcode = Arrays.hashCode(counts);
|
||||
return hashcode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(counts[0]);
|
||||
for ( int i = 1; i < counts.length; i++ ) {
|
||||
sb.append("/");
|
||||
sb.append(counts[i]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// This class represents a column in the Exact AC calculation matrix
|
||||
private static final class ExactACset {
|
||||
|
||||
// the counts of the various alternate alleles which this column represents
|
||||
final ExactACcounts ACcounts;
|
||||
|
||||
// the column of the matrix
|
||||
final double[] log10Likelihoods;
|
||||
|
||||
// mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column;
|
||||
// for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB.
|
||||
final HashMap<ExactACcounts, Integer> ACsetIndexToPLIndex = new HashMap<ExactACcounts, Integer>();
|
||||
|
||||
// to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them
|
||||
final ArrayList<ExactACcounts> dependentACsetsToDelete = new ArrayList<ExactACcounts>();
|
||||
|
||||
|
||||
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||
this.ACcounts = ACcounts;
|
||||
log10Likelihoods = new double[size];
|
||||
}
|
||||
|
||||
// sum of all the non-reference alleles
|
||||
public int getACsum() {
|
||||
int sum = 0;
|
||||
for ( int count : ACcounts.getCounts() )
|
||||
sum += count;
|
||||
return sum;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false;
|
||||
}
|
||||
}
|
||||
|
||||
public static void linearExactMultiAllelic(final GenotypesContext GLs,
|
||||
final int numAlternateAlleles,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final double[][] log10AlleleFrequencyLikelihoods,
|
||||
final double[][] log10AlleleFrequencyPosteriors,
|
||||
final boolean preserveData) {
|
||||
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||
final int numSamples = genotypeLikelihoods.size()-1;
|
||||
final int numChr = 2*numSamples;
|
||||
|
||||
// queue of AC conformations to process
|
||||
final Queue<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||
|
||||
// mapping of ExactACset indexes to the objects
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
|
||||
|
||||
// add AC=0 to the queue
|
||||
int[] zeroCounts = new int[numAlternateAlleles];
|
||||
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
|
||||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
||||
}
|
||||
}
|
||||
|
||||
private static double calculateAlleleCountConformation(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final double maxLog10L,
|
||||
final int numChr,
|
||||
final boolean preserveData,
|
||||
final Queue<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final double[][] log10AlleleFrequencyLikelihoods,
|
||||
final double[][] log10AlleleFrequencyPosteriors) {
|
||||
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
|
||||
|
||||
// compute the log10Likelihoods
|
||||
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||
|
||||
// clean up memory
|
||||
if ( !preserveData ) {
|
||||
for ( ExactACcounts index : set.dependentACsetsToDelete ) {
|
||||
indexesToACset.put(index, null);
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
|
||||
// can we abort early because the log10Likelihoods are so small?
|
||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
|
||||
// no reason to keep this data around because nothing depends on it
|
||||
if ( !preserveData )
|
||||
indexesToACset.put(set.ACcounts, null);
|
||||
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
// iterate over higher frequencies if possible
|
||||
final int ACwiggle = numChr - set.getACsum();
|
||||
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
||||
return log10LofK;
|
||||
|
||||
ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing
|
||||
final int numAltAlleles = set.ACcounts.getCounts().length;
|
||||
|
||||
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
|
||||
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
|
||||
|
||||
// add conformations for the k+1 case
|
||||
int PLindex = 0;
|
||||
for ( int allele = 0; allele < numAltAlleles; allele++ ) {
|
||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||
ACcountsClone[allele]++;
|
||||
lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset);
|
||||
}
|
||||
|
||||
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
||||
if ( ACwiggle > 1 ) {
|
||||
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
|
||||
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
|
||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||
ACcountsClone[allele_i]++;
|
||||
ACcountsClone[allele_j]++;
|
||||
lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate
|
||||
// over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early)
|
||||
if ( !preserveData && lastSet == null ) {
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
|
||||
lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
|
||||
}
|
||||
if ( lastSet != null )
|
||||
lastSet.dependentACsetsToDelete.add(set.ACcounts);
|
||||
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
|
||||
// also adds it as a dependency to the given callingSetIndex.
|
||||
// returns the ExactACset if that set was not already in the queue and null otherwise.
|
||||
private static ExactACset updateACset(final int[] ACcounts,
|
||||
final int numChr,
|
||||
final ExactACset callingSet,
|
||||
final int PLsetIndex,
|
||||
final Queue<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||
final ExactACcounts index = new ExactACcounts(ACcounts);
|
||||
boolean wasInQueue = true;
|
||||
if ( !indexesToACset.containsKey(index) ) {
|
||||
ExactACset set = new ExactACset(numChr/2 +1, index);
|
||||
indexesToACset.put(index, set);
|
||||
ACqueue.add(set);
|
||||
wasInQueue = false;
|
||||
}
|
||||
|
||||
// add the given dependency to the set
|
||||
final ExactACset set = indexesToACset.get(index);
|
||||
set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex);
|
||||
return wasInQueue ? null : set;
|
||||
}
|
||||
|
||||
private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final Queue<ExactACset> ACqueue) {
|
||||
ExactACset set = null;
|
||||
for ( ExactACset queued : ACqueue ) {
|
||||
if ( queued.dependentACsetsToDelete.contains(callingSetIndex) )
|
||||
set = queued;
|
||||
}
|
||||
return set;
|
||||
}
|
||||
|
||||
private static void computeLofK(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final double[][] log10AlleleFrequencyLikelihoods,
|
||||
final double[][] log10AlleleFrequencyPosteriors) {
|
||||
|
||||
set.log10Likelihoods[0] = 0.0; // the zero case
|
||||
final int totalK = set.getACsum();
|
||||
|
||||
// special case for k = 0 over all k
|
||||
if ( totalK == 0 ) {
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ )
|
||||
set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
|
||||
}
|
||||
// k > 0 for at least one k
|
||||
else {
|
||||
// all possible likelihoods for a given cell from which to choose the max
|
||||
final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
|
||||
final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it
|
||||
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||
|
||||
// initialize
|
||||
for ( int i = 0; i < numPaths; i++ )
|
||||
// TODO -- Arrays.fill?
|
||||
// todo -- is this even necessary? Why not have as else below?
|
||||
log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY;
|
||||
|
||||
// deal with the AA case first
|
||||
if ( totalK < 2*j-1 )
|
||||
log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
|
||||
|
||||
// deal with the other possible conformations now
|
||||
if ( totalK <= 2*j ) { // skip impossible conformations
|
||||
int conformationIndex = 1;
|
||||
for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
|
||||
log10ConformationLikelihoods[conformationIndex++] =
|
||||
determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()];
|
||||
}
|
||||
}
|
||||
|
||||
final double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods);
|
||||
|
||||
// finally, update the L(j,k) value
|
||||
set.log10Likelihoods[j] = log10Max - logDenominator;
|
||||
}
|
||||
}
|
||||
|
||||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
|
||||
// determine the power of theta to use
|
||||
int nonRefAlleles = 0;
|
||||
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
|
||||
if ( set.ACcounts.getCounts()[i] > 0 )
|
||||
nonRefAlleles++;
|
||||
}
|
||||
|
||||
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
|
||||
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
|
||||
int AC = set.ACcounts.getCounts()[i];
|
||||
log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
|
||||
|
||||
// for k=0 we still want to use theta
|
||||
final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
|
||||
log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
|
||||
}
|
||||
}
|
||||
|
||||
private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
|
||||
// todo -- arent' there a small number of fixed values that this function can adopt?
|
||||
// todo -- at a minimum it'd be good to partially compute some of these in ACCounts for performance
|
||||
// todo -- need to cache PLIndex -> two alleles, compute looping over each PLIndex. Note all other operations are efficient
|
||||
// todo -- this can be computed once at the start of the all operations
|
||||
|
||||
// the closed form representation generalized for multiple alleles is as follows:
|
||||
// AA: (2j - totalK) * (2j - totalK - 1)
|
||||
// AB: 2k_b * (2j - totalK)
|
||||
// AC: 2k_c * (2j - totalK)
|
||||
// BB: k_b * (k_b - 1)
|
||||
// BC: 2 * k_b * k_c
|
||||
// CC: k_c * (k_c - 1)
|
||||
|
||||
final int numAltAlleles = ACcounts.length;
|
||||
|
||||
// the AX het case
|
||||
if ( PLindex <= numAltAlleles )
|
||||
return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK];
|
||||
|
||||
int subtractor = numAltAlleles+1;
|
||||
int subtractions = 0;
|
||||
do {
|
||||
PLindex -= subtractor;
|
||||
subtractor--;
|
||||
subtractions++;
|
||||
}
|
||||
while ( PLindex >= subtractor );
|
||||
|
||||
final int k_i = ACcounts[subtractions-1];
|
||||
|
||||
// the hom var case (e.g. BB, CC, DD)
|
||||
final double coeff;
|
||||
if ( PLindex == 0 ) {
|
||||
coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
|
||||
}
|
||||
// the het non-ref case (e.g. BC, BD, CD)
|
||||
else {
|
||||
final int k_j = ACcounts[subtractions+PLindex-1];
|
||||
coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
|
||||
}
|
||||
|
||||
return coeff;
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Deprecated bi-allelic ~O(N) implementation. Kept here for posterity.
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
|
|
@ -119,6 +474,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
* A simple data structure that holds the current, prev, and prev->prev likelihoods vectors
|
||||
* for the exact model calculation
|
||||
*/
|
||||
/*
|
||||
private final static class ExactACCache {
|
||||
double[] kMinus2, kMinus1, kMinus0;
|
||||
|
||||
|
|
@ -154,7 +510,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
public int linearExact(GenotypesContext GLs,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
||||
double[][] log10AlleleFrequencyLikelihoods,
|
||||
double[][] log10AlleleFrequencyPosteriors) {
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||
final int numSamples = genotypeLikelihoods.size()-1;
|
||||
final int numChr = 2*numSamples;
|
||||
|
|
@ -171,7 +528,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
if ( k == 0 ) { // special case for k = 0
|
||||
for ( int j=1; j <= numSamples; j++ ) {
|
||||
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA];
|
||||
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0];
|
||||
}
|
||||
} else { // k > 0
|
||||
final double[] kMinus1 = logY.getkMinus1();
|
||||
|
|
@ -184,14 +541,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
double aa = Double.NEGATIVE_INFINITY;
|
||||
double ab = Double.NEGATIVE_INFINITY;
|
||||
if (k < 2*j-1)
|
||||
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[idxAA];
|
||||
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0];
|
||||
|
||||
if (k < 2*j)
|
||||
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[idxAB];
|
||||
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1];
|
||||
|
||||
double log10Max;
|
||||
if (k > 1) {
|
||||
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[idxBB];
|
||||
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2];
|
||||
log10Max = approximateLog10SumLog10(aa, ab, bb);
|
||||
} else {
|
||||
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
||||
|
|
@ -205,7 +562,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
// update the posteriors vector
|
||||
final double log10LofK = kMinus0[numSamples];
|
||||
log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
|
||||
log10AlleleFrequencyLikelihoods[0][k] = log10LofK;
|
||||
log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k];
|
||||
|
||||
// can we abort early?
|
||||
lastK = k;
|
||||
|
|
@ -222,202 +580,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
|
||||
final static double approximateLog10SumLog10(double a, double b, double c) {
|
||||
//return softMax(new double[]{a, b, c});
|
||||
return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c);
|
||||
}
|
||||
*/
|
||||
|
||||
final static double approximateLog10SumLog10(double small, double big) {
|
||||
// make sure small is really the smaller value
|
||||
if ( small > big ) {
|
||||
final double t = big;
|
||||
big = small;
|
||||
small = t;
|
||||
}
|
||||
|
||||
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
|
||||
return big;
|
||||
|
||||
if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
|
||||
return big;
|
||||
|
||||
// OK, so |y-x| < tol: we use the following identity then:
|
||||
// we need to compute log10(10^x + 10^y)
|
||||
// By Jacobian logarithm identity, this is equal to
|
||||
// max(x,y) + log10(1+10^-abs(x-y))
|
||||
// we compute the second term as a table lookup
|
||||
// with integer quantization
|
||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||
int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||
|
||||
//double z =Math.log10(1+Math.pow(10.0,-diff));
|
||||
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
||||
return big + MathUtils.jacobianLogTable[ind];
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Can be overridden by concrete subclasses
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @param log10AlleleFrequencyPosteriors allele frequency results
|
||||
* @param AFofMaxLikelihood allele frequency of max likelihood
|
||||
*
|
||||
* @return calls
|
||||
*/
|
||||
public GenotypesContext assignGenotypes(VariantContext vc,
|
||||
double[] log10AlleleFrequencyPosteriors,
|
||||
int AFofMaxLikelihood) {
|
||||
if ( !vc.isVariant() )
|
||||
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
|
||||
|
||||
|
||||
GenotypesContext GLs = vc.getGenotypes();
|
||||
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
|
||||
int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1];
|
||||
|
||||
ArrayList<String> sampleIndices = new ArrayList<String>();
|
||||
int sampleIdx = 0;
|
||||
|
||||
// todo - optimize initialization
|
||||
for (int k=0; k <= AFofMaxLikelihood; k++)
|
||||
for (int j=0; j <= GLs.size(); j++)
|
||||
pathMetricArray[j][k] = -1e30;
|
||||
|
||||
pathMetricArray[0][0] = 0.0;
|
||||
|
||||
// todo = can't deal with optimal dynamic programming solution with multiallelic records
|
||||
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
|
||||
sampleIndices.addAll(GLs.getSampleNamesOrderedByName());
|
||||
sampleIdx = GLs.size();
|
||||
}
|
||||
else {
|
||||
|
||||
for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) {
|
||||
if ( !genotype.hasLikelihoods() )
|
||||
continue;
|
||||
|
||||
double[] likelihoods = genotype.getLikelihoods().getAsVector();
|
||||
|
||||
if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) {
|
||||
//System.out.print(sample.getKey()+":");
|
||||
//for (int k=0; k < likelihoods.length; k++)
|
||||
// System.out.format("%4.2f ",likelihoods[k]);
|
||||
//System.out.println();
|
||||
// all likelihoods are essentially the same: skip this sample and will later on force no call.
|
||||
//sampleIdx++;
|
||||
continue;
|
||||
}
|
||||
|
||||
sampleIndices.add(genotype.getSampleName());
|
||||
|
||||
for (int k=0; k <= AFofMaxLikelihood; k++) {
|
||||
|
||||
double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0];
|
||||
int bestIndex = k;
|
||||
|
||||
if (k>0) {
|
||||
double m2 = pathMetricArray[sampleIdx][k-1] + likelihoods[1];
|
||||
if (m2 > bestMetric) {
|
||||
bestMetric = m2;
|
||||
bestIndex = k-1;
|
||||
}
|
||||
}
|
||||
|
||||
if (k>1) {
|
||||
double m2 = pathMetricArray[sampleIdx][k-2] + likelihoods[2];
|
||||
if (m2 > bestMetric) {
|
||||
bestMetric = m2;
|
||||
bestIndex = k-2;
|
||||
}
|
||||
}
|
||||
|
||||
pathMetricArray[sampleIdx+1][k] = bestMetric;
|
||||
tracebackArray[sampleIdx+1][k] = bestIndex;
|
||||
}
|
||||
sampleIdx++;
|
||||
}
|
||||
}
|
||||
|
||||
GenotypesContext calls = GenotypesContext.create();
|
||||
|
||||
int startIdx = AFofMaxLikelihood;
|
||||
for (int k = sampleIdx; k > 0; k--) {
|
||||
int bestGTguess;
|
||||
String sample = sampleIndices.get(k-1);
|
||||
Genotype g = GLs.get(sample);
|
||||
if ( !g.hasLikelihoods() )
|
||||
continue;
|
||||
// if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now,
|
||||
// and will add no-call genotype to GL's in a second pass
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||
|
||||
double[] likelihoods = g.getLikelihoods().getAsVector();
|
||||
|
||||
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
|
||||
bestGTguess = Utils.findIndexOfMaxEntry(likelihoods);
|
||||
}
|
||||
else {
|
||||
int newIdx = tracebackArray[k][startIdx];;
|
||||
bestGTguess = startIdx - newIdx;
|
||||
startIdx = newIdx;
|
||||
}
|
||||
|
||||
// likelihoods are stored row-wise in lower triangular matrix. IE
|
||||
// for 2 alleles they have ordering AA,AB,BB
|
||||
// for 3 alleles they are ordered AA,AB,BB,AC,BC,CC
|
||||
// Get now alleles corresponding to best index
|
||||
int kk=0;
|
||||
boolean done = false;
|
||||
for (int j=0; j < vc.getNAlleles(); j++) {
|
||||
for (int i=0; i <= j; i++){
|
||||
if (kk++ == bestGTguess) {
|
||||
if (i==0)
|
||||
myAlleles.add(vc.getReference());
|
||||
else
|
||||
myAlleles.add(vc.getAlternateAllele(i-1));
|
||||
|
||||
if (j==0)
|
||||
myAlleles.add(vc.getReference());
|
||||
else
|
||||
myAlleles.add(vc.getAlternateAllele(j-1));
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
if (done)
|
||||
break;
|
||||
}
|
||||
|
||||
final double qual = GenotypeLikelihoods.getQualFromLikelihoods(bestGTguess, likelihoods);
|
||||
//System.out.println(myAlleles.toString());
|
||||
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
||||
}
|
||||
|
||||
for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) {
|
||||
if ( !genotype.hasLikelihoods() )
|
||||
continue;
|
||||
|
||||
final Genotype g = GLs.get(genotype.getSampleName());
|
||||
final double[] likelihoods = genotype.getLikelihoods().getAsVector();
|
||||
|
||||
if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
|
||||
continue; // regular likelihoods
|
||||
|
||||
final double qual = Genotype.NO_LOG10_PERROR;
|
||||
calls.replace(new Genotype(g.getSampleName(), NO_CALL_ALLELES, qual, null, g.getAttributes(), false));
|
||||
}
|
||||
|
||||
return calls;
|
||||
}
|
||||
|
||||
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
|
||||
int j = logYMatrix.length - 1;
|
||||
System.out.printf("-----------------------------------%n");
|
||||
for (int k=0; k <= numChr; k++) {
|
||||
double posterior = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
||||
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -79,19 +80,17 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
* @param contexts stratified alignment contexts
|
||||
* @param contextType stratified context type
|
||||
* @param priors priors to use for GLs
|
||||
* @param GLs hash of sample->GL to fill in
|
||||
* @param alternateAlleleToUse the alternate allele to use, null if not set
|
||||
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
|
||||
* @return genotype likelihoods per sample for AA, AB, BB
|
||||
* @return variant context where genotypes are no-called but with GLs
|
||||
*/
|
||||
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup);
|
||||
public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup);
|
||||
|
||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
int count = 0;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
|||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -41,8 +42,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
|||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -243,7 +243,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
// get deletion length
|
||||
int dLen = Integer.valueOf(bestAltAllele.substring(1));
|
||||
// get ref bases of accurate deletion
|
||||
int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart());
|
||||
int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
|
||||
|
||||
//System.out.println(new String(ref.getBases()));
|
||||
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
|
||||
|
|
@ -270,19 +270,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
|
||||
|
||||
public Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
Allele refAllele, altAllele;
|
||||
VariantContext vc = null;
|
||||
|
|
@ -368,10 +366,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
||||
ref, hsize, numPrefBases);
|
||||
|
||||
// start making the VariantContext
|
||||
final int endLoc = calculateEndPos(alleleList, refAllele, loc);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
|
||||
|
||||
// create the genotypes; no-call everyone for now
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
final List<Allele> noCall = new ArrayList<Allele>();
|
||||
noCall.add(Allele.NO_CALL);
|
||||
|
||||
// For each sample, get genotype likelihoods based on pileup
|
||||
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
|
||||
// initialize the GenotypeLikelihoods
|
||||
GLs.clear();
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
|
@ -384,11 +389,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
if (pileup != null ) {
|
||||
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
|
||||
|
||||
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
||||
alleleList,
|
||||
genotypeLikelihoods,
|
||||
getFilteredDepth(pileup)));
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
|
||||
|
|
@ -399,9 +405,25 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
}
|
||||
|
||||
return refAllele;
|
||||
return builder.genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
||||
// for indels, stop location is one more than ref allele length
|
||||
boolean hasNullAltAllele = false;
|
||||
for ( Allele a : alleles ) {
|
||||
if ( a.isNull() ) {
|
||||
hasNullAltAllele = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int endLoc = loc.getStart() + refAllele.length();
|
||||
if( !hasNullAltAllele )
|
||||
endLoc--;
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
||||
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
|
||||
return indelLikelihoodMap.get();
|
||||
|
|
|
|||
|
|
@ -1,52 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: delangel
|
||||
* Date: 6/1/11
|
||||
* Time: 10:38 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MultiallelicGenotypeLikelihoods {
|
||||
private String sample;
|
||||
private double[] GLs;
|
||||
private List<Allele> alleleList;
|
||||
private int depth;
|
||||
|
||||
public MultiallelicGenotypeLikelihoods(String sample,
|
||||
List<Allele> A,
|
||||
double[] log10Likelihoods, int depth) {
|
||||
/* Check for consistency between likelihood vector and number of alleles */
|
||||
int numAlleles = A.size();
|
||||
if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2)
|
||||
throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!"));
|
||||
|
||||
this.sample = sample;
|
||||
this.alleleList = A;
|
||||
this.GLs = log10Likelihoods;
|
||||
this.depth = depth;
|
||||
}
|
||||
|
||||
public String getSample() {
|
||||
return sample;
|
||||
}
|
||||
|
||||
public double[] getLikelihoods() {
|
||||
return GLs;
|
||||
}
|
||||
|
||||
public List<Allele> getAlleles() {
|
||||
return alleleList;
|
||||
}
|
||||
|
||||
public int getDepth() {
|
||||
return depth;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -31,107 +31,147 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||
|
||||
// the alternate allele with the largest sum of quality scores
|
||||
protected Byte bestAlternateAllele = null;
|
||||
private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50;
|
||||
|
||||
private boolean ALLOW_MULTIPLE_ALLELES;
|
||||
|
||||
private final boolean useAlleleFromVCF;
|
||||
|
||||
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC;
|
||||
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
}
|
||||
|
||||
public Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
|
||||
if ( !(priors instanceof DiploidSNPGenotypePriors) )
|
||||
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
|
||||
|
||||
byte refBase = ref.getBase();
|
||||
Allele refAllele = Allele.create(refBase, true);
|
||||
final boolean[] basesToUse = new boolean[4];
|
||||
final byte refBase = ref.getBase();
|
||||
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
|
||||
|
||||
// find the alternate allele with the largest sum of quality scores
|
||||
// start making the VariantContext
|
||||
final GenomeLoc loc = ref.getLocus();
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(Allele.create(refBase, true));
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);
|
||||
|
||||
// find the alternate allele(s) that we should be using
|
||||
if ( alternateAlleleToUse != null ) {
|
||||
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
|
||||
basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true;
|
||||
} else if ( useAlleleFromVCF ) {
|
||||
VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
|
||||
final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
|
||||
|
||||
// ignore places where we don't have a variant
|
||||
if ( vc == null )
|
||||
// ignore places where we don't have a SNP
|
||||
if ( vc == null || !vc.isSNP() )
|
||||
return null;
|
||||
|
||||
if ( !vc.isBiallelic() ) {
|
||||
// for multi-allelic sites go back to the reads and find the most likely alternate allele
|
||||
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
|
||||
} else {
|
||||
bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0];
|
||||
}
|
||||
for ( Allele allele : vc.getAlternateAlleles() )
|
||||
basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true;
|
||||
} else {
|
||||
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
|
||||
|
||||
determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup);
|
||||
|
||||
// how many alternate alleles are we using?
|
||||
int alleleCounter = countSetBits(basesToUse);
|
||||
|
||||
// if there are no non-ref alleles...
|
||||
if ( alleleCounter == 0 ) {
|
||||
// if we only want variants, then we don't need to calculate genotype likelihoods
|
||||
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
|
||||
return builder.make();
|
||||
|
||||
// otherwise, choose any alternate allele (it doesn't really matter)
|
||||
basesToUse[indexOfRefBase == 0 ? 1 : 0] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// if there are no non-ref bases...
|
||||
if ( bestAlternateAllele == null ) {
|
||||
// if we only want variants, then we don't need to calculate genotype likelihoods
|
||||
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
|
||||
return refAllele;
|
||||
|
||||
// otherwise, choose any alternate allele (it doesn't really matter)
|
||||
bestAlternateAllele = (byte)(refBase != 'A' ? 'A' : 'C');
|
||||
// create the alternate alleles and the allele ordering (the ordering is crucial for the GLs)
|
||||
final int numAltAlleles = countSetBits(basesToUse);
|
||||
final int[] alleleOrdering = new int[numAltAlleles + 1];
|
||||
alleleOrdering[0] = indexOfRefBase;
|
||||
int alleleOrderingIndex = 1;
|
||||
int numLikelihoods = 1;
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
if ( i != indexOfRefBase && basesToUse[i] ) {
|
||||
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
|
||||
alleleOrdering[alleleOrderingIndex++] = i;
|
||||
numLikelihoods += alleleOrderingIndex;
|
||||
}
|
||||
}
|
||||
builder.alleles(alleles);
|
||||
|
||||
Allele altAllele = Allele.create(bestAlternateAllele, false);
|
||||
// create the genotypes; no-call everyone for now
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
final List<Allele> noCall = new ArrayList<Allele>();
|
||||
noCall.add(Allele.NO_CALL);
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); }
|
||||
if ( useBAQedPileup )
|
||||
pileup = createBAQedPileup( pileup );
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
|
||||
int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
|
||||
final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
if ( nGoodBases == 0 )
|
||||
continue;
|
||||
|
||||
double[] likelihoods = GL.getLikelihoods();
|
||||
final double[] allLikelihoods = GL.getLikelihoods();
|
||||
final double[] myLikelihoods = new double[numLikelihoods];
|
||||
|
||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase);
|
||||
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele);
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele);
|
||||
ArrayList<Allele> aList = new ArrayList<Allele>();
|
||||
aList.add(refAllele);
|
||||
aList.add(altAllele);
|
||||
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
|
||||
int myLikelihoodsIndex = 0;
|
||||
for ( int i = 0; i <= numAltAlleles; i++ ) {
|
||||
for ( int j = i; j <= numAltAlleles; j++ ) {
|
||||
myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()];
|
||||
}
|
||||
}
|
||||
|
||||
// normalize in log space so that max element is zero.
|
||||
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
||||
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
|
||||
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
|
||||
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
}
|
||||
|
||||
return refAllele;
|
||||
return builder.genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
protected void initializeBestAlternateAllele(byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
|
||||
private int countSetBits(boolean[] array) {
|
||||
int counter = 0;
|
||||
for ( int i = 0; i < array.length; i++ ) {
|
||||
if ( array[i] )
|
||||
counter++;
|
||||
}
|
||||
return counter;
|
||||
}
|
||||
|
||||
// fills in the allelesToUse array
|
||||
protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
|
||||
int[] qualCounts = new int[4];
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
|
|
@ -139,7 +179,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
// ignore deletions
|
||||
if ( p.isDeletion() || (! p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE ))
|
||||
if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) )
|
||||
continue;
|
||||
|
||||
final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
|
||||
|
|
@ -149,17 +189,31 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
}
|
||||
}
|
||||
|
||||
// set the non-ref base with maximum quality score sum
|
||||
int maxCount = 0;
|
||||
bestAlternateAllele = null;
|
||||
for ( byte altAllele : BaseUtils.BASES ) {
|
||||
if ( altAllele == ref )
|
||||
continue;
|
||||
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||
if ( qualCounts[index] > maxCount ) {
|
||||
maxCount = qualCounts[index];
|
||||
bestAlternateAllele = altAllele;
|
||||
if ( ALLOW_MULTIPLE_ALLELES ) {
|
||||
for ( byte altAllele : BaseUtils.BASES ) {
|
||||
if ( altAllele == ref )
|
||||
continue;
|
||||
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||
if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) {
|
||||
allelesToUse[index] = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// set the non-ref base which has the maximum quality score sum
|
||||
int maxCount = 0;
|
||||
int indexOfMax = 0;
|
||||
for ( byte altAllele : BaseUtils.BASES ) {
|
||||
if ( altAllele == ref )
|
||||
continue;
|
||||
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||
if ( qualCounts[index] > maxCount ) {
|
||||
maxCount = qualCounts[index];
|
||||
indexOfMax = index;
|
||||
}
|
||||
}
|
||||
|
||||
if ( maxCount > 0 )
|
||||
allelesToUse[indexOfMax] = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -153,6 +153,18 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
|
||||
public boolean IGNORE_SNP_ALLELES = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow multiple alleles in discovery", required = false)
|
||||
public boolean MULTI_ALLELIC = false;
|
||||
|
||||
/**
|
||||
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
|
||||
* then this site will be skipped and a warning printed. Note that genotyping sites with many alternate alleles is both CPU and memory intensive.
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||
public int MAX_ALTERNATE_ALLELES = 5;
|
||||
|
||||
|
||||
// Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
|
||||
public UnifiedArgumentCollection clone() {
|
||||
|
|
@ -176,10 +188,12 @@ public class UnifiedArgumentCollection {
|
|||
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
|
||||
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
|
||||
uac.alleles = alleles;
|
||||
uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES;
|
||||
|
||||
// todo- arguments to remove
|
||||
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
||||
uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
|
||||
uac.MULTI_ALLELIC = MULTI_ALLELIC;
|
||||
return uac;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,9 @@ public class UnifiedGenotyperEngine {
|
|||
EMIT_ALL_SITES
|
||||
}
|
||||
|
||||
protected static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
protected static final double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
|
||||
// the unified argument collection
|
||||
private final UnifiedArgumentCollection UAC;
|
||||
public UnifiedArgumentCollection getUAC() { return UAC; }
|
||||
|
|
@ -73,11 +76,12 @@ public class UnifiedGenotyperEngine {
|
|||
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
|
||||
|
||||
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
|
||||
private final double[] log10AlleleFrequencyPriorsSNPs;
|
||||
private final double[] log10AlleleFrequencyPriorsIndels;
|
||||
private final double[][] log10AlleleFrequencyPriorsSNPs;
|
||||
private final double[][] log10AlleleFrequencyPriorsIndels;
|
||||
|
||||
// the allele frequency likelihoods (allocated once as an optimization)
|
||||
private ThreadLocal<double[]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[]>();
|
||||
private ThreadLocal<double[][]> log10AlleleFrequencyLikelihoods = new ThreadLocal<double[][]>();
|
||||
private ThreadLocal<double[][]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[][]>();
|
||||
|
||||
// the priors object
|
||||
private final GenotypePriors genotypePriorsSNPs;
|
||||
|
|
@ -96,6 +100,7 @@ public class UnifiedGenotyperEngine {
|
|||
// the standard filter to use for calls below the confidence threshold but above the emit threshold
|
||||
private static final Set<String> filter = new HashSet<String>(1);
|
||||
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
private final boolean BAQEnabledOnCMDLine;
|
||||
|
||||
|
||||
|
|
@ -113,6 +118,7 @@ public class UnifiedGenotyperEngine {
|
|||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
|
||||
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
|
||||
genomeLocParser = toolkit.getGenomeLocParser();
|
||||
this.samples = new TreeSet<String>(samples);
|
||||
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
|
||||
this.UAC = UAC.clone();
|
||||
|
|
@ -122,10 +128,10 @@ public class UnifiedGenotyperEngine {
|
|||
this.annotationEngine = engine;
|
||||
|
||||
N = 2 * this.samples.size();
|
||||
log10AlleleFrequencyPriorsSNPs = new double[N+1];
|
||||
log10AlleleFrequencyPriorsIndels = new double[N+1];
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
|
||||
log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
|
||||
genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
|
||||
|
|
@ -152,7 +158,6 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
||||
|
||||
if ( vc == null )
|
||||
return null;
|
||||
|
||||
|
|
@ -214,14 +219,7 @@ public class UnifiedGenotyperEngine {
|
|||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||
}
|
||||
|
||||
Map<String, MultiallelicGenotypeLikelihoods> GLs = new HashMap<String, MultiallelicGenotypeLikelihoods>();
|
||||
|
||||
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
|
||||
|
||||
if ( refAllele != null )
|
||||
return createVariantContextFromLikelihoods(refContext, refAllele, GLs);
|
||||
else
|
||||
return null;
|
||||
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
|
||||
}
|
||||
|
||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||
|
|
@ -256,136 +254,170 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vc, false);
|
||||
}
|
||||
|
||||
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
|
||||
// no-call everyone for now
|
||||
List<Allele> noCall = new ArrayList<Allele>();
|
||||
noCall.add(Allele.NO_CALL);
|
||||
|
||||
Set<Allele> alleles = new LinkedHashSet<Allele>();
|
||||
alleles.add(refAllele);
|
||||
boolean addedAltAlleles = false;
|
||||
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) {
|
||||
if ( !addedAltAlleles ) {
|
||||
addedAltAlleles = true;
|
||||
// ordering important to maintain consistency
|
||||
for (Allele a: GL.getAlleles()) {
|
||||
alleles.add(a);
|
||||
}
|
||||
}
|
||||
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
//GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
|
||||
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(GL.getLikelihoods());
|
||||
attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth());
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
|
||||
genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
}
|
||||
|
||||
GenomeLoc loc = refContext.getLocus();
|
||||
int endLoc = calculateEndPos(alleles, refAllele, loc);
|
||||
|
||||
return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make();
|
||||
public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
return calculateGenotypes(null, null, null, null, vc, model);
|
||||
}
|
||||
|
||||
// private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine
|
||||
private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( afcm.get() == null ) {
|
||||
log10AlleleFrequencyPosteriors.set(new double[N+1]);
|
||||
log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
|
||||
log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
|
||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
||||
}
|
||||
|
||||
// don't try to genotype too many alternate alleles
|
||||
if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) {
|
||||
logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument");
|
||||
return null;
|
||||
}
|
||||
|
||||
// estimate our confidence in a reference call and return
|
||||
if ( vc.getNSamples() == 0 )
|
||||
if ( vc.getNSamples() == 0 ) {
|
||||
if ( limitedContext )
|
||||
return null;
|
||||
return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ?
|
||||
estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), false, 1.0) :
|
||||
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
|
||||
}
|
||||
|
||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||
|
||||
// find the most likely frequency
|
||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
|
||||
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
||||
boolean bestGuessIsRef = true;
|
||||
|
||||
// which alternate allele has the highest MLE AC?
|
||||
int indexOfHighestAlt = -1;
|
||||
int alleleCountOfHighestAlt = -1;
|
||||
|
||||
// determine which alternate alleles have AF>0
|
||||
boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
|
||||
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||
int indexOfBestAC = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]);
|
||||
|
||||
// if the most likely AC is not 0, then this is a good alternate allele to use
|
||||
if ( indexOfBestAC != 0 ) {
|
||||
altAllelesToUse[i] = true;
|
||||
bestGuessIsRef = false;
|
||||
}
|
||||
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
|
||||
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
altAllelesToUse[i] = true;
|
||||
}
|
||||
|
||||
// keep track of the "best" alternate allele to use
|
||||
if ( indexOfBestAC > alleleCountOfHighestAlt) {
|
||||
alleleCountOfHighestAlt = indexOfBestAC;
|
||||
indexOfHighestAlt = i;
|
||||
}
|
||||
}
|
||||
|
||||
// calculate p(f>0)
|
||||
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get());
|
||||
// TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs
|
||||
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[indexOfHighestAlt]);
|
||||
double sum = 0.0;
|
||||
for (int i = 1; i <= N; i++)
|
||||
sum += normalizedPosteriors[i];
|
||||
double PofF = Math.min(sum, 1.0); // deal with precision errors
|
||||
|
||||
double phredScaledConfidence;
|
||||
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
|
||||
if ( Double.isInfinite(phredScaledConfidence) )
|
||||
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
|
||||
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0];
|
||||
} else {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||
sum = 0.0;
|
||||
for (int i = 1; i <= N; i++) {
|
||||
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
||||
if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
||||
break;
|
||||
sum += log10AlleleFrequencyPosteriors.get()[i];
|
||||
sum += log10AlleleFrequencyPosteriors.get()[0][i];
|
||||
}
|
||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
||||
}
|
||||
}
|
||||
|
||||
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
||||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
|
||||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) {
|
||||
// technically, at this point our confidence in a reference call isn't accurately estimated
|
||||
// because it didn't take into account samples with no data, so let's get a better estimate
|
||||
return estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
|
||||
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
|
||||
}
|
||||
|
||||
// strip out any alleles that aren't going to be used in the VariantContext
|
||||
List<Allele> myAlleles;
|
||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
||||
myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
|
||||
myAlleles.add(vc.getReference());
|
||||
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||
if ( altAllelesToUse[i] )
|
||||
myAlleles.add(vc.getAlternateAllele(i));
|
||||
}
|
||||
} else {
|
||||
// use all of the alleles if we are given them by the user
|
||||
myAlleles = vc.getAlleles();
|
||||
}
|
||||
|
||||
// start constructing the resulting VC
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
|
||||
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
|
||||
builder.log10PError(phredScaledConfidence/-10.0);
|
||||
if ( ! passesCallThreshold(phredScaledConfidence) )
|
||||
builder.filters(filter);
|
||||
if ( !limitedContext )
|
||||
builder.referenceBaseForIndel(refContext.getBase());
|
||||
|
||||
// create the genotypes
|
||||
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
||||
GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse);
|
||||
|
||||
// print out stats if we have a writer
|
||||
if ( verboseWriter != null )
|
||||
if ( verboseWriter != null && !limitedContext )
|
||||
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
|
||||
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
// if the site was downsampled, record that fact
|
||||
if ( rawContext.hasPileupBeenDownsampled() )
|
||||
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
|
||||
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
|
||||
|
||||
|
||||
if ( UAC.COMPUTE_SLOD && bestAFguess != 0 ) {
|
||||
if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
|
||||
//final boolean DEBUG_SLOD = false;
|
||||
|
||||
// the overall lod
|
||||
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
// the forward lod
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
|
||||
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
|
||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
|
||||
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
|
||||
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
|
|
@ -401,26 +433,12 @@ public class UnifiedGenotyperEngine {
|
|||
attributes.put("SB", strandScore);
|
||||
}
|
||||
|
||||
GenomeLoc loc = refContext.getLocus();
|
||||
|
||||
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
|
||||
|
||||
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
|
||||
// strip out the alternate allele if it's a ref call
|
||||
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
||||
myAlleles = new HashSet<Allele>(1);
|
||||
myAlleles.add(vc.getReference());
|
||||
}
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles);
|
||||
// finish constructing the resulting VC
|
||||
builder.genotypes(genotypes);
|
||||
builder.log10PError(phredScaledConfidence/-10.0);
|
||||
if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter);
|
||||
builder.attributes(attributes);
|
||||
builder.referenceBaseForIndel(refContext.getBase());
|
||||
VariantContext vcCall = builder.make();
|
||||
|
||||
if ( annotationEngine != null ) {
|
||||
if ( annotationEngine != null && !limitedContext ) {
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
ReadBackedPileup pileup = null;
|
||||
if (rawContext.hasExtendedEventPileup())
|
||||
|
|
@ -435,119 +453,6 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||
}
|
||||
|
||||
// A barebones entry point to the exact model when there is no tracker or stratified contexts available -- only GLs
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( afcm.get() == null ) {
|
||||
log10AlleleFrequencyPosteriors.set(new double[N+1]);
|
||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
||||
}
|
||||
|
||||
// estimate our confidence in a reference call and return
|
||||
if ( vc.getNSamples() == 0 )
|
||||
return null;
|
||||
|
||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
|
||||
// find the most likely frequency
|
||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
|
||||
|
||||
// calculate p(f>0)
|
||||
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get());
|
||||
double sum = 0.0;
|
||||
for (int i = 1; i <= N; i++)
|
||||
sum += normalizedPosteriors[i];
|
||||
double PofF = Math.min(sum, 1.0); // deal with precision errors
|
||||
|
||||
double phredScaledConfidence;
|
||||
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
|
||||
if ( Double.isInfinite(phredScaledConfidence) )
|
||||
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
|
||||
} else {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||
sum = 0.0;
|
||||
for (int i = 1; i <= N; i++) {
|
||||
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
||||
break;
|
||||
sum += log10AlleleFrequencyPosteriors.get()[i];
|
||||
}
|
||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
||||
}
|
||||
}
|
||||
|
||||
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
||||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
|
||||
// technically, at this point our confidence in a reference call isn't accurately estimated
|
||||
// because it didn't take into account samples with no data, so let's get a better estimate
|
||||
return null;
|
||||
}
|
||||
|
||||
// create the genotypes
|
||||
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
||||
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
|
||||
|
||||
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
|
||||
// strip out the alternate allele if it's a ref call
|
||||
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
||||
myAlleles = new HashSet<Allele>(1);
|
||||
myAlleles.add(vc.getReference());
|
||||
}
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles);
|
||||
builder.genotypes(genotypes);
|
||||
builder.log10PError(phredScaledConfidence/-10.0);
|
||||
if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter);
|
||||
builder.attributes(attributes);
|
||||
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
|
||||
|
||||
return new VariantCallContext(builder.make(), confidentlyCalled(phredScaledConfidence, PofF));
|
||||
}
|
||||
|
||||
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
||||
// TODO - temp fix until we can deal with extended events properly
|
||||
// for indels, stop location is one more than ref allele length
|
||||
boolean isSNP = true, hasNullAltAllele = false;
|
||||
for (Allele a : alleles){
|
||||
if (a.length() != 1) {
|
||||
isSNP = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (Allele a : alleles){
|
||||
if (a.isNull()) {
|
||||
hasNullAltAllele = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// standard deletion: ref allele length = del length. endLoc = startLoc + refAllele.length(), alt allele = null
|
||||
// standard insertion: ref allele length = 0, endLos = startLoc
|
||||
// mixed: want end loc = start Loc for case {A*,AT,T} but say {ATG*,A,T} : want then end loc = start loc + refAllele.length
|
||||
// So, in general, end loc = startLoc + refAllele.length, except in complex substitutions where it's one less
|
||||
//
|
||||
// todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler,
|
||||
// the whole vc processing infrastructure seems too brittle and riddled with special case handling
|
||||
|
||||
|
||||
int endLoc = loc.getStart();
|
||||
if ( !isSNP) {
|
||||
endLoc += refAllele.length();
|
||||
if(!hasNullAltAllele)
|
||||
endLoc--;
|
||||
|
||||
}
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
Map<String, AlignmentContext> stratifiedContexts = null;
|
||||
|
|
@ -604,9 +509,12 @@ public class UnifiedGenotyperEngine {
|
|||
return stratifiedContexts;
|
||||
}
|
||||
|
||||
protected static void clearAFarray(double[] AFs) {
|
||||
for ( int i = 0; i < AFs.length; i++ )
|
||||
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||
protected static void clearAFarray(double[][] AFs) {
|
||||
for ( int i = 0; i < AFs.length; i++ ) {
|
||||
for ( int j = 0; j < AFs[i].length; j++ ) {
|
||||
AFs[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final static double[] binomialProbabilityDepthCache = new double[10000];
|
||||
|
|
@ -676,7 +584,7 @@ public class UnifiedGenotyperEngine {
|
|||
AFline.append(i + "/" + N + "\t");
|
||||
AFline.append(String.format("%.2f\t", ((float)i)/N));
|
||||
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
|
||||
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
|
||||
if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
|
||||
AFline.append("0.00000000\t");
|
||||
else
|
||||
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i]));
|
||||
|
|
@ -689,8 +597,8 @@ public class UnifiedGenotyperEngine {
|
|||
verboseWriter.println();
|
||||
}
|
||||
|
||||
protected boolean passesEmitThreshold(double conf, int bestAFguess) {
|
||||
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||
protected boolean passesEmitThreshold(double conf, boolean bestGuessIsRef) {
|
||||
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || !bestGuessIsRef) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||
}
|
||||
|
||||
protected boolean passesCallThreshold(double conf) {
|
||||
|
|
@ -744,27 +652,25 @@ public class UnifiedGenotyperEngine {
|
|||
return null;
|
||||
}
|
||||
|
||||
protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
// calculate the allele frequency priors for 1-N
|
||||
double sum = 0.0;
|
||||
double heterozygosity;
|
||||
protected static void computeAlleleFrequencyPriors(final int N, final double[][] priors, final double theta) {
|
||||
|
||||
if (model == GenotypeLikelihoodsCalculationModel.Model.INDEL)
|
||||
heterozygosity = UAC.INDEL_HETEROZYGOSITY;
|
||||
else
|
||||
heterozygosity = UAC.heterozygosity;
|
||||
|
||||
for (int i = 1; i <= N; i++) {
|
||||
double value = heterozygosity / (double)i;
|
||||
priors[i] = Math.log10(value);
|
||||
sum += value;
|
||||
// the dimension here is the number of alternate alleles; with e.g. 2 alternate alleles the prior will be theta^2 / i
|
||||
for (int alleles = 1; alleles <= priors.length; alleles++) {
|
||||
double sum = 0.0;
|
||||
|
||||
// for each i
|
||||
for (int i = 1; i <= N; i++) {
|
||||
double value = Math.pow(theta, alleles) / (double)i;
|
||||
priors[alleles-1][i] = Math.log10(value);
|
||||
sum += value;
|
||||
}
|
||||
|
||||
// null frequency for AF=0 is (1 - sum(all other frequencies))
|
||||
priors[alleles-1][0] = Math.log10(1.0 - sum);
|
||||
}
|
||||
|
||||
// null frequency for AF=0 is (1 - sum(all other frequencies))
|
||||
priors[0] = Math.log10(1.0 - sum);
|
||||
}
|
||||
|
||||
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
switch( model ) {
|
||||
case SNP:
|
||||
return log10AlleleFrequencyPriorsSNPs;
|
||||
|
|
@ -837,4 +743,88 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
return vc;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use
|
||||
*
|
||||
* @return genotypes
|
||||
*/
|
||||
public GenotypesContext assignGenotypes(VariantContext vc,
|
||||
boolean[] allelesToUse) {
|
||||
|
||||
final GenotypesContext GLs = vc.getGenotypes();
|
||||
|
||||
final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();
|
||||
|
||||
final GenotypesContext calls = GenotypesContext.create();
|
||||
|
||||
for ( int k = GLs.size() - 1; k >= 0; k-- ) {
|
||||
final String sample = sampleIndices.get(k);
|
||||
final Genotype g = GLs.get(sample);
|
||||
if ( !g.hasLikelihoods() )
|
||||
continue;
|
||||
|
||||
final double[] likelihoods = g.getLikelihoods().getAsVector();
|
||||
|
||||
// if there is no mass on the likelihoods, then just no-call the sample
|
||||
if ( MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||
calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
||||
continue;
|
||||
}
|
||||
|
||||
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
|
||||
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
|
||||
|
||||
final int numAltAlleles = allelesToUse.length;
|
||||
|
||||
// start with the assumption that the ideal genotype is homozygous reference
|
||||
Allele maxAllele1 = vc.getReference(), maxAllele2 = vc.getReference();
|
||||
double maxLikelihoodSeen = likelihoods[0];
|
||||
int indexOfMax = 0;
|
||||
|
||||
// keep track of some state
|
||||
Allele firstAllele = vc.getReference();
|
||||
int subtractor = numAltAlleles + 1;
|
||||
int subtractionsMade = 0;
|
||||
|
||||
for ( int i = 1, PLindex = 1; i < likelihoods.length; i++, PLindex++ ) {
|
||||
if ( PLindex == subtractor ) {
|
||||
firstAllele = vc.getAlternateAllele(subtractionsMade);
|
||||
PLindex -= subtractor;
|
||||
subtractor--;
|
||||
subtractionsMade++;
|
||||
|
||||
// we can skip this allele if it's not usable
|
||||
if ( !allelesToUse[subtractionsMade-1] ) {
|
||||
i += subtractor - 1;
|
||||
PLindex += subtractor - 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// we don't care about the entry if we've already seen better
|
||||
if ( likelihoods[i] <= maxLikelihoodSeen )
|
||||
continue;
|
||||
|
||||
// if it's usable then update the alleles
|
||||
int alleleIndex = subtractionsMade + PLindex - 1;
|
||||
if ( allelesToUse[alleleIndex] ) {
|
||||
maxAllele1 = firstAllele;
|
||||
maxAllele2 = vc.getAlternateAllele(alleleIndex);
|
||||
maxLikelihoodSeen = likelihoods[i];
|
||||
indexOfMax = i;
|
||||
}
|
||||
}
|
||||
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||
myAlleles.add(maxAllele1);
|
||||
myAlleles.add(maxAllele2);
|
||||
|
||||
final double qual = GenotypeLikelihoods.getQualFromLikelihoods(indexOfMax, likelihoods);
|
||||
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
||||
}
|
||||
|
||||
return calls;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -320,17 +320,17 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
if(transmissionProb != NO_TRANSMISSION_PROB)
|
||||
phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb));
|
||||
|
||||
//Handle null, missing and unavailable genotypes
|
||||
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
|
||||
//genotype so it is safe to return the original genotype in this case.
|
||||
//In addition, if the phasing confidence is 0, then return the unphased, original genotypes.
|
||||
if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType()))
|
||||
return genotype;
|
||||
//Handle null, missing and unavailable genotypes
|
||||
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
|
||||
//genotype so it is safe to return the original genotype in this case.
|
||||
//In addition, if the phasing confidence is 0, then return the unphased, original genotypes.
|
||||
if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType()))
|
||||
return genotype;
|
||||
|
||||
//Add the transmission probability
|
||||
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||
genotypeAttributes.putAll(genotype.getAttributes());
|
||||
if(transmissionProb>NO_TRANSMISSION_PROB)
|
||||
//Add the transmission probability
|
||||
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||
genotypeAttributes.putAll(genotype.getAttributes());
|
||||
if(transmissionProb>NO_TRANSMISSION_PROB)
|
||||
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
|
||||
|
||||
ArrayList<Allele> phasedAlleles = new ArrayList<Allele>(2);
|
||||
|
|
|
|||
|
|
@ -164,13 +164,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
@Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
|
||||
protected double MIN_PHASE_QUALITY = 10.0;
|
||||
|
||||
/**
|
||||
* This argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined.
|
||||
*/
|
||||
@Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations", required=false)
|
||||
protected String FAMILY_STRUCTURE;
|
||||
|
||||
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
||||
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation. Default is 50.", required=false)
|
||||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||
|
||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||
|
|
@ -561,8 +555,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
|
||||
public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; }
|
||||
|
||||
public String getFamilyStructure() { return FAMILY_STRUCTURE; }
|
||||
|
||||
public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; }
|
||||
|
||||
public TreeSet<VariantStratifier> getStratificationObjects() { return stratificationObjects; }
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -7,9 +9,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Mendelian violation detection and counting
|
||||
|
|
@ -40,12 +44,25 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
|
||||
public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||
|
||||
@DataPoint(description = "Number of mendelian variants found")
|
||||
@DataPoint(description = "Number of variants found with at least one family having genotypes")
|
||||
long nVariants;
|
||||
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall")
|
||||
long nSkipped;
|
||||
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)")
|
||||
long nFamCalled;
|
||||
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.")
|
||||
long nVarFamCalled;
|
||||
@DataPoint(description="Number of variants x families discarded as low quality")
|
||||
long nLowQual;
|
||||
@DataPoint(description="Number of variants x families discarded as no call")
|
||||
long nNoCall;
|
||||
@DataPoint(description="Number of loci with mendelian violations")
|
||||
long nLociViolations;
|
||||
@DataPoint(description = "Number of mendelian violations found")
|
||||
long nViolations;
|
||||
|
||||
@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
|
||||
|
||||
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
|
||||
long KidHomRef_ParentHomVar;
|
||||
@DataPoint(description = "number of child het calls where the parent was hom ref")
|
||||
long KidHet_ParentsHomRef;
|
||||
|
|
@ -53,11 +70,65 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
long KidHet_ParentsHomVar;
|
||||
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
||||
long KidHomVar_ParentHomRef;
|
||||
*/
|
||||
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR")
|
||||
long mvRefRef_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET")
|
||||
long mvRefRef_Het;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR")
|
||||
long mvRefHet_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR")
|
||||
long mvRefVar_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF")
|
||||
long mvRefVar_Ref;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF")
|
||||
long mvVarHet_Ref;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF")
|
||||
long mvVarVar_Ref;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET")
|
||||
long mvVarVar_Het;
|
||||
|
||||
|
||||
/*@DataPoint(description ="Number of inherited var alleles from het parents")
|
||||
long nInheritedVar;
|
||||
@DataPoint(description ="Number of inherited ref alleles from het parents")
|
||||
long nInheritedRef;*/
|
||||
|
||||
@DataPoint(description="Number of HomRef/HomRef/HomRef trios")
|
||||
long HomRefHomRef_HomRef;
|
||||
@DataPoint(description="Number of Het/Het/Het trios")
|
||||
long HetHet_Het;
|
||||
@DataPoint(description="Number of Het/Het/HomRef trios")
|
||||
long HetHet_HomRef;
|
||||
@DataPoint(description="Number of Het/Het/HomVar trios")
|
||||
long HetHet_HomVar;
|
||||
@DataPoint(description="Number of HomVar/HomVar/HomVar trios")
|
||||
long HomVarHomVar_HomVar;
|
||||
@DataPoint(description="Number of HomRef/HomVar/Het trios")
|
||||
long HomRefHomVAR_Het;
|
||||
@DataPoint(description="Number of ref alleles inherited from het/het parents")
|
||||
long HetHet_inheritedRef;
|
||||
@DataPoint(description="Number of var alleles inherited from het/het parents")
|
||||
long HetHet_inheritedVar;
|
||||
@DataPoint(description="Number of ref alleles inherited from homRef/het parents")
|
||||
long HomRefHet_inheritedRef;
|
||||
@DataPoint(description="Number of var alleles inherited from homRef/het parents")
|
||||
long HomRefHet_inheritedVar;
|
||||
@DataPoint(description="Number of ref alleles inherited from homVar/het parents")
|
||||
long HomVarHet_inheritedRef;
|
||||
@DataPoint(description="Number of var alleles inherited from homVar/het parents")
|
||||
long HomVarHet_inheritedVar;
|
||||
|
||||
MendelianViolation mv;
|
||||
PrintStream mvFile;
|
||||
Map<String,Set<Sample>> families;
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
||||
//Changed by Laurent Francioli - 2011-06-07
|
||||
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
||||
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
|
||||
families = walker.getSampleDB().getFamilies();
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
|
|
@ -75,110 +146,48 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
||||
if (mv.setAlleles(vc)) {
|
||||
|
||||
if(mv.countViolations(families,vc)>0){
|
||||
nLociViolations++;
|
||||
nViolations += mv.getViolationsCount();
|
||||
mvRefRef_Var += mv.getParentsRefRefChildVar();
|
||||
mvRefRef_Het += mv.getParentsRefRefChildHet();
|
||||
mvRefHet_Var += mv.getParentsRefHetChildVar();
|
||||
mvRefVar_Var += mv.getParentsRefVarChildVar();
|
||||
mvRefVar_Ref += mv.getParentsRefVarChildRef();
|
||||
mvVarHet_Ref += mv.getParentsVarHetChildRef();
|
||||
mvVarVar_Ref += mv.getParentsVarVarChildRef();
|
||||
mvVarVar_Het += mv.getParentsVarVarChildHet();
|
||||
|
||||
}
|
||||
HomRefHomRef_HomRef += mv.getRefRefRef();
|
||||
HetHet_Het += mv.getHetHetHet();
|
||||
HetHet_HomRef += mv.getHetHetHomRef();
|
||||
HetHet_HomVar += mv.getHetHetHomVar();
|
||||
HomVarHomVar_HomVar += mv.getVarVarVar();
|
||||
HomRefHomVAR_Het += mv.getRefVarHet();
|
||||
HetHet_inheritedRef += mv.getParentsHetHetInheritedRef();
|
||||
HetHet_inheritedVar += mv.getParentsHetHetInheritedVar();
|
||||
HomRefHet_inheritedRef += mv.getParentsRefHetInheritedRef();
|
||||
HomRefHet_inheritedVar += mv.getParentsRefHetInheritedVar();
|
||||
HomVarHet_inheritedRef += mv.getParentsVarHetInheritedRef();
|
||||
HomVarHet_inheritedVar += mv.getParentsVarHetInheritedVar();
|
||||
|
||||
if(mv.getFamilyCalledCount()>0){
|
||||
nVariants++;
|
||||
|
||||
Genotype momG = vc.getGenotype(mv.getSampleMom());
|
||||
Genotype dadG = vc.getGenotype(mv.getSampleDad());
|
||||
Genotype childG = vc.getGenotype(mv.getSampleChild());
|
||||
|
||||
if (mv.isViolation()) {
|
||||
nViolations++;
|
||||
|
||||
String label;
|
||||
if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) {
|
||||
label = "KidHomRef_ParentHomVar";
|
||||
KidHomRef_ParentHomVar++;
|
||||
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) {
|
||||
label = "KidHet_ParentsHomRef";
|
||||
KidHet_ParentsHomRef++;
|
||||
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) {
|
||||
label = "KidHet_ParentsHomVar";
|
||||
KidHet_ParentsHomVar++;
|
||||
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
|
||||
label = "KidHomVar_ParentHomRef";
|
||||
KidHomVar_ParentHomRef++;
|
||||
} else {
|
||||
throw new ReviewedStingException("BUG: unexpected child genotype class " + childG);
|
||||
}
|
||||
|
||||
return "MendelViolation=" + label;
|
||||
}
|
||||
nFamCalled += mv.getFamilyCalledCount();
|
||||
nLowQual += mv.getFamilyLowQualsCount();
|
||||
nNoCall += mv.getFamilyNoCallCount();
|
||||
nVarFamCalled += mv.getVarFamilyCalledCount();
|
||||
}
|
||||
}
|
||||
|
||||
return null; // we don't capture any intersting sites
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
private double getQThreshold() {
|
||||
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
//return 0.0;
|
||||
}
|
||||
|
||||
TrioStructure trio;
|
||||
double mendelianViolationQualThreshold;
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
public static class TrioStructure {
|
||||
public String mom, dad, child;
|
||||
}
|
||||
|
||||
public static TrioStructure parseTrioDescription(String family) {
|
||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
||||
if (m.matches()) {
|
||||
TrioStructure trio = new TrioStructure();
|
||||
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
||||
trio.mom = m.group(1);
|
||||
trio.dad = m.group(2);
|
||||
trio.child = m.group(3);
|
||||
return trio;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
||||
}
|
||||
}
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
trio = parseTrioDescription(walker.getFamilyStructure());
|
||||
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
|
||||
}
|
||||
|
||||
private boolean includeGenotype(Genotype g) {
|
||||
return g.getLog10PError() > getQThreshold() && g.isCalled();
|
||||
}
|
||||
|
||||
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) {
|
||||
return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles());
|
||||
}
|
||||
|
||||
public static boolean isViolation(VariantContext vc, TrioStructure trio ) {
|
||||
return isViolation(vc, vc.getGenotype(trio.mom), vc.getGenotype(trio.dad), vc.getGenotype(trio.child) );
|
||||
}
|
||||
|
||||
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA) {
|
||||
//VariantContext momVC = vc.subContextFromGenotypes(momG);
|
||||
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
|
||||
int i = 0;
|
||||
Genotype childG = new Genotype("kidG", childA);
|
||||
for (Allele momAllele : momA) {
|
||||
for (Allele dadAllele : dadA) {
|
||||
if (momAllele.isCalled() && dadAllele.isCalled()) {
|
||||
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
|
||||
if (childG.sameGenotype(possibleChild)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else{
|
||||
nSkipped++;
|
||||
}
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return true;
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
|
||||
*/
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,9 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -41,7 +42,6 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -180,7 +180,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||
public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
||||
/**
|
||||
|
|
@ -282,6 +282,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
|
||||
private double fractionRandom = 0;
|
||||
|
||||
@Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false)
|
||||
private double fractionGenotypes = 0;
|
||||
|
||||
/**
|
||||
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||
* When specified one or more times, a particular type of variant is selected.
|
||||
|
|
@ -325,7 +328,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
private boolean DISCORDANCE_ONLY = false;
|
||||
private boolean CONCORDANCE_ONLY = false;
|
||||
|
||||
private Set<MendelianViolation> mvSet = new HashSet<MendelianViolation>();
|
||||
private MendelianViolation mv;
|
||||
|
||||
|
||||
/* variables used by the SELECT RANDOM modules */
|
||||
|
|
@ -344,6 +347,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
private PrintStream outMVFileStream = null;
|
||||
|
||||
//Random number generator for the genotypes to remove
|
||||
private Random randomGenotypes = new Random();
|
||||
|
||||
/**
|
||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||
|
|
@ -380,8 +385,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
for ( String sample : samples )
|
||||
logger.info("Including sample '" + sample + "'");
|
||||
|
||||
|
||||
|
||||
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
||||
if (TYPES_TO_INCLUDE.isEmpty()) {
|
||||
|
||||
|
|
@ -421,29 +424,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
|
||||
|
||||
if (MENDELIAN_VIOLATIONS) {
|
||||
if ( FAMILY_STRUCTURE_FILE != null) {
|
||||
try {
|
||||
for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) {
|
||||
MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
||||
if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom()))
|
||||
mvSet.add(mv);
|
||||
}
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
|
||||
}
|
||||
if (outMVFile != null)
|
||||
try {
|
||||
outMVFileStream = new PrintStream(outMVFile);
|
||||
}
|
||||
catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
||||
}
|
||||
else
|
||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
}
|
||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
MENDELIAN_VIOLATIONS = true;
|
||||
mv = new MendelianViolation(MENDELIAN_VIOLATION_QUAL_THRESHOLD,false,true);
|
||||
}
|
||||
|
||||
SELECT_RANDOM_NUMBER = numRandom > 0;
|
||||
|
|
@ -479,26 +460,26 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
for (VariantContext vc : vcs) {
|
||||
if (MENDELIAN_VIOLATIONS) {
|
||||
boolean foundMV = false;
|
||||
for (MendelianViolation mv : mvSet) {
|
||||
if (mv.isViolation(vc)) {
|
||||
foundMV = true;
|
||||
//System.out.println(vc.toString());
|
||||
if (outMVFile != null)
|
||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
|
||||
break;
|
||||
|
||||
if (outMVFile != null){
|
||||
for( String familyId : mv.getViolationFamilies()){
|
||||
for(Sample sample : this.getSampleDB().getFamily(familyId)){
|
||||
if(sample.getParents().size() > 0){
|
||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
||||
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
||||
mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(),
|
||||
vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() );
|
||||
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
|
||||
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundMV)
|
||||
break;
|
||||
}
|
||||
|
||||
if (DISCORDANCE_ONLY) {
|
||||
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
|
||||
if (!isDiscordant(vc, compVCs))
|
||||
|
|
@ -629,6 +610,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||
|
||||
@Override
|
||||
public Integer treeReduce(Integer lhs, Integer rhs) {
|
||||
return lhs + rhs;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
logger.info(result + " records processed.");
|
||||
|
||||
|
|
@ -657,9 +643,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
||||
|
||||
GenotypesContext newGC = sub.getGenotypes();
|
||||
|
||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||
builder.genotypes(VariantContextUtils.stripPLs(vc.getGenotypes()));
|
||||
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||
|
||||
//Remove a fraction of the genotypes if needed
|
||||
if(fractionGenotypes>0){
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
for ( Genotype genotype : newGC ) {
|
||||
//Set genotype to no call if it falls in the fraction.
|
||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||
}
|
||||
else{
|
||||
genotypes.add(genotype);
|
||||
}
|
||||
}
|
||||
newGC = GenotypesContext.create(genotypes);
|
||||
}
|
||||
|
||||
builder.genotypes(newGC);
|
||||
|
||||
int depth = 0;
|
||||
for (String sample : sub.getSampleNames()) {
|
||||
|
|
|
|||
|
|
@ -1,147 +1,394 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* User: carneiro
|
||||
* User: carneiro / lfran
|
||||
* Date: 3/9/11
|
||||
* Time: 12:38 PM
|
||||
*
|
||||
* Class for the identification and tracking of mendelian violation. It can be used in 2 distinct ways:
|
||||
* - Either using an instance of the MendelianViolation class to track mendelian violations for each of the families while
|
||||
* walking over the variants
|
||||
* - Or using the static methods to directly get information about mendelian violation in a family at a given locus
|
||||
*
|
||||
*/
|
||||
public class MendelianViolation {
|
||||
String sampleMom;
|
||||
String sampleDad;
|
||||
String sampleChild;
|
||||
//List of families with violations
|
||||
private List<String> violationFamilies;
|
||||
|
||||
List allelesMom;
|
||||
List allelesDad;
|
||||
List allelesChild;
|
||||
//Call information
|
||||
private int nocall = 0;
|
||||
private int familyCalled = 0;
|
||||
private int varFamilyCalled = 0;
|
||||
private int lowQual = 0;
|
||||
|
||||
double minGenotypeQuality;
|
||||
private boolean allCalledOnly = true;
|
||||
|
||||
//Stores occurrences of inheritance
|
||||
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
|
||||
|
||||
private int violations_total=0;
|
||||
|
||||
private double minGenotypeQuality;
|
||||
|
||||
private boolean abortOnSampleNotFound;
|
||||
|
||||
//Number of families with genotype information for all members
|
||||
public int getFamilyCalledCount(){
|
||||
return familyCalled;
|
||||
}
|
||||
|
||||
//Number of families with genotype information for all members
|
||||
public int getVarFamilyCalledCount(){
|
||||
return varFamilyCalled;
|
||||
}
|
||||
|
||||
//Number of families missing genotypes for one or more of their members
|
||||
public int getFamilyNoCallCount(){
|
||||
return nocall;
|
||||
}
|
||||
|
||||
//Number of families with genotypes below the set quality threshold
|
||||
public int getFamilyLowQualsCount(){
|
||||
return lowQual;
|
||||
}
|
||||
|
||||
public int getViolationsCount(){
|
||||
return violations_total;
|
||||
}
|
||||
|
||||
//Count of alt alleles inherited from het parents (no violation)
|
||||
public int getParentHetInheritedVar(){
|
||||
return getParentsHetHetInheritedVar() + getParentsRefHetInheritedVar() + getParentsVarHetInheritedVar();
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from het parents (no violation)
|
||||
public int getParentHetInheritedRef(){
|
||||
return getParentsHetHetInheritedRef() + getParentsRefHetInheritedRef() + getParentsVarHetInheritedRef();
|
||||
}
|
||||
|
||||
//Count of HomRef/HomRef/HomRef trios
|
||||
public int getRefRefRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of HomVar/HomVar/HomVar trios
|
||||
public int getVarVarVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of HomRef/HomVar/Het trios
|
||||
public int getRefVarHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
|
||||
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/Het trios
|
||||
public int getHetHetHet(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomRef trios
|
||||
public int getHetHetHomRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomVar trios
|
||||
public int getHetHetHomVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
//return parentsHetHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
//return parentsHetHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
//return parentsHomRefHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
//return parentsHomRefHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
//return parentsHomVarHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
//return parentsHomVarHet_childVar;
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
|
||||
public int getParentsRefRefChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HET
|
||||
public int getParentsRefRefChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HET -> HOM_VAR
|
||||
public int getParentsRefHetChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
|
||||
public int getParentsRefVarChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
|
||||
public int getParentsRefVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HET -> HOM_REF
|
||||
public int getParentsVarHetChildRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
|
||||
public int getParentsVarVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
|
||||
public int getParentsVarVarChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
}
|
||||
|
||||
|
||||
//Count of violations of the type HOM_VAR/? -> HOM_REF
|
||||
public int getParentVarChildRef(){
|
||||
return getParentsRefVarChildRef() + getParentsVarHetChildRef() +getParentsVarVarChildRef();
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/? -> HOM_VAR
|
||||
public int getParentRefChildVar(){
|
||||
return getParentsRefVarChildVar() + getParentsRefHetChildVar() +getParentsRefRefChildVar();
|
||||
}
|
||||
|
||||
//Returns a String containing all trios where a Mendelian violation was observed.
|
||||
//The String is formatted "mom1+dad1=child1,mom2+dad2=child2,..."
|
||||
public String getViolationFamiliesString(){
|
||||
if(violationFamilies.isEmpty())
|
||||
return "";
|
||||
|
||||
Iterator<String> it = violationFamilies.iterator();
|
||||
String violationFams = it.next();
|
||||
while(it.hasNext()){
|
||||
violationFams += ","+it.next();
|
||||
}
|
||||
return violationFams;
|
||||
}
|
||||
|
||||
public List<String> getViolationFamilies(){
|
||||
return violationFamilies;
|
||||
}
|
||||
|
||||
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
public String getSampleMom() {
|
||||
return sampleMom;
|
||||
}
|
||||
public String getSampleDad() {
|
||||
return sampleDad;
|
||||
}
|
||||
public String getSampleChild() {
|
||||
return sampleChild;
|
||||
}
|
||||
public double getMinGenotypeQuality() {
|
||||
return minGenotypeQuality;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param sampleMomP - sample name of mom
|
||||
* @param sampleDadP - sample name of dad
|
||||
* @param sampleChildP - sample name of child
|
||||
*/
|
||||
public MendelianViolation (String sampleMomP, String sampleDadP, String sampleChildP) {
|
||||
sampleMom = sampleMomP;
|
||||
sampleDad = sampleDadP;
|
||||
sampleChild = sampleChildP;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param family - the sample names string "mom+dad=child"
|
||||
/**
|
||||
* Constructor
|
||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||
*/
|
||||
public MendelianViolation(String family, double minGenotypeQualityP) {
|
||||
minGenotypeQuality = minGenotypeQualityP;
|
||||
|
||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
||||
if (m.matches()) {
|
||||
sampleMom = m.group(1);
|
||||
sampleDad = m.group(2);
|
||||
sampleChild = m.group(3);
|
||||
}
|
||||
else
|
||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
||||
}
|
||||
|
||||
/**
|
||||
* An alternative to the more general constructor if you want to get the Sample information from the engine yourself.
|
||||
* @param sample - the sample object extracted from the sample metadata YAML file given to the engine.
|
||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
|
||||
*/
|
||||
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
|
||||
sampleMom = sample.getMother().getID();
|
||||
sampleDad = sample.getFather().getID();
|
||||
sampleChild = sample.getID();
|
||||
minGenotypeQuality = minGenotypeQualityP;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
|
||||
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
|
||||
* before evaluating it for mendelian violation, you can call setAlleles and then isViolation().
|
||||
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
|
||||
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
|
||||
*/
|
||||
public boolean setAlleles (VariantContext vc)
|
||||
{
|
||||
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
|
||||
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
|
||||
Genotype gChild = vc.getGenotypes(sampleChild).get(sampleChild);
|
||||
|
||||
if (gMom == null || gDad == null || gChild == null)
|
||||
throw new IllegalArgumentException(String.format("Variant %s:%d didn't contain genotypes for all family members: mom=%s dad=%s child=%s", vc.getChr(), vc.getStart(), sampleMom, sampleDad, sampleChild));
|
||||
|
||||
if (gMom.isNoCall() || gDad.isNoCall() || gChild.isNoCall() ||
|
||||
gMom.getPhredScaledQual() < minGenotypeQuality ||
|
||||
gDad.getPhredScaledQual() < minGenotypeQuality ||
|
||||
gChild.getPhredScaledQual() < minGenotypeQuality ) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
allelesMom = gMom.getAlleles();
|
||||
allelesDad = gDad.getAlleles();
|
||||
allelesChild = gChild.getAlleles();
|
||||
return !allelesMom.isEmpty() && !allelesDad.isEmpty() && !allelesChild.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public MendelianViolation(double minGenotypeQualityP) {
|
||||
this(minGenotypeQualityP,true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
|
||||
*/
|
||||
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound) {
|
||||
minGenotypeQuality = minGenotypeQualityP;
|
||||
this.abortOnSampleNotFound = abortOnSampleNotFound;
|
||||
violationFamilies = new ArrayList<String>();
|
||||
createInheritanceMap();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
|
||||
* @param completeTriosOnly - whether only complete trios are considered or parent/child pairs are too.
|
||||
*/
|
||||
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound, boolean completeTriosOnly) {
|
||||
minGenotypeQuality = minGenotypeQualityP;
|
||||
this.abortOnSampleNotFound = abortOnSampleNotFound;
|
||||
violationFamilies = new ArrayList<String>();
|
||||
createInheritanceMap();
|
||||
allCalledOnly = completeTriosOnly;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param families the families to be checked for Mendelian violations
|
||||
* @param vc the variant context to extract the genotypes and alleles for mom, dad and child.
|
||||
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
|
||||
*
|
||||
*/
|
||||
public boolean isViolation(VariantContext vc)
|
||||
{
|
||||
return setAlleles(vc) && isViolation();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return whether or not there is a mendelian violation at the site.
|
||||
*/
|
||||
public boolean isViolation() {
|
||||
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
|
||||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0)))
|
||||
return false;
|
||||
return true;
|
||||
public int countViolations(Map<String, Set<Sample>> families, VariantContext vc){
|
||||
|
||||
//Reset counts
|
||||
nocall = 0;
|
||||
lowQual = 0;
|
||||
familyCalled = 0;
|
||||
varFamilyCalled = 0;
|
||||
violations_total=0;
|
||||
violationFamilies.clear();
|
||||
clearInheritanceMap();
|
||||
|
||||
for(Set<Sample> family : families.values()){
|
||||
Iterator<Sample> sampleIterator = family.iterator();
|
||||
Sample sample;
|
||||
while(sampleIterator.hasNext()){
|
||||
sample = sampleIterator.next();
|
||||
if(sample.getParents().size() > 0)
|
||||
updateViolations(sample.getFamilyID(),sample.getMaternalID(), sample.getPaternalID(), sample.getID() ,vc);
|
||||
}
|
||||
}
|
||||
return violations_total;
|
||||
}
|
||||
|
||||
public boolean isViolation(Sample mother, Sample father, Sample child, VariantContext vc){
|
||||
|
||||
//Reset counts
|
||||
nocall = 0;
|
||||
lowQual = 0;
|
||||
familyCalled = 0;
|
||||
varFamilyCalled = 0;
|
||||
violations_total=0;
|
||||
violationFamilies.clear();
|
||||
clearInheritanceMap();
|
||||
updateViolations(mother.getFamilyID(),mother.getID(),father.getID(),child.getID(),vc);
|
||||
return violations_total>0;
|
||||
}
|
||||
|
||||
|
||||
private void updateViolations(String familyId, String motherId, String fatherId, String childId, VariantContext vc){
|
||||
|
||||
int count;
|
||||
Genotype gMom = vc.getGenotype(motherId);
|
||||
Genotype gDad = vc.getGenotype(fatherId);
|
||||
Genotype gChild = vc.getGenotype(childId);
|
||||
|
||||
if (gMom == null || gDad == null || gChild == null){
|
||||
if(abortOnSampleNotFound)
|
||||
throw new IllegalArgumentException(String.format("Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s family=%s", vc.getChr(), vc.getStart(), familyId, motherId, fatherId, childId));
|
||||
else
|
||||
return;
|
||||
}
|
||||
//Count No calls
|
||||
if(allCalledOnly && (!gMom.isCalled() || !gDad.isCalled() || !gChild.isCalled())){
|
||||
nocall++;
|
||||
}
|
||||
else if (!gMom.isCalled() && !gDad.isCalled() || !gChild.isCalled()){
|
||||
nocall++;
|
||||
}
|
||||
//Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned
|
||||
else if (minGenotypeQuality>0 && (gMom.getPhredScaledQual() < minGenotypeQuality ||
|
||||
gDad.getPhredScaledQual() < minGenotypeQuality ||
|
||||
gChild.getPhredScaledQual() < minGenotypeQuality )) {
|
||||
lowQual++;
|
||||
}
|
||||
else{
|
||||
//Count all families per loci called
|
||||
familyCalled++;
|
||||
//If the family is all homref, not too interesting
|
||||
if(!(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef()))
|
||||
{
|
||||
varFamilyCalled++;
|
||||
if(isViolation(gMom, gDad, gChild)){
|
||||
violationFamilies.add(familyId);
|
||||
violations_total++;
|
||||
}
|
||||
}
|
||||
count = inheritance.get(gMom.getType()).get(gDad.getType()).get(gChild.getType());
|
||||
inheritance.get(gMom.getType()).get(gDad.getType()).put(gChild.getType(),count+1);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isViolation(Genotype gMom, Genotype gDad, Genotype gChild) {
|
||||
//1 parent is no "call
|
||||
if(!gMom.isCalled()){
|
||||
return (gDad.isHomRef() && gChild.isHomVar()) || (gDad.isHomVar() && gChild.isHomRef());
|
||||
}
|
||||
else if(!gDad.isCalled()){
|
||||
return (gMom.isHomRef() && gChild.isHomVar()) || (gMom.isHomVar() && gChild.isHomRef());
|
||||
}
|
||||
//Both parents have genotype information
|
||||
return !(gMom.getAlleles().contains(gChild.getAlleles().get(0)) && gDad.getAlleles().contains(gChild.getAlleles().get(1)) ||
|
||||
gMom.getAlleles().contains(gChild.getAlleles().get(1)) && gDad.getAlleles().contains(gChild.getAlleles().get(0)));
|
||||
}
|
||||
|
||||
private void createInheritanceMap(){
|
||||
|
||||
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void clearInheritanceMap(){
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the likelihood ratio for a mendelian violation
|
||||
*/
|
||||
public double violationLikelihoodRatio(VariantContext vc) {
|
||||
public double violationLikelihoodRatio(VariantContext vc, String motherId, String fatherId, String childId) {
|
||||
double[] logLikAssignments = new double[27];
|
||||
// the matrix to set up is
|
||||
// MOM DAD CHILD
|
||||
|
|
@ -152,9 +399,9 @@ public class MendelianViolation {
|
|||
// AA AB | AB
|
||||
// |- BB
|
||||
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||
double[] momGL = vc.getGenotype(motherId).getLikelihoods().getAsVector();
|
||||
double[] dadGL = vc.getGenotype(fatherId).getLikelihoods().getAsVector();
|
||||
double[] childGL = vc.getGenotype(childId).getLikelihoods().getAsVector();
|
||||
int offset = 0;
|
||||
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||
|
|
|
|||
|
|
@ -27,11 +27,42 @@ public class SampleDBUnitTest extends BaseTest {
|
|||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));
|
||||
|
||||
private static final Set<Sample> testPEDFamilyF2 = new HashSet<Sample>(Arrays.asList(
|
||||
new Sample("s2", "fam2", "d2", "m2", Gender.FEMALE, Affection.AFFECTED),
|
||||
new Sample("d2", "fam2", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("m2", "fam2", null, null, Gender.FEMALE, Affection.UNKNOWN)
|
||||
));
|
||||
|
||||
private static final Set<Sample> testPEDFamilyF3 = new HashSet<Sample>(Arrays.asList(
|
||||
new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED),
|
||||
new Sample("d1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN),
|
||||
new Sample("m1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN)
|
||||
));
|
||||
|
||||
private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
|
||||
new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
||||
new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
||||
new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));
|
||||
|
||||
private static final HashMap<String, Set<Sample>> testGetFamilies = new HashMap<String,Set<Sample>>();
|
||||
static {
|
||||
testGetFamilies.put("fam1", testPEDSamples);
|
||||
testGetFamilies.put("fam2", testPEDFamilyF2);
|
||||
testGetFamilies.put("fam3", testPEDFamilyF3);
|
||||
}
|
||||
|
||||
private static final Set<Sample> testKidsWithParentsFamilies2 = new HashSet<Sample>(Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("kid3", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("kid2", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED)));
|
||||
|
||||
private static final HashSet<String> testGetPartialFamiliesIds = new HashSet<String>(Arrays.asList("kid","s1"));
|
||||
private static final HashMap<String, Set<Sample>> testGetPartialFamilies = new HashMap<String,Set<Sample>>();
|
||||
static {
|
||||
testGetPartialFamilies.put("fam1", new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED))));
|
||||
testGetPartialFamilies.put("fam3", new HashSet<Sample>(Arrays.asList(new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED))));
|
||||
}
|
||||
|
||||
private static final String testPEDString =
|
||||
String.format("%s%n%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
|
|
@ -46,6 +77,18 @@ public class SampleDBUnitTest extends BaseTest {
|
|||
"fam3 s1 d1 m1 2 2",
|
||||
"fam2 s2 d2 m2 2 2");
|
||||
|
||||
private static final String testPEDMultipleFamilies2 =
|
||||
String.format("%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1",
|
||||
"fam1 mom 0 0 2 2",
|
||||
"fam4 kid4 dad4 0 1 2",
|
||||
"fam4 dad4 0 0 1 1",
|
||||
"fam5 kid2 dad2 mom2 1 2",
|
||||
"fam5 kid3 dad2 mom2 1 2",
|
||||
"fam5 dad2 0 0 1 1",
|
||||
"fam5 mom2 0 0 2 2");
|
||||
|
||||
private static final String testPEDStringInconsistentGender =
|
||||
"fam1 kid 0 0 2 2";
|
||||
|
||||
|
|
@ -138,6 +181,25 @@ public class SampleDBUnitTest extends BaseTest {
|
|||
Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void getFamilies(){
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(db.getFamilies(),testGetFamilies);
|
||||
Assert.assertEquals(db.getFamilies(null),testGetFamilies);
|
||||
Assert.assertEquals(db.getFamilies(testGetPartialFamiliesIds),testGetPartialFamilies);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testGetChildrenWithParents()
|
||||
{
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies2));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(db.getChildrenWithParents(), testKidsWithParentsFamilies2);
|
||||
Assert.assertEquals(db.getChildrenWithParents(false), testKidsWithParentsFamilies2);
|
||||
Assert.assertEquals(db.getChildrenWithParents(true), new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED))));
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadFamilyIDs() {
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
|
||||
|
|
|
|||
|
|
@ -0,0 +1,104 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||
|
||||
static double[] AA1, AB1, BB1;
|
||||
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
|
||||
static final int numSamples = 3;
|
||||
static double[][] priors = new double[2][2*numSamples+1]; // flat priors
|
||||
|
||||
@BeforeSuite
|
||||
public void before() {
|
||||
AA1 = new double[]{0.0, -20.0, -20.0};
|
||||
AB1 = new double[]{-20.0, 0.0, -20.0};
|
||||
BB1 = new double[]{-20.0, -20.0, 0.0};
|
||||
AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0};
|
||||
AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0};
|
||||
AC2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0};
|
||||
BB2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0};
|
||||
BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0};
|
||||
CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0};
|
||||
}
|
||||
|
||||
private class GetGLsTest extends TestDataProvider {
|
||||
GenotypesContext GLs;
|
||||
int numAltAlleles;
|
||||
String name;
|
||||
|
||||
private GetGLsTest(String name, int numAltAlleles, Genotype... arg) {
|
||||
super(GetGLsTest.class, name);
|
||||
GLs = GenotypesContext.create(arg);
|
||||
this.name = name;
|
||||
this.numAltAlleles = numAltAlleles;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s input=%s", super.toString(), GLs);
|
||||
}
|
||||
}
|
||||
|
||||
private static Genotype createGenotype(String name, double[] gls) {
|
||||
return new Genotype(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), Genotype.NO_LOG10_PERROR, gls);
|
||||
}
|
||||
|
||||
@DataProvider(name = "getGLs")
|
||||
public Object[][] createGLsData() {
|
||||
|
||||
// bi-allelic case
|
||||
new GetGLsTest("B0", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AA3", AA1));
|
||||
new GetGLsTest("B1", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AB", AB1));
|
||||
new GetGLsTest("B2", 1, createGenotype("AA1", AA1), createGenotype("BB", BB1), createGenotype("AA2", AA1));
|
||||
new GetGLsTest("B3a", 1, createGenotype("AB", AB1), createGenotype("AA", AA1), createGenotype("BB", BB1));
|
||||
new GetGLsTest("B3b", 1, createGenotype("AB1", AB1), createGenotype("AB2", AB1), createGenotype("AB3", AB1));
|
||||
new GetGLsTest("B4", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("AA", AA1));
|
||||
new GetGLsTest("B5", 1, createGenotype("BB1", BB1), createGenotype("AB", AB1), createGenotype("BB2", BB1));
|
||||
new GetGLsTest("B6", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("BB3", BB1));
|
||||
|
||||
// tri-allelic case
|
||||
new GetGLsTest("B1C0", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AB", AB2));
|
||||
new GetGLsTest("B0C1", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AC", AC2));
|
||||
new GetGLsTest("B1C1a", 2, createGenotype("AA", AA2), createGenotype("AB", AB2), createGenotype("AC", AC2));
|
||||
new GetGLsTest("B1C1b", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("BC", BC2));
|
||||
new GetGLsTest("B2C1", 2, createGenotype("AB1", AB2), createGenotype("AB2", AB2), createGenotype("AC", AC2));
|
||||
new GetGLsTest("B3C2a", 2, createGenotype("AB", AB2), createGenotype("BC1", BC2), createGenotype("BC2", BC2));
|
||||
new GetGLsTest("B3C2b", 2, createGenotype("AB", AB2), createGenotype("BB", BB2), createGenotype("CC", CC2));
|
||||
|
||||
return GetGLsTest.getTests(GetGLsTest.class);
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "getGLs")
|
||||
public void testGLs(GetGLsTest cfg) {
|
||||
|
||||
final double[][] log10AlleleFrequencyLikelihoods = new double[2][2*numSamples+1];
|
||||
final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1];
|
||||
for ( int i = 0; i < 2; i++ ) {
|
||||
for ( int j = 0; j < 2*numSamples+1; j++ ) {
|
||||
log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||
log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||
}
|
||||
}
|
||||
|
||||
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
|
||||
|
||||
int nameIndex = 1;
|
||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
|
||||
int calculatedAlleleCount = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors[allele]);
|
||||
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,7 +7,6 @@ import org.testng.annotations.Test;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
// ********************************************************************************** //
|
||||
|
|
@ -29,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("286f0de92e4ce57986ba861390c6019d"));
|
||||
Arrays.asList("a2d3839c4ebb390b0012d495e4e53b3a"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -45,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("d0593483e85a7d815f4c5ee6db284d2a"));
|
||||
Arrays.asList("43e7a17d95b1a0cf72e669657794d802"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -53,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("3ccce5d909f8f128e496f6841836e5f7"));
|
||||
Arrays.asList("ae29b9c9aacce8046dc780430540cd62"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -63,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -84,7 +83,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "95614280c565ad90f8c000376fef822c";
|
||||
String md5 = "32a34362dff51d8b73a3335048516d82";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -165,8 +164,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" );
|
||||
e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" );
|
||||
e.put( 0.01, "2cb2544739e01f6c08fd820112914317" );
|
||||
e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -276,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("6e182a58472ea17c8b0eb01f80562fbd"));
|
||||
Arrays.asList("45633d905136c86e9d3f90ce613255e5"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -286,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||
Arrays.asList("f93f8a35b47bcf96594ada55e2312c73"));
|
||||
Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead"));
|
||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||
}
|
||||
|
||||
|
|
@ -295,8 +294,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("9be28cb208d8b0314d2bc2696e2fd8d4"));
|
||||
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
|
||||
Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51"));
|
||||
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -291,6 +291,17 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVEMendelianViolationEvaluator() {
|
||||
String vcfFile = "/MendelianViolationEval.vcf";
|
||||
String pedFile = "/MendelianViolationEval.ped";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST",
|
||||
1,
|
||||
Arrays.asList("66e72c887124f40933d32254b2dd44a3"));
|
||||
executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompVsEvalAC() {
|
||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||
|
|
|
|||
|
|
@ -115,4 +115,26 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParallelization() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
WalkerTestSpec spec;
|
||||
|
||||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
|
||||
1,
|
||||
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
|
||||
);
|
||||
executeTest("testParallelization (2 threads)--" + testfile, spec);
|
||||
|
||||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
|
||||
1,
|
||||
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
|
||||
);
|
||||
|
||||
executeTest("testParallelization (4 threads)--" + testfile, spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,6 +83,10 @@ class DataProcessingPipeline extends QScript {
|
|||
@Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false)
|
||||
var defaultPlatform: String = ""
|
||||
|
||||
@Hidden
|
||||
@Input(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false)
|
||||
var testMode: Boolean = false
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Global Variables
|
||||
|
|
@ -335,6 +339,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.known ++= qscript.indels
|
||||
this.consensusDeterminationModel = cleanModelEnum
|
||||
this.compress = 0
|
||||
this.noPGTag = qscript.testMode;
|
||||
this.scatterCount = nContigs
|
||||
this.analysisName = queueLogDir + outBam + ".clean"
|
||||
this.jobName = queueLogDir + outBam + ".clean"
|
||||
|
|
@ -360,6 +365,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.out = outBam
|
||||
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
|
||||
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
|
||||
this.no_pg_tag = qscript.testMode
|
||||
this.scatterCount = nContigs
|
||||
this.isIntermediate = false
|
||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||
|
|
|
|||
|
|
@ -47,6 +47,10 @@ class PacbioProcessingPipeline extends QScript {
|
|||
@Input(shortName="bwastring", required=false)
|
||||
var bwastring: String = ""
|
||||
|
||||
@Hidden
|
||||
@Input(shortName = "test", fullName = "test_mode", required = false)
|
||||
var testMode: Boolean = false
|
||||
|
||||
val queueLogDir: String = ".qlog/"
|
||||
|
||||
def script = {
|
||||
|
|
@ -170,6 +174,7 @@ class PacbioProcessingPipeline extends QScript {
|
|||
this.input_file :+= inBam
|
||||
this.recal_file = inRecalFile
|
||||
this.out = outBam
|
||||
this.no_pg_tag = testMode
|
||||
this.isIntermediate = false
|
||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||
|
|
@ -177,7 +182,6 @@ class PacbioProcessingPipeline extends QScript {
|
|||
}
|
||||
|
||||
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
||||
this.resources = R
|
||||
this.recal_file = inRecalFile
|
||||
this.output_dir = outPath
|
||||
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,69 @@
|
|||
package org.broadinstitute.sting.queue.pipeline
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.testng.annotations.Test
|
||||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class DataProcessingPipelineTest {
|
||||
@Test
|
||||
def testSimpleBAM {
|
||||
val projectName = "test1"
|
||||
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "DataProcessingPipeline"
|
||||
spec.args = Array(
|
||||
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
|
||||
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||
" -i " + BaseTest.testDir + "exampleBAM.bam",
|
||||
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
|
||||
" -nv ",
|
||||
" -test ",
|
||||
" -p " + projectName).mkString
|
||||
spec.fileMD5s += testOut -> "1f85e76de760167a77ed1d9ab4da2936"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
def testBWAPEBAM {
|
||||
val projectName = "test2"
|
||||
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "DataProcessingPipeline"
|
||||
spec.args = Array(
|
||||
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
|
||||
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||
" -i " + BaseTest.testDir + "exampleBAM.bam",
|
||||
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
|
||||
" -nv ",
|
||||
" -test ",
|
||||
" -bwa /home/unix/carneiro/bin/bwa",
|
||||
" -bwape ",
|
||||
" -p " + projectName).mkString
|
||||
spec.fileMD5s += testOut -> "57416a0abdf9524bc92834d466529708"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
package org.broadinstitute.sting.queue.pipeline
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.testng.annotations.Test
|
||||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class PacbioProcessingPipelineTest {
|
||||
@Test
|
||||
def testBAM {
|
||||
val testOut = "exampleBAM.recal.bam"
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "pacbioProcessingPipeline"
|
||||
spec.args = Array(
|
||||
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala",
|
||||
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||
" -i " + BaseTest.testDir + "exampleBAM.bam",
|
||||
" -blasr ",
|
||||
" -test ",
|
||||
" -D " + BaseTest.testDir + "exampleDBSNP.vcf").mkString
|
||||
spec.fileMD5s += testOut -> "f0adce660b55cb91d5f987f9a145471e"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,282 @@
|
|||
##fileformat=VCFv4.1
|
||||
##FILTER=<ID=NC,Description="Inconsistent Genotype Submission For At Least One Sample">
|
||||
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
|
||||
##INFO=<ID=ASP,Number=0,Type=Flag,Description="Is Assembly specific. This is set if the variant only maps to one assembly">
|
||||
##INFO=<ID=ASS,Number=0,Type=Flag,Description="In acceptor splice site FxnCode = 73">
|
||||
##INFO=<ID=CDA,Number=0,Type=Flag,Description="Variation is interrogated in a clinical diagnostic assay">
|
||||
##INFO=<ID=CFL,Number=0,Type=Flag,Description="Has Assembly conflict. This is for weight 1 and 2 variant that maps to different chromosomes on different assemblies.">
|
||||
##INFO=<ID=CLN,Number=0,Type=Flag,Description="Variant is Clinical(LSDB,OMIM,TPA,Diagnostic)">
|
||||
##INFO=<ID=DSS,Number=0,Type=Flag,Description="In donor splice-site FxnCode = 75">
|
||||
##INFO=<ID=G5,Number=0,Type=Flag,Description=">5% minor allele frequency in 1+ populations">
|
||||
##INFO=<ID=G5A,Number=0,Type=Flag,Description=">5% minor allele frequency in each and all populations">
|
||||
##INFO=<ID=GCF,Number=0,Type=Flag,Description="Has Genotype Conflict Same (rs, ind), different genotype. N/N is not included.">
|
||||
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:) and each pair is delimited by a vertical bar (|)">
|
||||
##INFO=<ID=GMAF,Number=1,Type=Float,Description="Global Minor Allele Frequency [0, 0.5]; global population is 1000GenomesProject phase 1 genotype data from 629 individuals, released in the 08-04-2010 dataset">
|
||||
##INFO=<ID=GNO,Number=0,Type=Flag,Description="Genotypes available. The variant has individual genotype (in SubInd table).">
|
||||
##INFO=<ID=HD,Number=0,Type=Flag,Description="Marker is on high density genotyping kit (50K density or greater). The variant may have phenotype associations present in dbGaP.">
|
||||
##INFO=<ID=INT,Number=0,Type=Flag,Description="In Intron FxnCode = 6">
|
||||
##INFO=<ID=KGPROD,Number=0,Type=Flag,Description="1000 Genome production phase">
|
||||
##INFO=<ID=KGPilot1,Number=0,Type=Flag,Description="1000 Genome discovery(pilot1) 2009">
|
||||
##INFO=<ID=KGPilot123,Number=0,Type=Flag,Description="1000 Genome discovery all pilots 2010(1,2,3)">
|
||||
##INFO=<ID=KGVAL,Number=0,Type=Flag,Description="1000 Genome validated by second method">
|
||||
##INFO=<ID=LSD,Number=0,Type=Flag,Description="Submitted from a locus-specific database">
|
||||
##INFO=<ID=MTP,Number=0,Type=Flag,Description="Microattribution/third-party annotation(TPA:GWAS,PAGE)">
|
||||
##INFO=<ID=MUT,Number=0,Type=Flag,Description="Is mutation (journal citation, explicit fact): a low frequency variation that is cited in journal and other reputable sources">
|
||||
##INFO=<ID=NOC,Number=0,Type=Flag,Description="Contig allele not present in variant allele list. The reference sequence allele at the mapped position is not present in the variant allele list, adjusted for orientation.">
|
||||
##INFO=<ID=NOV,Number=0,Type=Flag,Description="Rs cluster has non-overlapping allele sets. True when rs set has more than 2 alleles from different submissions and these sets share no alleles in common.">
|
||||
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
|
||||
##INFO=<ID=NSF,Number=0,Type=Flag,Description="Has non-synonymous frameshift A coding region variation where one allele in the set changes all downstream amino acids. FxnClass = 44">
|
||||
##INFO=<ID=NSM,Number=0,Type=Flag,Description="Has non-synonymous missense A coding region variation where one allele in the set changes protein peptide. FxnClass = 42">
|
||||
##INFO=<ID=NSN,Number=0,Type=Flag,Description="Has non-synonymous nonsense A coding region variation where one allele in the set changes to STOP codon (TER). FxnClass = 41">
|
||||
##INFO=<ID=OM,Number=0,Type=Flag,Description="Has OMIM/OMIA">
|
||||
##INFO=<ID=OTH,Number=0,Type=Flag,Description="Has other variant with exactly the same set of mapped positions on NCBI refernce assembly.">
|
||||
##INFO=<ID=PH1,Number=0,Type=Flag,Description="Phase 1 genotyped: filtered, non-redundant">
|
||||
##INFO=<ID=PH2,Number=0,Type=Flag,Description="Phase 2 genotyped: filtered, non-redundant">
|
||||
##INFO=<ID=PH3,Number=0,Type=Flag,Description="Phase 3 genotyped: filtered, non-redundant">
|
||||
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
|
||||
##INFO=<ID=PMC,Number=0,Type=Flag,Description="Links exist to PubMed Central article">
|
||||
##INFO=<ID=R3,Number=0,Type=Flag,Description="In 3' gene region FxnCode = 13">
|
||||
##INFO=<ID=R5,Number=0,Type=Flag,Description="In 5' gene region FxnCode = 15">
|
||||
##INFO=<ID=REF,Number=0,Type=Flag,Description="Has reference A coding region variation where one allele in the set is identical to the reference sequence. FxnCode = 8">
|
||||
##INFO=<ID=RSPOS,Number=1,Type=Integer,Description="Chr position reported in dbSNP">
|
||||
##INFO=<ID=RV,Number=0,Type=Flag,Description="RS orientation is reversed">
|
||||
##INFO=<ID=S3D,Number=0,Type=Flag,Description="Has 3D structure - SNP3D table">
|
||||
##INFO=<ID=SAO,Number=1,Type=Integer,Description="Variant Allele Origin: 0 - unspecified, 1 - Germline, 2 - Somatic, 3 - Both">
|
||||
##INFO=<ID=SCS,Number=1,Type=Integer,Description="Variant Clinical Significance, 0 - unknown, 1 - untested, 2 - non-pathogenic, 3 - probable-non-pathogenic, 4 - probable-pathogenic, 5 - pathogenic, 6 - drug-response, 7 - histocompatibility, 255 - other">
|
||||
##INFO=<ID=SLO,Number=0,Type=Flag,Description="Has SubmitterLinkOut - From SNP->SubSNP->Batch.link_out">
|
||||
##INFO=<ID=SSR,Number=1,Type=Integer,Description="Variant Suspect Reason Code, 0 - unspecified, 1 - Paralog, 2 - byEST, 3 - Para_EST, 4 - oldAlign, 5 - other">
|
||||
##INFO=<ID=SYN,Number=0,Type=Flag,Description="Has synonymous A coding region variation where one allele in the set does not change the encoded amino acid. FxnCode = 3">
|
||||
##INFO=<ID=TPA,Number=0,Type=Flag,Description="Provisional Third Party Annotation(TPA) (currently rs from PHARMGKB who will give phenotype data)">
|
||||
##INFO=<ID=U3,Number=0,Type=Flag,Description="In 3' UTR Location is in an untranslated region (UTR). FxnCode = 53">
|
||||
##INFO=<ID=U5,Number=0,Type=Flag,Description="In 5' UTR Location is in an untranslated region (UTR). FxnCode = 55">
|
||||
##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
|
||||
##INFO=<ID=VLD,Number=0,Type=Flag,Description="Is Validated. This bit is set if the variant has 2+ minor allele count based on frequency or genotype data.">
|
||||
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property">
|
||||
##INFO=<ID=WGT,Number=1,Type=Integer,Description="Weight, 00 - unmapped, 1 - weight 1, 2 - weight 2, 3 - weight 3 or more">
|
||||
##INFO=<ID=WTD,Number=0,Type=Flag,Description="Is Withdrawn by submitter If one member ss is withdrawn by submitter, then this bit is set. If all member ss' are withdrawn, then the rs is deleted to SNPHistory">
|
||||
##INFO=<ID=dbSNPBuildID,Number=1,Type=Integer,Description="First dbSNP Build for RS">
|
||||
##LeftAlignVariants="analysis_type=LeftAlignVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=00-All.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub filter_mismatching_base_and_quals=false"
|
||||
##contig=<ID=chr1,length=249250621,assembly=b37>
|
||||
##phasing=partial
|
||||
##reference=GRCh37.3
|
||||
##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta
|
||||
##source=dbSNP
|
||||
##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf
|
||||
#CHROM POS ID REF ALT QUAL FILTER INFO
|
||||
chr1 10144 rs144773400 TA T . PASS ASP;RSPOS=10145;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||
chr1 10228 rs143255646 TA T . PASS ASP;RSPOS=10229;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||
chr1 10234 rs145599635 C T . PASS ASP;RSPOS=10234;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 10248 rs148908337 A T . PASS ASP;RSPOS=10248;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 10254 rs140194106 TA T . PASS ASP;RSPOS=10255;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||
chr1 10291 rs145427775 C T . PASS ASP;RSPOS=10291;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 10327 rs112750067 T C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10327;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
|
||||
chr1 10329 rs150969722 AC A . PASS ASP;RSPOS=10330;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||
chr1 10351 rs145072688 CTA C,CA . PASS ASP;RSPOS=10352;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||
chr1 10382 rs147093981 AAC A,AC . PASS ASP;RSPOS=10383;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||
chr1 10433 rs56289060 A AC . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10433;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 10439 rs112766696 AC A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10440;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=132
|
||||
chr1 10439 rs138941843 AC A . PASS ASP;RSPOS=10440;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||
chr1 10440 rs112155239 C A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10440;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
|
||||
chr1 10492 rs55998931 C T . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.0617001828153565;RSPOS=10492;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040000000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 10519 rs62636508 G C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10519;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 10583 rs58108140 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.270566727605119;KGPilot123;RSPOS=10583;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 10611 rs189107123 C G . PASS KGPilot123;RSPOS=10611;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 10828 rs10218492 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10828;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
|
||||
chr1 10904 rs10218493 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10904;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=119
|
||||
chr1 10927 rs10218527 A G . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10927;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
|
||||
chr1 10938 rs28853987 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10938;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 11014 rs28484712 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11014;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 11022 rs28775022 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11022;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 11081 rs10218495 G T . PASS CFL;GENEINFO=LOC100652771:100652771;GNO;RSPOS=11081;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=119
|
||||
chr1 11863 rs187669455 C A . PASS RSPOS=11863;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 13302 rs180734498 C T . PASS KGPilot123;RSPOS=13302;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 13327 rs144762171 G C . PASS ASP;KGPilot123;RSPOS=13327;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 13684 rs71260404 C T . PASS GENEINFO=LOC100652771:100652771;GNO;RSPOS=13684;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
|
||||
chr1 13980 rs151276478 T C . PASS ASP;KGPilot123;RSPOS=13980;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 14889 rs142444908 G A . PASS ASP;RSPOS=14889;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 14907 rs79585140 A G . PASS GNO;RSPOS=14907;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 14930 rs75454623 A G . PASS GNO;RSPOS=14930;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 14976 rs71252251 G A . PASS ASP;GNO;RSPOS=14976;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
|
||||
chr1 15061 rs71268703 T TG . PASS ASP;GNO;RSPOS=15061;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
|
||||
chr1 15118 rs71252250 A G . PASS ASP;GNO;RSPOS=15118;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
|
||||
chr1 15211 rs144718396 T G . PASS ASP;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 15211 rs78601809 T G . PASS ASP;GNO;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 16257 rs78588380 G C . PASS ASP;GNO;RSPOS=16257;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 16378 rs148220436 T C . PASS ASP;RSPOS=16378;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 16495 rs141130360 G C . PASS ASP;RSPOS=16495;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 16497 rs150723783 A G . PASS ASP;RSPOS=16497;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 17519 rs192890528 G T . PASS RSPOS=17519;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 19226 rs138930629 T A . PASS ASP;RSPOS=19226;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 20141 rs56336884 G A . PASS HD;RSPOS=20141;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000400000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 20144 rs143346096 G A . PASS ASP;RSPOS=20144;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 20206 rs71262675 C T . PASS GNO;RSPOS=20206;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
|
||||
chr1 20245 rs71262674 G A . PASS GMAF=0.256398537477148;GNO;RSPOS=20245;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
|
||||
chr1 20304 rs71262673 G C . PASS GMAF=0.338208409506399;GNO;RSPOS=20304;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
|
||||
chr1 26999 rs147506580 A G . PASS ASP;RSPOS=26999;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 29436 rs2462493 G A . PASS GNO;RSPOS=29436;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=100
|
||||
chr1 30923 rs140337953 G T . PASS ASP;KGPilot123;RSPOS=30923;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 33487 rs77459554 C T . PASS ASP;GNO;RSPOS=33487;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 33495 rs75468675 C T . PASS ASP;GNO;RSPOS=33495;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 33505 rs75627161 T C . PASS ASP;GNO;RSPOS=33505;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 33508 rs75609629 A T . PASS ASP;GNO;RSPOS=33508;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 33521 rs76098219 T A . PASS GNO;RSPOS=33521;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 33593 rs557585 G A . PASS RSPOS=33593;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=83
|
||||
chr1 33648 rs62028204 G T . PASS RSPOS=33648;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 33656 rs113821789 T C . PASS RSPOS=33656;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=132
|
||||
chr1 51476 rs187298206 T C . PASS KGPilot123;RSPOS=51476;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 51479 rs116400033 T A . PASS ASP;G5;G5A;GMAF=0.113802559414991;KGPilot123;RSPOS=51479;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070010000100;WGT=0;dbSNPBuildID=132
|
||||
chr1 51803 rs62637812 T C . PASS GMAF=0.468921389396709;RSPOS=51803;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040000000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 51898 rs76402894 C A . PASS GMAF=0.0731261425959781;GNO;RSPOS=51898;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 51914 rs190452223 T G . PASS KGPilot123;RSPOS=51914;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 51928 rs78732933 G A . PASS GNO;RSPOS=51928;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 51935 rs181754315 C T . PASS KGPilot123;RSPOS=51935;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 51954 rs185832753 G C . PASS KGPilot123;RSPOS=51954;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 52058 rs62637813 G C . PASS GMAF=0.0342778793418647;KGPilot123;RSPOS=52058;SAO=0;SSR=1;VC=SNV;VLD;VP=050000000000040010000140;WGT=0;dbSNPBuildID=129
|
||||
chr1 52144 rs190291950 T A . PASS KGPilot123;RSPOS=52144;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 52238 rs150021059 T G . PASS ASP;KGPilot123;RSPOS=52238;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 54353 rs140052487 C A . PASS ASP;KGPilot123;RSPOS=54353;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 54421 rs146477069 A G . PASS ASP;KGPilot123;RSPOS=54421;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 54490 rs141149254 G A . PASS ASP;KGPilot123;RSPOS=54490;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 54676 rs2462492 C T . PASS ASP;GMAF=0.191956124314442;GNO;HD;KGPilot123;RSPOS=54676;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040510000100;WGT=0;dbSNPBuildID=100
|
||||
chr1 54753 rs143174675 T G . PASS ASP;KGPilot123;RSPOS=54753;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 54788 rs59861892 CC C,CCT . PASS ASP;RSPOS=54789;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 54795 rs58014817 T A . PASS ASP;RSPOS=54795;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 55164 rs3091274 C A . PASS G5;G5A;GMAF=0.145338208409506;GNO;KGPilot123;RSPOS=55164;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030110000100;WGT=0;dbSNPBuildID=103
|
||||
chr1 55299 rs10399749 C T . PASS G5;G5A;GMAF=0.278793418647166;GNO;KGPilot123;PH2;RSPOS=55299;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030112000100;WGT=0;dbSNPBuildID=119
|
||||
chr1 55302 rs3091273 C T . PASS RSPOS=55302;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
|
||||
chr1 55313 rs182462964 A T . PASS KGPilot123;RSPOS=55313;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 55322 rs3107974 C T . PASS RSPOS=55322;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
|
||||
chr1 55326 rs3107975 T C . PASS GNO;HD;KGPilot123;RSPOS=55326;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=103
|
||||
chr1 55330 rs185215913 G A . PASS KGPilot123;RSPOS=55330;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 55367 rs190850374 G A . PASS KGPilot123;RSPOS=55367;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 55388 rs182711216 C T . PASS KGPilot123;RSPOS=55388;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 55394 rs2949420 T A . PASS GNO;KGPilot123;PH2;RSPOS=55394;SAO=0;SSR=0;VC=SNV;VP=050000000000000112000100;WGT=0;dbSNPBuildID=101
|
||||
chr1 55416 rs193242050 G A . PASS KGPilot123;RSPOS=55416;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 55427 rs183189405 T C . PASS KGPilot123;RSPOS=55427;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 55545 rs28396308 C T . PASS GNO;RSPOS=55545;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 55816 rs187434873 G A . PASS KGPilot123;RSPOS=55816;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 55850 rs191890754 C G . PASS KGPilot123;RSPOS=55850;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 55852 rs184233019 G C . PASS KGPilot123;RSPOS=55852;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 56644 rs143342222 A C . PASS ASP;KGPilot123;RSPOS=56644;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 57952 rs189727433 A C . PASS KGPilot123;RSPOS=57952;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 58771 rs140128481 T C . PASS ASP;RSPOS=58771;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 58814 rs114420996 G A . PASS ASP;G5;GMAF=0.0982632541133455;KGPilot123;RSPOS=58814;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
|
||||
chr1 59040 rs149755937 T C . PASS ASP;KGPilot123;RSPOS=59040;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 60718 rs78395614 G A . PASS CFL;GNO;RSPOS=60718;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 60726 rs192328835 C A . PASS KGPilot123;RSPOS=60726;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 60791 rs76199781 A G . PASS CFL;GNO;RSPOS=60791;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 61442 rs74970982 A G . PASS CFL;GMAF=0.076782449725777;GNO;KGPilot123;RSPOS=61442;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 61462 rs56992750 T A . PASS CFL;G5;G5A;GMAF=0.0383912248628885;GNO;KGPilot123;RSPOS=61462;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008030110000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 61480 rs75526266 G C . PASS CFL;GNO;RSPOS=61480;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 61499 rs75719746 G A . PASS CFL;GNO;RSPOS=61499;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 61743 rs184286948 G C . PASS KGPilot123;RSPOS=61743;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 61920 rs62637820 G A . PASS CFL;GMAF=0.0255941499085923;RSPOS=61920;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 61987 rs76735897 A G . PASS CFL;GMAF=0.292961608775137;GNO;KGPilot123;RSPOS=61987;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 61989 rs77573425 G C . PASS CFL;GMAF=0.309414990859232;GNO;KGPilot123;RSPOS=61989;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 61993 rs190553843 C T . PASS KGPilot123;RSPOS=61993;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 62156 rs181864839 C T . PASS KGPilot123;RSPOS=62156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 62157 rs10399597 G A . PASS CFL;GMAF=0.00228519195612431;KGPilot123;RSPOS=62157;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=119
|
||||
chr1 62162 rs140556834 G A . PASS ASP;KGPilot123;RSPOS=62162;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 62203 rs28402963 T C . PASS CFL;KGPilot123;RSPOS=62203;SAO=0;SSR=0;VC=SNV;VP=050000000008000010000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 62271 rs28599927 A G . PASS CFL;GMAF=0.138482632541133;RSPOS=62271;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 63268 rs75478250 T C . PASS CFL;GNO;RSPOS=63268;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 63276 rs185977555 G A . PASS KGPilot123;RSPOS=63276;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 63297 rs188886746 G A . PASS KGPilot123;RSPOS=63297;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 63671 rs116440577 G A . PASS ASP;G5;GMAF=0.170018281535649;KGPilot123;RSPOS=63671;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
|
||||
chr1 63737 rs77426996 TACT T,TCTA . PASS CFL;RSPOS=63738;SAO=0;SSR=0;VC=DIV;VP=050000000008000000000200;WGT=0;dbSNPBuildID=131
|
||||
chr1 64649 rs181431124 A C . PASS KGPilot123;RSPOS=64649;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 66008 rs2691286 C G . PASS CFL;GNO;RSPOS=66008;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=100
|
||||
chr1 66162 rs62639105 A T . PASS CFL;GMAF=0.320383912248629;GNO;KGPilot123;RSPOS=66162;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000110000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 66176 rs28552463 T A . PASS CFL;GMAF=0.0484460694698355;KGPilot123;RSPOS=66176;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 66219 rs181028663 A T . PASS KGPilot123;RSPOS=66219;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 66238 rs113961546 T A . PASS CFL;GNO;RSPOS=66238;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=132
|
||||
chr1 66314 rs28534012 T A . PASS CFL;RSPOS=66314;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 66331 rs186063952 A C . PASS KGPilot123;RSPOS=66331;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 66334 rs28464214 T A . PASS CFL;RSPOS=66334;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 66442 rs192044252 T A . PASS KGPilot123;RSPOS=66442;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 66457 rs13328655 T A . PASS CFL;GMAF=0.0795246800731261;KGPilot123;RSPOS=66457;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=121
|
||||
chr1 66503 rs112350669 T A . PASS CFL;RSPOS=66503;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=132
|
||||
chr1 66507 rs12401368 T A . PASS CFL;GMAF=0.479890310786106;KGPilot123;RSPOS=66507;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=120
|
||||
chr1 66651 rs2257270 A T . PASS CFL;GNO;RSPOS=66651;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=100
|
||||
chr1 67179 rs149952626 C G . PASS ASP;KGPilot123;RSPOS=67179;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 67181 rs77662731 A G . PASS ASP;G5;G5A;GENEINFO=OR4F5:79501;GMAF=0.0470749542961609;GNO;KGPilot123;RSPOS=67181;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070110000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 67223 rs78676975 C A . PASS ASP;GENEINFO=OR4F5:79501;GNO;RSPOS=67223;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 69428 rs140739101 T G . PASS ASP;RSPOS=69428;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 69453 rs142004627 G A . PASS ASP;RSPOS=69453;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 69476 rs148502021 T C . PASS ASP;RSPOS=69476;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 69496 rs150690004 G A . PASS ASP;RSPOS=69496;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 69511 rs75062661 A G . PASS GENEINFO=OR4F5:79501;GMAF=0.193784277879342;GNO;KGPilot123;RSPOS=69511;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000110000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 69534 rs190717287 T C . PASS KGPilot123;RSPOS=69534;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 69552 rs55874132 G C . PASS GENEINFO=OR4F5:79501;HD;RSPOS=69552;S3D;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050300000000040400000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 69590 rs141776804 T A . PASS ASP;RSPOS=69590;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 69594 rs144967600 T C . PASS ASP;RSPOS=69594;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 72148 rs182862337 C T . PASS KGPilot123;RSPOS=72148;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 73841 rs143773730 C T . PASS ASP;KGPilot123;RSPOS=73841;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 74651 rs62641291 G A . PASS RSPOS=74651;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 74681 rs13328683 G T . PASS CFL;GMAF=0.286106032906764;RSPOS=74681;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=121
|
||||
chr1 74709 rs62641292 T A . PASS CFL;RSPOS=74709;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 74771 rs13328675 A G . PASS CFL;RSPOS=74771;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
|
||||
chr1 74790 rs13328700 C G . PASS CFL;RSPOS=74790;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
|
||||
chr1 74792 rs13328684 G A . PASS CFL;RSPOS=74792;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
|
||||
chr1 77462 rs188023513 G A . PASS KGPilot123;RSPOS=77462;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 77470 rs192898053 T C . PASS KGPilot123;RSPOS=77470;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 77874 rs184538873 G A . PASS KGPilot123;RSPOS=77874;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 77961 rs78385339 G A . PASS GMAF=0.125685557586837;KGPilot123;RSPOS=77961;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040010000100;WGT=0;dbSNPBuildID=131
|
||||
chr1 79033 rs62641298 A G . PASS GMAF=0.438299817184644;GNO;HD;KGPilot123;RSPOS=79033;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 79050 rs62641299 G T . PASS GMAF=0.224405850091408;GNO;KGPilot123;RSPOS=79050;SAO=0;SSR=0;VC=SNV;VP=050000000000000110000100;WGT=0;dbSNPBuildID=129
|
||||
chr1 79137 rs143777184 A T . PASS ASP;KGPilot123;RSPOS=79137;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 79417 rs184768190 C T . PASS KGPilot123;RSPOS=79417;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 79418 rs2691296 G C . PASS GMAF=0.0178244972577697;RSPOS=79418;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=100
|
||||
chr1 79538 rs2691295 C T . PASS RSPOS=79538;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=100
|
||||
chr1 79772 rs147215883 C G . PASS ASP;KGPilot123;RSPOS=79772;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 79872 rs189224661 T G . PASS KGPilot123;RSPOS=79872;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 80323 rs3942603 G C . PASS CFL;GNO;RSPOS=80323;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=108
|
||||
chr1 80386 rs3878915 C A . PASS GMAF=0.0118829981718464;RSPOS=80386;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=108
|
||||
chr1 80454 rs144226842 G C . PASS ASP;KGPilot123;RSPOS=80454;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 81836 rs2259560 A T . PASS ASP;GNO;RSPOS=81836;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=100
|
||||
chr1 81949 rs181567186 T C . PASS KGPilot123;RSPOS=81949;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 81962 rs4030308 T TAA . PASS ASP;RSPOS=81962;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
|
||||
chr1 82102 rs4030307 C T . PASS ASP;RSPOS=82102;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
|
||||
chr1 82103 rs2020400 T C . PASS ASP;RSPOS=82103;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
|
||||
chr1 82126 rs1815133 C T . PASS ASP;RSPOS=82126;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
|
||||
chr1 82133 rs4030306 CA C,CAAAAAAAAAAAAAAA . PASS ASP;RSPOS=82136;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
|
||||
chr1 82154 rs4477212 A G . PASS ASP;HD;RSPOS=82154;SAO=0;SSR=0;VC=SNV;VP=050000000004000400000100;WGT=0;dbSNPBuildID=111
|
||||
chr1 82162 rs1815132 C A . PASS ASP;GMAF=0.0351919561243144;GNO;RSPOS=82162;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=92
|
||||
chr1 82163 rs139113303 G A . PASS ASP;KGPilot123;RSPOS=82163;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 82196 rs112844054 A T . PASS ASP;RSPOS=82196;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
|
||||
chr1 82249 rs1851945 A G . PASS ASP;GMAF=0.0452468007312614;KGPilot123;RSPOS=82249;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=92
|
||||
chr1 82282 rs3871775 G A . PASS ASP;RSPOS=82282;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
|
||||
chr1 82303 rs3871776 T C . PASS ASP;RSPOS=82303;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
|
||||
chr1 82316 rs4030305 A C . PASS ASP;GNO;RSPOS=82316;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=108
|
||||
chr1 82609 rs149189449 C G . PASS ASP;KGPilot123;RSPOS=82609;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||
chr1 82676 rs185237834 T G . PASS KGPilot123;RSPOS=82676;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 82734 rs4030331 T C . PASS ASP;GMAF=0.261882998171846;KGPilot123;RSPOS=82734;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=108
|
||||
chr1 82957 rs189774606 C T . PASS KGPilot123;RSPOS=82957;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 83084 rs181193408 T A . PASS KGPilot123;RSPOS=83084;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 83088 rs186081601 G C . PASS KGPilot123;RSPOS=83088;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 83107 rs4405097 G C . PASS ASP;RSPOS=83107;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=111
|
||||
chr1 83119 rs4030324 AA A,ATAAC . PASS ASP;RSPOS=83120;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
|
||||
chr1 83771 rs189906733 T G . PASS KGPilot123;RSPOS=83771;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 83786 rs58520670 T TA . PASS ASP;RSPOS=83794;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 83815 rs58857344 GAGAA G . PASS ASP;RSPOS=83827;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 83826 rs71281475 AAAGA A,AAA . PASS ASP;GNO;RSPOS=83827;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
|
||||
chr1 83855 rs59596480 GAA G . PASS ASP;RSPOS=83857;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 83872 rs59556914 AA A,AAGA . PASS ASP;RSPOS=83873;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 83884 rs59586754 GAAA G . PASS ASP;RSPOS=83885;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 83897 rs61330047 GAA G . PASS ASP;RSPOS=83899;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 83901 rs58254183 GAAAGAA G . PASS ASP;RSPOS=83903;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 83921 rs61338823 GAA G . PASS ASP;RSPOS=83923;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 83930 rs71281474 AG A,AGA . PASS ASP;GNO;RSPOS=83931;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
|
||||
chr1 83934 rs59235392 AG A,AGAAA . PASS ASP;RSPOS=83935;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 83977 rs180759811 A G . PASS KGPilot123;RSPOS=83977;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 84002 rs28850140 G A . PASS ASP;GMAF=0.138939670932358;KGPilot123;RSPOS=84002;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=125
|
||||
chr1 84010 rs186443818 G A . PASS KGPilot123;RSPOS=84010;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 84018 rs61352176 GAA G . PASS ASP;RSPOS=84020;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||
chr1 84079 rs190867312 T C . PASS KGPilot123;RSPOS=84079;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 84139 rs183605470 A T . PASS KGPilot123;RSPOS=84139;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 84156 rs188652299 A C . PASS KGPilot123;RSPOS=84156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 84244 rs191297051 A C . PASS KGPilot123;RSPOS=84244;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 84295 rs183209871 G A . PASS KGPilot123;RSPOS=84295;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 84346 rs187855973 T C . PASS KGPilot123;RSPOS=84346;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 84453 rs191379015 C G . PASS KGPilot123;RSPOS=84453;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
chr1 84705 rs183470350 T G . PASS KGPilot123;RSPOS=84705;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||
|
|
@ -0,0 +1 @@
|
|||
100000 1 0
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
100000 1 11
|
||||
0 chr1 (null)
|
||||
0 100000 0
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue