Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
7a386b45a5
|
|
@ -711,6 +711,8 @@ public class SAMDataSource {
|
||||||
* @param validationStringency validation stringency.
|
* @param validationStringency validation stringency.
|
||||||
*/
|
*/
|
||||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
||||||
|
int totalNumberOfFiles = readerIDs.size();
|
||||||
|
int readerNumber = 1;
|
||||||
for(SAMReaderID readerID: readerIDs) {
|
for(SAMReaderID readerID: readerIDs) {
|
||||||
File indexFile = findIndexFile(readerID.samFile);
|
File indexFile = findIndexFile(readerID.samFile);
|
||||||
|
|
||||||
|
|
@ -728,8 +730,7 @@ public class SAMDataSource {
|
||||||
reader.enableFileSource(true);
|
reader.enableFileSource(true);
|
||||||
reader.setValidationStringency(validationStringency);
|
reader.setValidationStringency(validationStringency);
|
||||||
|
|
||||||
final SAMFileHeader header = reader.getFileHeader();
|
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
|
||||||
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
|
||||||
|
|
||||||
readers.put(readerID,reader);
|
readers.put(readerID,reader);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -142,9 +142,21 @@ public class SampleDB {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public final Map<String, Set<Sample>> getFamilies() {
|
public final Map<String, Set<Sample>> getFamilies() {
|
||||||
|
return getFamilies(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a map from family ID -> set of family members for all samples in sampleIds with
|
||||||
|
* non-null family ids
|
||||||
|
*
|
||||||
|
* @param sampleIds - all samples to include. If null is passed then all samples are returned.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public final Map<String, Set<Sample>> getFamilies(Collection<String> sampleIds) {
|
||||||
final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
|
final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
|
||||||
|
|
||||||
for ( final Sample sample : samples.values() ) {
|
for ( final Sample sample : samples.values() ) {
|
||||||
|
if(sampleIds == null || sampleIds.contains(sample.getID())){
|
||||||
final String famID = sample.getFamilyID();
|
final String famID = sample.getFamilyID();
|
||||||
if ( famID != null ) {
|
if ( famID != null ) {
|
||||||
if ( ! families.containsKey(famID) )
|
if ( ! families.containsKey(famID) )
|
||||||
|
|
@ -152,10 +164,53 @@ public class SampleDB {
|
||||||
families.get(famID).add(sample);
|
families.get(famID).add(sample);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return families;
|
return families;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the set of all children that have both of their parents.
|
||||||
|
* Note that if a family is composed of more than 1 child, each child is
|
||||||
|
* returned.
|
||||||
|
* @return - all the children that have both of their parents
|
||||||
|
*/
|
||||||
|
public final Set<Sample> getChildrenWithParents(){
|
||||||
|
return getChildrenWithParents(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the set of all children that have both of their parents.
|
||||||
|
* Note that if triosOnly = false, a family is composed of more than 1 child, each child is
|
||||||
|
* returned.
|
||||||
|
*
|
||||||
|
* This method can be used wherever trios are needed
|
||||||
|
*
|
||||||
|
* @param triosOnly - if set to true, only strict trios are returned
|
||||||
|
* @return - all the children that have both of their parents
|
||||||
|
*/
|
||||||
|
public final Set<Sample> getChildrenWithParents(boolean triosOnly) {
|
||||||
|
|
||||||
|
Map<String, Set<Sample>> families = getFamilies();
|
||||||
|
final Set<Sample> childrenWithParents = new HashSet<Sample>();
|
||||||
|
Iterator<Sample> sampleIterator;
|
||||||
|
|
||||||
|
for ( Set<Sample> familyMembers: families.values() ) {
|
||||||
|
if(triosOnly && familyMembers.size() != 3)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
sampleIterator = familyMembers.iterator();
|
||||||
|
Sample sample;
|
||||||
|
while(sampleIterator.hasNext()){
|
||||||
|
sample = sampleIterator.next();
|
||||||
|
if(sample.getParents().size() == 2 && familyMembers.containsAll(sample.getParents()))
|
||||||
|
childrenWithParents.add(sample);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return childrenWithParents;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return all samples with a given family ID
|
* Return all samples with a given family ID
|
||||||
* @param familyId
|
* @param familyId
|
||||||
|
|
|
||||||
|
|
@ -88,7 +88,7 @@ public abstract class Walker<MapType, ReduceType> {
|
||||||
return getToolkit().getMasterSequenceDictionary();
|
return getToolkit().getMasterSequenceDictionary();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected SampleDB getSampleDB() {
|
public SampleDB getSampleDB() {
|
||||||
return getToolkit().getSampleDB();
|
return getToolkit().getSampleDB();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,22 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||||
|
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -30,23 +26,26 @@ import java.util.Map;
|
||||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
private MendelianViolation mendelianViolation = null;
|
private MendelianViolation mendelianViolation = null;
|
||||||
|
private String motherId;
|
||||||
|
private String fatherId;
|
||||||
|
private String childId;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( mendelianViolation == null ) {
|
if ( mendelianViolation == null ) {
|
||||||
if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) {
|
if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) {
|
||||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
|
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line.");
|
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||||
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() &&
|
boolean hasAppropriateGenotypes = vc.hasGenotype(motherId) && vc.getGenotype(motherId).hasLikelihoods() &&
|
||||||
vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() &&
|
vc.hasGenotype(fatherId) && vc.getGenotype(fatherId).hasLikelihoods() &&
|
||||||
vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods();
|
vc.hasGenotype(childId) && vc.getGenotype(childId).hasLikelihoods();
|
||||||
if ( hasAppropriateGenotypes )
|
if ( hasAppropriateGenotypes )
|
||||||
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));
|
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc,motherId,fatherId,childId));
|
||||||
|
|
||||||
return toRet;
|
return toRet;
|
||||||
}
|
}
|
||||||
|
|
@ -55,4 +54,27 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
||||||
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||||
|
|
||||||
|
private boolean checkAndSetSamples(SampleDB db){
|
||||||
|
Set<String> families = db.getFamilyIDs();
|
||||||
|
if(families.size() != 1)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Set<Sample> family = db.getFamily(families.iterator().next());
|
||||||
|
if(family.size() != 3)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Iterator<Sample> sampleIter = family.iterator();
|
||||||
|
Sample sample;
|
||||||
|
for(sample = sampleIter.next();sampleIter.hasNext();sample=sampleIter.next()){
|
||||||
|
if(sample.getParents().size()==2){
|
||||||
|
motherId = sample.getMaternalID();
|
||||||
|
fatherId = sample.getPaternalID();
|
||||||
|
childId = sample.getID();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
||||||
for ( final Genotype genotype : genotypes ) {
|
for ( final Genotype genotype : genotypes ) {
|
||||||
|
|
||||||
// we care only about variant calls with likelihoods
|
// we care only about variant calls with likelihoods
|
||||||
if ( genotype.isHomRef() )
|
if ( !genotype.isHet() && !genotype.isHomVar() )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||||
|
|
|
||||||
|
|
@ -12,10 +12,8 @@ import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -26,42 +24,33 @@ import java.util.*;
|
||||||
|
|
||||||
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
private Set<MendelianViolation> fullMVSet = null;
|
private Set<Sample> trios = null;
|
||||||
private final static int REF = 0;
|
private final static int REF = 0;
|
||||||
private final static int HET = 1;
|
private final static int HET = 1;
|
||||||
private final static int HOM = 2;
|
private final static int HOM = 2;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( fullMVSet == null ) {
|
if ( trios == null ) {
|
||||||
fullMVSet = new HashSet<MendelianViolation>();
|
|
||||||
|
|
||||||
if ( walker instanceof VariantAnnotator ) {
|
if ( walker instanceof VariantAnnotator ) {
|
||||||
final Map<String,Set<Sample>> families = ((VariantAnnotator) walker).getSampleDB().getFamilies();
|
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
|
||||||
for( final Set<Sample> family : families.values() ) {
|
|
||||||
for( final Sample sample : family ) {
|
|
||||||
if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now
|
|
||||||
fullMVSet.add( new MendelianViolation(sample, 0.0) );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
|
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final Map<String,Object> toRet = new HashMap<String,Object>(1);
|
final Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||||
final HashSet<MendelianViolation> mvsToTest = new HashSet<MendelianViolation>();
|
final HashSet<Sample> triosToTest = new HashSet<Sample>();
|
||||||
|
|
||||||
for( final MendelianViolation mv : fullMVSet ) {
|
for( final Sample child : trios) {
|
||||||
final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() &&
|
final boolean hasAppropriateGenotypes = vc.hasGenotype(child.getID()) && vc.getGenotype(child.getID()).hasLikelihoods() &&
|
||||||
vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() &&
|
vc.hasGenotype(child.getPaternalID()) && vc.getGenotype(child.getPaternalID()).hasLikelihoods() &&
|
||||||
vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods();
|
vc.hasGenotype(child.getMaternalID()) && vc.getGenotype(child.getMaternalID()).hasLikelihoods();
|
||||||
if ( hasAppropriateGenotypes ) {
|
if ( hasAppropriateGenotypes ) {
|
||||||
mvsToTest.add(mv);
|
triosToTest.add(child);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
toRet.put("TDT", calculateTDT( vc, mvsToTest ));
|
toRet.put("TDT", calculateTDT( vc, triosToTest ));
|
||||||
|
|
||||||
return toRet;
|
return toRet;
|
||||||
}
|
}
|
||||||
|
|
@ -72,27 +61,27 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
|
||||||
|
|
||||||
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
||||||
private double calculateTDT( final VariantContext vc, final Set<MendelianViolation> mvsToTest ) {
|
private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
|
||||||
|
|
||||||
final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM);
|
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM);
|
||||||
final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM);
|
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM);
|
||||||
final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET);
|
final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET);
|
||||||
final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET);
|
final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET);
|
||||||
final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET);
|
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET);
|
||||||
final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET);
|
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET);
|
||||||
|
|
||||||
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
|
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
|
||||||
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
|
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
|
||||||
return (numer * numer) / denom;
|
return (numer * numer) / denom;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateNChildren( final VariantContext vc, final Set<MendelianViolation> mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) {
|
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
|
||||||
final double likelihoodVector[] = new double[mvsToTest.size() * 2];
|
final double likelihoodVector[] = new double[triosToTest.size() * 2];
|
||||||
int iii = 0;
|
int iii = 0;
|
||||||
for( final MendelianViolation mv : mvsToTest ) {
|
for( final Sample child : triosToTest ) {
|
||||||
final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector();
|
final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
|
||||||
final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector();
|
final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
|
||||||
final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector();
|
final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
|
||||||
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
|
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
|
||||||
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
|
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -167,9 +167,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||||
protected boolean indelsOnly = false;
|
protected boolean indelsOnly = false;
|
||||||
|
|
||||||
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
|
|
||||||
public String familyStr = null;
|
|
||||||
|
|
||||||
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
|
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
|
||||||
public double minGenotypeQualityP = 0.0;
|
public double minGenotypeQualityP = 0.0;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
|
|
||||||
protected enum GenotypeType { AA, AB, BB }
|
protected enum GenotypeType { AA, AB, BB }
|
||||||
|
|
||||||
protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE;
|
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
this.N = N;
|
this.N = N;
|
||||||
|
|
@ -65,21 +65,11 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
* @param GLs genotype likelihoods
|
* @param GLs genotype likelihoods
|
||||||
* @param Alleles Alleles corresponding to GLs
|
* @param Alleles Alleles corresponding to GLs
|
||||||
* @param log10AlleleFrequencyPriors priors
|
* @param log10AlleleFrequencyPriors priors
|
||||||
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
|
* @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results
|
||||||
|
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results
|
||||||
*/
|
*/
|
||||||
protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles,
|
protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[][] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors);
|
double[][] log10AlleleFrequencyLikelihoods,
|
||||||
|
double[][] log10AlleleFrequencyPosteriors);
|
||||||
/**
|
|
||||||
* Can be overridden by concrete subclasses
|
|
||||||
* @param vc variant context with genotype likelihoods
|
|
||||||
* @param log10AlleleFrequencyPosteriors allele frequency results
|
|
||||||
* @param AFofMaxLikelihood allele frequency of max likelihood
|
|
||||||
*
|
|
||||||
* @return calls
|
|
||||||
*/
|
|
||||||
protected abstract GenotypesContext assignGenotypes(VariantContext vc,
|
|
||||||
double[] log10AlleleFrequencyPosteriors,
|
|
||||||
int AFofMaxLikelihood);
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,94 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
|
||||||
|
|
||||||
public class BiallelicGenotypeLikelihoods {
|
|
||||||
|
|
||||||
private String sample;
|
|
||||||
private double[] GLs;
|
|
||||||
private Allele A, B;
|
|
||||||
private int depth;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new object for sample with given alleles and genotype likelihoods
|
|
||||||
*
|
|
||||||
* @param sample sample name
|
|
||||||
* @param A allele A
|
|
||||||
* @param B allele B
|
|
||||||
* @param log10AALikelihoods AA likelihoods
|
|
||||||
* @param log10ABLikelihoods AB likelihoods
|
|
||||||
* @param log10BBLikelihoods BB likelihoods
|
|
||||||
* @param depth the read depth used in creating the likelihoods
|
|
||||||
*/
|
|
||||||
public BiallelicGenotypeLikelihoods(String sample,
|
|
||||||
Allele A,
|
|
||||||
Allele B,
|
|
||||||
double log10AALikelihoods,
|
|
||||||
double log10ABLikelihoods,
|
|
||||||
double log10BBLikelihoods,
|
|
||||||
int depth) {
|
|
||||||
this.sample = sample;
|
|
||||||
this.A = A;
|
|
||||||
this.B = B;
|
|
||||||
this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods};
|
|
||||||
this.depth = depth;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getSample() {
|
|
||||||
return sample;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getAALikelihoods() {
|
|
||||||
return GLs[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getABLikelihoods() {
|
|
||||||
return GLs[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getBBLikelihoods() {
|
|
||||||
return GLs[2];
|
|
||||||
}
|
|
||||||
|
|
||||||
public double[] getLikelihoods() {
|
|
||||||
return GLs;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Allele getAlleleA() {
|
|
||||||
return A;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Allele getAlleleB() {
|
|
||||||
return B;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getDepth() {
|
|
||||||
return depth;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -27,13 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: depristo
|
|
||||||
* Date: Aug 4, 2009
|
|
||||||
* Time: 6:46:09 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public enum DiploidGenotype {
|
public enum DiploidGenotype {
|
||||||
AA ('A', 'A'),
|
AA ('A', 'A'),
|
||||||
AC ('A', 'C'),
|
AC ('A', 'C'),
|
||||||
|
|
@ -110,6 +103,20 @@ public enum DiploidGenotype {
|
||||||
return conversionMatrix[index1][index2];
|
return conversionMatrix[index1][index2];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly
|
||||||
|
* @param baseIndex1 base1
|
||||||
|
* @param baseIndex2 base2
|
||||||
|
* @return the diploid genotype
|
||||||
|
*/
|
||||||
|
public static DiploidGenotype createDiploidGenotype(int baseIndex1, int baseIndex2) {
|
||||||
|
if ( baseIndex1 == -1 )
|
||||||
|
throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character");
|
||||||
|
if ( baseIndex2 == -1 )
|
||||||
|
throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character");
|
||||||
|
return conversionMatrix[baseIndex1][baseIndex2];
|
||||||
|
}
|
||||||
|
|
||||||
private static final DiploidGenotype[][] conversionMatrix = {
|
private static final DiploidGenotype[][] conversionMatrix = {
|
||||||
{ DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT },
|
{ DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT },
|
||||||
{ DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT },
|
{ DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT },
|
||||||
|
|
|
||||||
|
|
@ -27,80 +27,43 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
//
|
|
||||||
// code for testing purposes
|
|
||||||
//
|
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
|
|
||||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||||
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
|
|
||||||
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
|
||||||
private final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
|
||||||
|
|
||||||
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void getLog10PNonRef(GenotypesContext GLs, List<Allele> alleles,
|
public void getLog10PNonRef(final GenotypesContext GLs,
|
||||||
double[] log10AlleleFrequencyPriors,
|
final List<Allele> alleles,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
final double[][] log10AlleleFrequencyPriors,
|
||||||
|
final double[][] log10AlleleFrequencyLikelihoods,
|
||||||
|
final double[][] log10AlleleFrequencyPosteriors) {
|
||||||
final int numAlleles = alleles.size();
|
final int numAlleles = alleles.size();
|
||||||
final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
|
|
||||||
final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
|
|
||||||
|
|
||||||
int idxDiag = numAlleles;
|
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||||
int incr = numAlleles - 1;
|
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
|
||||||
for (int k=1; k < numAlleles; k++) {
|
|
||||||
// multi-allelic approximation, part 1: Ideally
|
|
||||||
// for each alt allele compute marginal (suboptimal) posteriors -
|
|
||||||
// compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of
|
|
||||||
// as a row-wise upper triangular matrix of likelihoods.
|
|
||||||
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
|
|
||||||
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
|
|
||||||
|
|
||||||
final int idxAA = 0;
|
|
||||||
final int idxAB = k;
|
|
||||||
// yy is always element on the diagonal.
|
|
||||||
// 2 alleles: BBelement 2
|
|
||||||
// 3 alleles: BB element 3. CC element 5
|
|
||||||
// 4 alleles:
|
|
||||||
final int idxBB = idxDiag;
|
|
||||||
idxDiag += incr--;
|
|
||||||
|
|
||||||
final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
|
||||||
|
|
||||||
if (numAlleles > 2) {
|
|
||||||
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
|
|
||||||
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (numAlleles > 2) {
|
|
||||||
// multiallelic approximation, part 2:
|
|
||||||
// report posteriors for allele that has highest estimated AC
|
|
||||||
int mostLikelyAlleleIdx = MathUtils.maxElementIndex(bestAFguess);
|
|
||||||
for (int k=0; k < log10AlleleFrequencyPosteriors.length-1; k++)
|
|
||||||
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
|
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); // TODO -- initialize with size of GLs
|
||||||
|
|
||||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||||
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
||||||
if ( sample.hasLikelihoods() ) {
|
if ( sample.hasLikelihoods() ) {
|
||||||
double[] gls = sample.getLikelihoods().getAsVector();
|
double[] gls = sample.getLikelihoods().getAsVector();
|
||||||
|
|
||||||
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
|
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
|
||||||
genotypeLikelihoods.add(gls);
|
genotypeLikelihoods.add(gls);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -109,9 +72,401 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
final static double approximateLog10SumLog10(double[] vals) {
|
||||||
|
if ( vals.length < 2 )
|
||||||
|
throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10");
|
||||||
|
|
||||||
|
double approx = approximateLog10SumLog10(vals[0], vals[1]);
|
||||||
|
for ( int i = 2; i < vals.length; i++ )
|
||||||
|
approx = approximateLog10SumLog10(approx, vals[i]);
|
||||||
|
return approx;
|
||||||
|
}
|
||||||
|
|
||||||
|
final static double approximateLog10SumLog10(double small, double big) {
|
||||||
|
// make sure small is really the smaller value
|
||||||
|
if ( small > big ) {
|
||||||
|
final double t = big;
|
||||||
|
big = small;
|
||||||
|
small = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
|
||||||
|
return big;
|
||||||
|
|
||||||
|
if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
|
||||||
|
return big;
|
||||||
|
|
||||||
|
// OK, so |y-x| < tol: we use the following identity then:
|
||||||
|
// we need to compute log10(10^x + 10^y)
|
||||||
|
// By Jacobian logarithm identity, this is equal to
|
||||||
|
// max(x,y) + log10(1+10^-abs(x-y))
|
||||||
|
// we compute the second term as a table lookup
|
||||||
|
// with integer quantization
|
||||||
|
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||||
|
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||||
|
int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||||
|
|
||||||
|
//double z =Math.log10(1+Math.pow(10.0,-diff));
|
||||||
|
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
||||||
|
return big + MathUtils.jacobianLogTable[ind];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Linearized, ~O(N), implementation.
|
// Multi-allelic implementation.
|
||||||
|
//
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||||
|
|
||||||
|
// a wrapper around the int array so that we can make it hashable
|
||||||
|
private static final class ExactACcounts {
|
||||||
|
|
||||||
|
private final int[] counts;
|
||||||
|
private int hashcode = -1;
|
||||||
|
|
||||||
|
public ExactACcounts(final int[] counts) {
|
||||||
|
this.counts = counts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int[] getCounts() {
|
||||||
|
return counts;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
if ( hashcode == -1 )
|
||||||
|
hashcode = Arrays.hashCode(counts);
|
||||||
|
return hashcode;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
sb.append(counts[0]);
|
||||||
|
for ( int i = 1; i < counts.length; i++ ) {
|
||||||
|
sb.append("/");
|
||||||
|
sb.append(counts[i]);
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This class represents a column in the Exact AC calculation matrix
|
||||||
|
private static final class ExactACset {
|
||||||
|
|
||||||
|
// the counts of the various alternate alleles which this column represents
|
||||||
|
final ExactACcounts ACcounts;
|
||||||
|
|
||||||
|
// the column of the matrix
|
||||||
|
final double[] log10Likelihoods;
|
||||||
|
|
||||||
|
// mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column;
|
||||||
|
// for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB.
|
||||||
|
final HashMap<ExactACcounts, Integer> ACsetIndexToPLIndex = new HashMap<ExactACcounts, Integer>();
|
||||||
|
|
||||||
|
// to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them
|
||||||
|
final ArrayList<ExactACcounts> dependentACsetsToDelete = new ArrayList<ExactACcounts>();
|
||||||
|
|
||||||
|
|
||||||
|
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||||
|
this.ACcounts = ACcounts;
|
||||||
|
log10Likelihoods = new double[size];
|
||||||
|
}
|
||||||
|
|
||||||
|
// sum of all the non-reference alleles
|
||||||
|
public int getACsum() {
|
||||||
|
int sum = 0;
|
||||||
|
for ( int count : ACcounts.getCounts() )
|
||||||
|
sum += count;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void linearExactMultiAllelic(final GenotypesContext GLs,
|
||||||
|
final int numAlternateAlleles,
|
||||||
|
final double[][] log10AlleleFrequencyPriors,
|
||||||
|
final double[][] log10AlleleFrequencyLikelihoods,
|
||||||
|
final double[][] log10AlleleFrequencyPosteriors,
|
||||||
|
final boolean preserveData) {
|
||||||
|
|
||||||
|
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||||
|
final int numSamples = genotypeLikelihoods.size()-1;
|
||||||
|
final int numChr = 2*numSamples;
|
||||||
|
|
||||||
|
// queue of AC conformations to process
|
||||||
|
final Queue<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||||
|
|
||||||
|
// mapping of ExactACset indexes to the objects
|
||||||
|
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
|
||||||
|
|
||||||
|
// add AC=0 to the queue
|
||||||
|
int[] zeroCounts = new int[numAlternateAlleles];
|
||||||
|
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
|
||||||
|
ACqueue.add(zeroSet);
|
||||||
|
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||||
|
|
||||||
|
// keep processing while we have AC conformations that need to be calculated
|
||||||
|
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||||
|
while ( !ACqueue.isEmpty() ) {
|
||||||
|
// compute log10Likelihoods
|
||||||
|
final ExactACset set = ACqueue.remove();
|
||||||
|
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||||
|
|
||||||
|
// adjust max likelihood seen if needed
|
||||||
|
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static double calculateAlleleCountConformation(final ExactACset set,
|
||||||
|
final ArrayList<double[]> genotypeLikelihoods,
|
||||||
|
final double maxLog10L,
|
||||||
|
final int numChr,
|
||||||
|
final boolean preserveData,
|
||||||
|
final Queue<ExactACset> ACqueue,
|
||||||
|
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||||
|
final double[][] log10AlleleFrequencyPriors,
|
||||||
|
final double[][] log10AlleleFrequencyLikelihoods,
|
||||||
|
final double[][] log10AlleleFrequencyPosteriors) {
|
||||||
|
|
||||||
|
if ( DEBUG )
|
||||||
|
System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
|
||||||
|
|
||||||
|
// compute the log10Likelihoods
|
||||||
|
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||||
|
|
||||||
|
// clean up memory
|
||||||
|
if ( !preserveData ) {
|
||||||
|
for ( ExactACcounts index : set.dependentACsetsToDelete ) {
|
||||||
|
indexesToACset.put(index, null);
|
||||||
|
if ( DEBUG )
|
||||||
|
System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||||
|
|
||||||
|
// can we abort early because the log10Likelihoods are so small?
|
||||||
|
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||||
|
if ( DEBUG )
|
||||||
|
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||||
|
|
||||||
|
// no reason to keep this data around because nothing depends on it
|
||||||
|
if ( !preserveData )
|
||||||
|
indexesToACset.put(set.ACcounts, null);
|
||||||
|
|
||||||
|
return log10LofK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// iterate over higher frequencies if possible
|
||||||
|
final int ACwiggle = numChr - set.getACsum();
|
||||||
|
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
||||||
|
return log10LofK;
|
||||||
|
|
||||||
|
ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing
|
||||||
|
final int numAltAlleles = set.ACcounts.getCounts().length;
|
||||||
|
|
||||||
|
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
|
||||||
|
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
|
||||||
|
|
||||||
|
// add conformations for the k+1 case
|
||||||
|
int PLindex = 0;
|
||||||
|
for ( int allele = 0; allele < numAltAlleles; allele++ ) {
|
||||||
|
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||||
|
ACcountsClone[allele]++;
|
||||||
|
lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset);
|
||||||
|
}
|
||||||
|
|
||||||
|
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
||||||
|
if ( ACwiggle > 1 ) {
|
||||||
|
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
|
||||||
|
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
|
||||||
|
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||||
|
ACcountsClone[allele_i]++;
|
||||||
|
ACcountsClone[allele_j]++;
|
||||||
|
lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate
|
||||||
|
// over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early)
|
||||||
|
if ( !preserveData && lastSet == null ) {
|
||||||
|
if ( DEBUG )
|
||||||
|
System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
|
||||||
|
lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
|
||||||
|
}
|
||||||
|
if ( lastSet != null )
|
||||||
|
lastSet.dependentACsetsToDelete.add(set.ACcounts);
|
||||||
|
|
||||||
|
return log10LofK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
|
||||||
|
// also adds it as a dependency to the given callingSetIndex.
|
||||||
|
// returns the ExactACset if that set was not already in the queue and null otherwise.
|
||||||
|
private static ExactACset updateACset(final int[] ACcounts,
|
||||||
|
final int numChr,
|
||||||
|
final ExactACset callingSet,
|
||||||
|
final int PLsetIndex,
|
||||||
|
final Queue<ExactACset> ACqueue,
|
||||||
|
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||||
|
final ExactACcounts index = new ExactACcounts(ACcounts);
|
||||||
|
boolean wasInQueue = true;
|
||||||
|
if ( !indexesToACset.containsKey(index) ) {
|
||||||
|
ExactACset set = new ExactACset(numChr/2 +1, index);
|
||||||
|
indexesToACset.put(index, set);
|
||||||
|
ACqueue.add(set);
|
||||||
|
wasInQueue = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add the given dependency to the set
|
||||||
|
final ExactACset set = indexesToACset.get(index);
|
||||||
|
set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex);
|
||||||
|
return wasInQueue ? null : set;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final Queue<ExactACset> ACqueue) {
|
||||||
|
ExactACset set = null;
|
||||||
|
for ( ExactACset queued : ACqueue ) {
|
||||||
|
if ( queued.dependentACsetsToDelete.contains(callingSetIndex) )
|
||||||
|
set = queued;
|
||||||
|
}
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void computeLofK(final ExactACset set,
|
||||||
|
final ArrayList<double[]> genotypeLikelihoods,
|
||||||
|
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||||
|
final double[][] log10AlleleFrequencyPriors,
|
||||||
|
final double[][] log10AlleleFrequencyLikelihoods,
|
||||||
|
final double[][] log10AlleleFrequencyPosteriors) {
|
||||||
|
|
||||||
|
set.log10Likelihoods[0] = 0.0; // the zero case
|
||||||
|
final int totalK = set.getACsum();
|
||||||
|
|
||||||
|
// special case for k = 0 over all k
|
||||||
|
if ( totalK == 0 ) {
|
||||||
|
for ( int j = 1; j < set.log10Likelihoods.length; j++ )
|
||||||
|
set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
|
||||||
|
}
|
||||||
|
// k > 0 for at least one k
|
||||||
|
else {
|
||||||
|
// all possible likelihoods for a given cell from which to choose the max
|
||||||
|
final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
|
||||||
|
final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it
|
||||||
|
|
||||||
|
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
||||||
|
final double[] gl = genotypeLikelihoods.get(j);
|
||||||
|
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||||
|
|
||||||
|
// initialize
|
||||||
|
for ( int i = 0; i < numPaths; i++ )
|
||||||
|
// TODO -- Arrays.fill?
|
||||||
|
// todo -- is this even necessary? Why not have as else below?
|
||||||
|
log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
|
// deal with the AA case first
|
||||||
|
if ( totalK < 2*j-1 )
|
||||||
|
log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
|
||||||
|
|
||||||
|
// deal with the other possible conformations now
|
||||||
|
if ( totalK <= 2*j ) { // skip impossible conformations
|
||||||
|
int conformationIndex = 1;
|
||||||
|
for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
|
||||||
|
if ( DEBUG )
|
||||||
|
System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
|
||||||
|
log10ConformationLikelihoods[conformationIndex++] =
|
||||||
|
determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods);
|
||||||
|
|
||||||
|
// finally, update the L(j,k) value
|
||||||
|
set.log10Likelihoods[j] = log10Max - logDenominator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||||
|
|
||||||
|
// determine the power of theta to use
|
||||||
|
int nonRefAlleles = 0;
|
||||||
|
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
|
||||||
|
if ( set.ACcounts.getCounts()[i] > 0 )
|
||||||
|
nonRefAlleles++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
|
||||||
|
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
|
||||||
|
int AC = set.ACcounts.getCounts()[i];
|
||||||
|
log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
|
||||||
|
|
||||||
|
// for k=0 we still want to use theta
|
||||||
|
final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
|
||||||
|
log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
|
||||||
|
// todo -- arent' there a small number of fixed values that this function can adopt?
|
||||||
|
// todo -- at a minimum it'd be good to partially compute some of these in ACCounts for performance
|
||||||
|
// todo -- need to cache PLIndex -> two alleles, compute looping over each PLIndex. Note all other operations are efficient
|
||||||
|
// todo -- this can be computed once at the start of the all operations
|
||||||
|
|
||||||
|
// the closed form representation generalized for multiple alleles is as follows:
|
||||||
|
// AA: (2j - totalK) * (2j - totalK - 1)
|
||||||
|
// AB: 2k_b * (2j - totalK)
|
||||||
|
// AC: 2k_c * (2j - totalK)
|
||||||
|
// BB: k_b * (k_b - 1)
|
||||||
|
// BC: 2 * k_b * k_c
|
||||||
|
// CC: k_c * (k_c - 1)
|
||||||
|
|
||||||
|
final int numAltAlleles = ACcounts.length;
|
||||||
|
|
||||||
|
// the AX het case
|
||||||
|
if ( PLindex <= numAltAlleles )
|
||||||
|
return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK];
|
||||||
|
|
||||||
|
int subtractor = numAltAlleles+1;
|
||||||
|
int subtractions = 0;
|
||||||
|
do {
|
||||||
|
PLindex -= subtractor;
|
||||||
|
subtractor--;
|
||||||
|
subtractions++;
|
||||||
|
}
|
||||||
|
while ( PLindex >= subtractor );
|
||||||
|
|
||||||
|
final int k_i = ACcounts[subtractions-1];
|
||||||
|
|
||||||
|
// the hom var case (e.g. BB, CC, DD)
|
||||||
|
final double coeff;
|
||||||
|
if ( PLindex == 0 ) {
|
||||||
|
coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
|
||||||
|
}
|
||||||
|
// the het non-ref case (e.g. BC, BD, CD)
|
||||||
|
else {
|
||||||
|
final int k_j = ACcounts[subtractions+PLindex-1];
|
||||||
|
coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
|
||||||
|
}
|
||||||
|
|
||||||
|
return coeff;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Deprecated bi-allelic ~O(N) implementation. Kept here for posterity.
|
||||||
//
|
//
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
@ -119,6 +474,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
* A simple data structure that holds the current, prev, and prev->prev likelihoods vectors
|
* A simple data structure that holds the current, prev, and prev->prev likelihoods vectors
|
||||||
* for the exact model calculation
|
* for the exact model calculation
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
private final static class ExactACCache {
|
private final static class ExactACCache {
|
||||||
double[] kMinus2, kMinus1, kMinus0;
|
double[] kMinus2, kMinus1, kMinus0;
|
||||||
|
|
||||||
|
|
@ -154,7 +510,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
public int linearExact(GenotypesContext GLs,
|
public int linearExact(GenotypesContext GLs,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
double[][] log10AlleleFrequencyLikelihoods,
|
||||||
|
double[][] log10AlleleFrequencyPosteriors) {
|
||||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||||
final int numSamples = genotypeLikelihoods.size()-1;
|
final int numSamples = genotypeLikelihoods.size()-1;
|
||||||
final int numChr = 2*numSamples;
|
final int numChr = 2*numSamples;
|
||||||
|
|
@ -171,7 +528,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
if ( k == 0 ) { // special case for k = 0
|
if ( k == 0 ) { // special case for k = 0
|
||||||
for ( int j=1; j <= numSamples; j++ ) {
|
for ( int j=1; j <= numSamples; j++ ) {
|
||||||
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA];
|
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0];
|
||||||
}
|
}
|
||||||
} else { // k > 0
|
} else { // k > 0
|
||||||
final double[] kMinus1 = logY.getkMinus1();
|
final double[] kMinus1 = logY.getkMinus1();
|
||||||
|
|
@ -184,14 +541,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
double aa = Double.NEGATIVE_INFINITY;
|
double aa = Double.NEGATIVE_INFINITY;
|
||||||
double ab = Double.NEGATIVE_INFINITY;
|
double ab = Double.NEGATIVE_INFINITY;
|
||||||
if (k < 2*j-1)
|
if (k < 2*j-1)
|
||||||
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[idxAA];
|
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0];
|
||||||
|
|
||||||
if (k < 2*j)
|
if (k < 2*j)
|
||||||
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[idxAB];
|
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1];
|
||||||
|
|
||||||
double log10Max;
|
double log10Max;
|
||||||
if (k > 1) {
|
if (k > 1) {
|
||||||
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[idxBB];
|
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2];
|
||||||
log10Max = approximateLog10SumLog10(aa, ab, bb);
|
log10Max = approximateLog10SumLog10(aa, ab, bb);
|
||||||
} else {
|
} else {
|
||||||
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
||||||
|
|
@ -205,7 +562,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
// update the posteriors vector
|
// update the posteriors vector
|
||||||
final double log10LofK = kMinus0[numSamples];
|
final double log10LofK = kMinus0[numSamples];
|
||||||
log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
|
log10AlleleFrequencyLikelihoods[0][k] = log10LofK;
|
||||||
|
log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k];
|
||||||
|
|
||||||
// can we abort early?
|
// can we abort early?
|
||||||
lastK = k;
|
lastK = k;
|
||||||
|
|
@ -222,202 +580,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
final static double approximateLog10SumLog10(double a, double b, double c) {
|
final static double approximateLog10SumLog10(double a, double b, double c) {
|
||||||
//return softMax(new double[]{a, b, c});
|
|
||||||
return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c);
|
return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
final static double approximateLog10SumLog10(double small, double big) {
|
|
||||||
// make sure small is really the smaller value
|
|
||||||
if ( small > big ) {
|
|
||||||
final double t = big;
|
|
||||||
big = small;
|
|
||||||
small = t;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
|
|
||||||
return big;
|
|
||||||
|
|
||||||
if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
|
|
||||||
return big;
|
|
||||||
|
|
||||||
// OK, so |y-x| < tol: we use the following identity then:
|
|
||||||
// we need to compute log10(10^x + 10^y)
|
|
||||||
// By Jacobian logarithm identity, this is equal to
|
|
||||||
// max(x,y) + log10(1+10^-abs(x-y))
|
|
||||||
// we compute the second term as a table lookup
|
|
||||||
// with integer quantization
|
|
||||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
|
||||||
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
|
||||||
int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
|
||||||
|
|
||||||
//double z =Math.log10(1+Math.pow(10.0,-diff));
|
|
||||||
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
|
||||||
return big + MathUtils.jacobianLogTable[ind];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Can be overridden by concrete subclasses
|
|
||||||
* @param vc variant context with genotype likelihoods
|
|
||||||
* @param log10AlleleFrequencyPosteriors allele frequency results
|
|
||||||
* @param AFofMaxLikelihood allele frequency of max likelihood
|
|
||||||
*
|
|
||||||
* @return calls
|
|
||||||
*/
|
|
||||||
public GenotypesContext assignGenotypes(VariantContext vc,
|
|
||||||
double[] log10AlleleFrequencyPosteriors,
|
|
||||||
int AFofMaxLikelihood) {
|
|
||||||
if ( !vc.isVariant() )
|
|
||||||
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
|
|
||||||
|
|
||||||
|
|
||||||
GenotypesContext GLs = vc.getGenotypes();
|
|
||||||
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
|
|
||||||
int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1];
|
|
||||||
|
|
||||||
ArrayList<String> sampleIndices = new ArrayList<String>();
|
|
||||||
int sampleIdx = 0;
|
|
||||||
|
|
||||||
// todo - optimize initialization
|
|
||||||
for (int k=0; k <= AFofMaxLikelihood; k++)
|
|
||||||
for (int j=0; j <= GLs.size(); j++)
|
|
||||||
pathMetricArray[j][k] = -1e30;
|
|
||||||
|
|
||||||
pathMetricArray[0][0] = 0.0;
|
|
||||||
|
|
||||||
// todo = can't deal with optimal dynamic programming solution with multiallelic records
|
|
||||||
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
|
|
||||||
sampleIndices.addAll(GLs.getSampleNamesOrderedByName());
|
|
||||||
sampleIdx = GLs.size();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
|
|
||||||
for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) {
|
|
||||||
if ( !genotype.hasLikelihoods() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
double[] likelihoods = genotype.getLikelihoods().getAsVector();
|
|
||||||
|
|
||||||
if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) {
|
|
||||||
//System.out.print(sample.getKey()+":");
|
|
||||||
//for (int k=0; k < likelihoods.length; k++)
|
|
||||||
// System.out.format("%4.2f ",likelihoods[k]);
|
|
||||||
//System.out.println();
|
|
||||||
// all likelihoods are essentially the same: skip this sample and will later on force no call.
|
|
||||||
//sampleIdx++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
sampleIndices.add(genotype.getSampleName());
|
|
||||||
|
|
||||||
for (int k=0; k <= AFofMaxLikelihood; k++) {
|
|
||||||
|
|
||||||
double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0];
|
|
||||||
int bestIndex = k;
|
|
||||||
|
|
||||||
if (k>0) {
|
|
||||||
double m2 = pathMetricArray[sampleIdx][k-1] + likelihoods[1];
|
|
||||||
if (m2 > bestMetric) {
|
|
||||||
bestMetric = m2;
|
|
||||||
bestIndex = k-1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (k>1) {
|
|
||||||
double m2 = pathMetricArray[sampleIdx][k-2] + likelihoods[2];
|
|
||||||
if (m2 > bestMetric) {
|
|
||||||
bestMetric = m2;
|
|
||||||
bestIndex = k-2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pathMetricArray[sampleIdx+1][k] = bestMetric;
|
|
||||||
tracebackArray[sampleIdx+1][k] = bestIndex;
|
|
||||||
}
|
|
||||||
sampleIdx++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
GenotypesContext calls = GenotypesContext.create();
|
|
||||||
|
|
||||||
int startIdx = AFofMaxLikelihood;
|
|
||||||
for (int k = sampleIdx; k > 0; k--) {
|
|
||||||
int bestGTguess;
|
|
||||||
String sample = sampleIndices.get(k-1);
|
|
||||||
Genotype g = GLs.get(sample);
|
|
||||||
if ( !g.hasLikelihoods() )
|
|
||||||
continue;
|
|
||||||
// if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now,
|
|
||||||
// and will add no-call genotype to GL's in a second pass
|
|
||||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
|
||||||
|
|
||||||
double[] likelihoods = g.getLikelihoods().getAsVector();
|
|
||||||
|
|
||||||
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
|
|
||||||
bestGTguess = Utils.findIndexOfMaxEntry(likelihoods);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
int newIdx = tracebackArray[k][startIdx];;
|
|
||||||
bestGTguess = startIdx - newIdx;
|
|
||||||
startIdx = newIdx;
|
|
||||||
}
|
|
||||||
|
|
||||||
// likelihoods are stored row-wise in lower triangular matrix. IE
|
|
||||||
// for 2 alleles they have ordering AA,AB,BB
|
|
||||||
// for 3 alleles they are ordered AA,AB,BB,AC,BC,CC
|
|
||||||
// Get now alleles corresponding to best index
|
|
||||||
int kk=0;
|
|
||||||
boolean done = false;
|
|
||||||
for (int j=0; j < vc.getNAlleles(); j++) {
|
|
||||||
for (int i=0; i <= j; i++){
|
|
||||||
if (kk++ == bestGTguess) {
|
|
||||||
if (i==0)
|
|
||||||
myAlleles.add(vc.getReference());
|
|
||||||
else
|
|
||||||
myAlleles.add(vc.getAlternateAllele(i-1));
|
|
||||||
|
|
||||||
if (j==0)
|
|
||||||
myAlleles.add(vc.getReference());
|
|
||||||
else
|
|
||||||
myAlleles.add(vc.getAlternateAllele(j-1));
|
|
||||||
done = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
if (done)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
final double qual = GenotypeLikelihoods.getQualFromLikelihoods(bestGTguess, likelihoods);
|
|
||||||
//System.out.println(myAlleles.toString());
|
|
||||||
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) {
|
|
||||||
if ( !genotype.hasLikelihoods() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
final Genotype g = GLs.get(genotype.getSampleName());
|
|
||||||
final double[] likelihoods = genotype.getLikelihoods().getAsVector();
|
|
||||||
|
|
||||||
if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
|
|
||||||
continue; // regular likelihoods
|
|
||||||
|
|
||||||
final double qual = Genotype.NO_LOG10_PERROR;
|
|
||||||
calls.replace(new Genotype(g.getSampleName(), NO_CALL_ALLELES, qual, null, g.getAttributes(), false));
|
|
||||||
}
|
|
||||||
|
|
||||||
return calls;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
|
|
||||||
int j = logYMatrix.length - 1;
|
|
||||||
System.out.printf("-----------------------------------%n");
|
|
||||||
for (int k=0; k <= numChr; k++) {
|
|
||||||
double posterior = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
|
||||||
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
@ -79,17 +80,15 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
||||||
* @param contexts stratified alignment contexts
|
* @param contexts stratified alignment contexts
|
||||||
* @param contextType stratified context type
|
* @param contextType stratified context type
|
||||||
* @param priors priors to use for GLs
|
* @param priors priors to use for GLs
|
||||||
* @param GLs hash of sample->GL to fill in
|
|
||||||
* @param alternateAlleleToUse the alternate allele to use, null if not set
|
* @param alternateAlleleToUse the alternate allele to use, null if not set
|
||||||
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
|
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
|
||||||
* @return genotype likelihoods per sample for AA, AB, BB
|
* @return variant context where genotypes are no-called but with GLs
|
||||||
*/
|
*/
|
||||||
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
|
public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||||
ReferenceContext ref,
|
ReferenceContext ref,
|
||||||
Map<String, AlignmentContext> contexts,
|
Map<String, AlignmentContext> contexts,
|
||||||
AlignmentContextUtils.ReadOrientation contextType,
|
AlignmentContextUtils.ReadOrientation contextType,
|
||||||
GenotypePriors priors,
|
GenotypePriors priors,
|
||||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
|
||||||
Allele alternateAlleleToUse,
|
Allele alternateAlleleToUse,
|
||||||
boolean useBAQedPileup);
|
boolean useBAQedPileup);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Haplotype;
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
|
@ -41,8 +42,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -243,7 +243,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
// get deletion length
|
// get deletion length
|
||||||
int dLen = Integer.valueOf(bestAltAllele.substring(1));
|
int dLen = Integer.valueOf(bestAltAllele.substring(1));
|
||||||
// get ref bases of accurate deletion
|
// get ref bases of accurate deletion
|
||||||
int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart());
|
int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
|
||||||
|
|
||||||
//System.out.println(new String(ref.getBases()));
|
//System.out.println(new String(ref.getBases()));
|
||||||
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
|
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
|
||||||
|
|
@ -270,19 +270,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
|
|
||||||
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
|
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
|
||||||
|
|
||||||
public Allele getLikelihoods(RefMetaDataTracker tracker,
|
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||||
ReferenceContext ref,
|
ReferenceContext ref,
|
||||||
Map<String, AlignmentContext> contexts,
|
Map<String, AlignmentContext> contexts,
|
||||||
AlignmentContextUtils.ReadOrientation contextType,
|
AlignmentContextUtils.ReadOrientation contextType,
|
||||||
GenotypePriors priors,
|
GenotypePriors priors,
|
||||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
|
||||||
Allele alternateAlleleToUse,
|
Allele alternateAlleleToUse,
|
||||||
boolean useBAQedPileup) {
|
boolean useBAQedPileup) {
|
||||||
|
|
||||||
if ( tracker == null )
|
if ( tracker == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
||||||
GenomeLoc loc = ref.getLocus();
|
GenomeLoc loc = ref.getLocus();
|
||||||
Allele refAllele, altAllele;
|
Allele refAllele, altAllele;
|
||||||
VariantContext vc = null;
|
VariantContext vc = null;
|
||||||
|
|
@ -368,10 +366,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
||||||
ref, hsize, numPrefBases);
|
ref, hsize, numPrefBases);
|
||||||
|
|
||||||
|
// start making the VariantContext
|
||||||
|
final int endLoc = calculateEndPos(alleleList, refAllele, loc);
|
||||||
|
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
|
||||||
|
|
||||||
|
// create the genotypes; no-call everyone for now
|
||||||
|
GenotypesContext genotypes = GenotypesContext.create();
|
||||||
|
final List<Allele> noCall = new ArrayList<Allele>();
|
||||||
|
noCall.add(Allele.NO_CALL);
|
||||||
|
|
||||||
// For each sample, get genotype likelihoods based on pileup
|
// For each sample, get genotype likelihoods based on pileup
|
||||||
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
|
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
|
||||||
// initialize the GenotypeLikelihoods
|
|
||||||
GLs.clear();
|
|
||||||
|
|
||||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||||
|
|
@ -384,11 +389,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
|
|
||||||
if (pileup != null ) {
|
if (pileup != null ) {
|
||||||
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||||
|
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
|
||||||
|
|
||||||
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||||
alleleList,
|
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
|
||||||
genotypeLikelihoods,
|
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||||
getFilteredDepth(pileup)));
|
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||||
|
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
|
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
|
||||||
|
|
@ -399,9 +405,25 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return refAllele;
|
return builder.genotypes(genotypes).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
||||||
|
// for indels, stop location is one more than ref allele length
|
||||||
|
boolean hasNullAltAllele = false;
|
||||||
|
for ( Allele a : alleles ) {
|
||||||
|
if ( a.isNull() ) {
|
||||||
|
hasNullAltAllele = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int endLoc = loc.getStart() + refAllele.length();
|
||||||
|
if( !hasNullAltAllele )
|
||||||
|
endLoc--;
|
||||||
|
|
||||||
|
return endLoc;
|
||||||
|
}
|
||||||
|
|
||||||
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
|
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
|
||||||
return indelLikelihoodMap.get();
|
return indelLikelihoodMap.get();
|
||||||
|
|
|
||||||
|
|
@ -1,52 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: delangel
|
|
||||||
* Date: 6/1/11
|
|
||||||
* Time: 10:38 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class MultiallelicGenotypeLikelihoods {
|
|
||||||
private String sample;
|
|
||||||
private double[] GLs;
|
|
||||||
private List<Allele> alleleList;
|
|
||||||
private int depth;
|
|
||||||
|
|
||||||
public MultiallelicGenotypeLikelihoods(String sample,
|
|
||||||
List<Allele> A,
|
|
||||||
double[] log10Likelihoods, int depth) {
|
|
||||||
/* Check for consistency between likelihood vector and number of alleles */
|
|
||||||
int numAlleles = A.size();
|
|
||||||
if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2)
|
|
||||||
throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!"));
|
|
||||||
|
|
||||||
this.sample = sample;
|
|
||||||
this.alleleList = A;
|
|
||||||
this.GLs = log10Likelihoods;
|
|
||||||
this.depth = depth;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getSample() {
|
|
||||||
return sample;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double[] getLikelihoods() {
|
|
||||||
return GLs;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Allele> getAlleles() {
|
|
||||||
return alleleList;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getDepth() {
|
|
||||||
return depth;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -31,107 +31,147 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||||
|
|
||||||
// the alternate allele with the largest sum of quality scores
|
private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50;
|
||||||
protected Byte bestAlternateAllele = null;
|
|
||||||
|
private boolean ALLOW_MULTIPLE_ALLELES;
|
||||||
|
|
||||||
private final boolean useAlleleFromVCF;
|
private final boolean useAlleleFromVCF;
|
||||||
|
|
||||||
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||||
super(UAC, logger);
|
super(UAC, logger);
|
||||||
|
ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC;
|
||||||
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Allele getLikelihoods(RefMetaDataTracker tracker,
|
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||||
ReferenceContext ref,
|
ReferenceContext ref,
|
||||||
Map<String, AlignmentContext> contexts,
|
Map<String, AlignmentContext> contexts,
|
||||||
AlignmentContextUtils.ReadOrientation contextType,
|
AlignmentContextUtils.ReadOrientation contextType,
|
||||||
GenotypePriors priors,
|
GenotypePriors priors,
|
||||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
|
||||||
Allele alternateAlleleToUse,
|
Allele alternateAlleleToUse,
|
||||||
boolean useBAQedPileup) {
|
boolean useBAQedPileup) {
|
||||||
|
|
||||||
if ( !(priors instanceof DiploidSNPGenotypePriors) )
|
if ( !(priors instanceof DiploidSNPGenotypePriors) )
|
||||||
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
|
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
|
||||||
|
|
||||||
byte refBase = ref.getBase();
|
final boolean[] basesToUse = new boolean[4];
|
||||||
Allele refAllele = Allele.create(refBase, true);
|
final byte refBase = ref.getBase();
|
||||||
|
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
|
||||||
|
|
||||||
// find the alternate allele with the largest sum of quality scores
|
// start making the VariantContext
|
||||||
|
final GenomeLoc loc = ref.getLocus();
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
alleles.add(Allele.create(refBase, true));
|
||||||
|
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);
|
||||||
|
|
||||||
|
// find the alternate allele(s) that we should be using
|
||||||
if ( alternateAlleleToUse != null ) {
|
if ( alternateAlleleToUse != null ) {
|
||||||
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
|
basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true;
|
||||||
} else if ( useAlleleFromVCF ) {
|
} else if ( useAlleleFromVCF ) {
|
||||||
VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
|
final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
|
||||||
|
|
||||||
// ignore places where we don't have a variant
|
// ignore places where we don't have a SNP
|
||||||
if ( vc == null )
|
if ( vc == null || !vc.isSNP() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
if ( !vc.isBiallelic() ) {
|
for ( Allele allele : vc.getAlternateAlleles() )
|
||||||
// for multi-allelic sites go back to the reads and find the most likely alternate allele
|
basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true;
|
||||||
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
|
|
||||||
} else {
|
} else {
|
||||||
bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
|
|
||||||
}
|
|
||||||
|
|
||||||
// if there are no non-ref bases...
|
determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup);
|
||||||
if ( bestAlternateAllele == null ) {
|
|
||||||
|
// how many alternate alleles are we using?
|
||||||
|
int alleleCounter = countSetBits(basesToUse);
|
||||||
|
|
||||||
|
// if there are no non-ref alleles...
|
||||||
|
if ( alleleCounter == 0 ) {
|
||||||
// if we only want variants, then we don't need to calculate genotype likelihoods
|
// if we only want variants, then we don't need to calculate genotype likelihoods
|
||||||
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
|
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
|
||||||
return refAllele;
|
return builder.make();
|
||||||
|
|
||||||
// otherwise, choose any alternate allele (it doesn't really matter)
|
// otherwise, choose any alternate allele (it doesn't really matter)
|
||||||
bestAlternateAllele = (byte)(refBase != 'A' ? 'A' : 'C');
|
basesToUse[indexOfRefBase == 0 ? 1 : 0] = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Allele altAllele = Allele.create(bestAlternateAllele, false);
|
// create the alternate alleles and the allele ordering (the ordering is crucial for the GLs)
|
||||||
|
final int numAltAlleles = countSetBits(basesToUse);
|
||||||
|
final int[] alleleOrdering = new int[numAltAlleles + 1];
|
||||||
|
alleleOrdering[0] = indexOfRefBase;
|
||||||
|
int alleleOrderingIndex = 1;
|
||||||
|
int numLikelihoods = 1;
|
||||||
|
for ( int i = 0; i < 4; i++ ) {
|
||||||
|
if ( i != indexOfRefBase && basesToUse[i] ) {
|
||||||
|
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
|
||||||
|
alleleOrdering[alleleOrderingIndex++] = i;
|
||||||
|
numLikelihoods += alleleOrderingIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
builder.alleles(alleles);
|
||||||
|
|
||||||
|
// create the genotypes; no-call everyone for now
|
||||||
|
GenotypesContext genotypes = GenotypesContext.create();
|
||||||
|
final List<Allele> noCall = new ArrayList<Allele>();
|
||||||
|
noCall.add(Allele.NO_CALL);
|
||||||
|
|
||||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||||
if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); }
|
if ( useBAQedPileup )
|
||||||
|
pileup = createBAQedPileup( pileup );
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
|
final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
|
||||||
int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
|
final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
|
||||||
if ( nGoodBases == 0 )
|
if ( nGoodBases == 0 )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
double[] likelihoods = GL.getLikelihoods();
|
final double[] allLikelihoods = GL.getLikelihoods();
|
||||||
|
final double[] myLikelihoods = new double[numLikelihoods];
|
||||||
|
|
||||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase);
|
int myLikelihoodsIndex = 0;
|
||||||
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele);
|
for ( int i = 0; i <= numAltAlleles; i++ ) {
|
||||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele);
|
for ( int j = i; j <= numAltAlleles; j++ ) {
|
||||||
ArrayList<Allele> aList = new ArrayList<Allele>();
|
myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()];
|
||||||
aList.add(refAllele);
|
}
|
||||||
aList.add(altAllele);
|
}
|
||||||
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
|
|
||||||
|
|
||||||
// normalize in log space so that max element is zero.
|
// normalize in log space so that max element is zero.
|
||||||
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
|
||||||
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
|
|
||||||
|
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||||
|
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
|
||||||
|
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||||
|
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
return refAllele;
|
return builder.genotypes(genotypes).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void initializeBestAlternateAllele(byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
|
private int countSetBits(boolean[] array) {
|
||||||
|
int counter = 0;
|
||||||
|
for ( int i = 0; i < array.length; i++ ) {
|
||||||
|
if ( array[i] )
|
||||||
|
counter++;
|
||||||
|
}
|
||||||
|
return counter;
|
||||||
|
}
|
||||||
|
|
||||||
|
// fills in the allelesToUse array
|
||||||
|
protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
|
||||||
int[] qualCounts = new int[4];
|
int[] qualCounts = new int[4];
|
||||||
|
|
||||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||||
|
|
@ -139,7 +179,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup();
|
ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup();
|
||||||
for ( PileupElement p : pileup ) {
|
for ( PileupElement p : pileup ) {
|
||||||
// ignore deletions
|
// ignore deletions
|
||||||
if ( p.isDeletion() || (! p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE ))
|
if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
|
final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
|
||||||
|
|
@ -149,18 +189,32 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// set the non-ref base with maximum quality score sum
|
if ( ALLOW_MULTIPLE_ALLELES ) {
|
||||||
|
for ( byte altAllele : BaseUtils.BASES ) {
|
||||||
|
if ( altAllele == ref )
|
||||||
|
continue;
|
||||||
|
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||||
|
if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) {
|
||||||
|
allelesToUse[index] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// set the non-ref base which has the maximum quality score sum
|
||||||
int maxCount = 0;
|
int maxCount = 0;
|
||||||
bestAlternateAllele = null;
|
int indexOfMax = 0;
|
||||||
for ( byte altAllele : BaseUtils.BASES ) {
|
for ( byte altAllele : BaseUtils.BASES ) {
|
||||||
if ( altAllele == ref )
|
if ( altAllele == ref )
|
||||||
continue;
|
continue;
|
||||||
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||||
if ( qualCounts[index] > maxCount ) {
|
if ( qualCounts[index] > maxCount ) {
|
||||||
maxCount = qualCounts[index];
|
maxCount = qualCounts[index];
|
||||||
bestAlternateAllele = altAllele;
|
indexOfMax = index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( maxCount > 0 )
|
||||||
|
allelesToUse[indexOfMax] = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
|
public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
|
||||||
|
|
|
||||||
|
|
@ -153,6 +153,18 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
|
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
|
||||||
public boolean IGNORE_SNP_ALLELES = false;
|
public boolean IGNORE_SNP_ALLELES = false;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow multiple alleles in discovery", required = false)
|
||||||
|
public boolean MULTI_ALLELIC = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
|
||||||
|
* then this site will be skipped and a warning printed. Note that genotyping sites with many alternate alleles is both CPU and memory intensive.
|
||||||
|
*/
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||||
|
public int MAX_ALTERNATE_ALLELES = 5;
|
||||||
|
|
||||||
|
|
||||||
// Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
|
// Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
|
||||||
public UnifiedArgumentCollection clone() {
|
public UnifiedArgumentCollection clone() {
|
||||||
|
|
@ -176,10 +188,12 @@ public class UnifiedArgumentCollection {
|
||||||
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
|
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
|
||||||
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
|
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
|
||||||
uac.alleles = alleles;
|
uac.alleles = alleles;
|
||||||
|
uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES;
|
||||||
|
|
||||||
// todo- arguments to remove
|
// todo- arguments to remove
|
||||||
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
||||||
uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
|
uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
|
||||||
|
uac.MULTI_ALLELIC = MULTI_ALLELIC;
|
||||||
return uac;
|
return uac;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,9 @@ public class UnifiedGenotyperEngine {
|
||||||
EMIT_ALL_SITES
|
EMIT_ALL_SITES
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||||
|
protected static final double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||||
|
|
||||||
// the unified argument collection
|
// the unified argument collection
|
||||||
private final UnifiedArgumentCollection UAC;
|
private final UnifiedArgumentCollection UAC;
|
||||||
public UnifiedArgumentCollection getUAC() { return UAC; }
|
public UnifiedArgumentCollection getUAC() { return UAC; }
|
||||||
|
|
@ -73,11 +76,12 @@ public class UnifiedGenotyperEngine {
|
||||||
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
|
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
|
||||||
|
|
||||||
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
|
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
|
||||||
private final double[] log10AlleleFrequencyPriorsSNPs;
|
private final double[][] log10AlleleFrequencyPriorsSNPs;
|
||||||
private final double[] log10AlleleFrequencyPriorsIndels;
|
private final double[][] log10AlleleFrequencyPriorsIndels;
|
||||||
|
|
||||||
// the allele frequency likelihoods (allocated once as an optimization)
|
// the allele frequency likelihoods (allocated once as an optimization)
|
||||||
private ThreadLocal<double[]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[]>();
|
private ThreadLocal<double[][]> log10AlleleFrequencyLikelihoods = new ThreadLocal<double[][]>();
|
||||||
|
private ThreadLocal<double[][]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[][]>();
|
||||||
|
|
||||||
// the priors object
|
// the priors object
|
||||||
private final GenotypePriors genotypePriorsSNPs;
|
private final GenotypePriors genotypePriorsSNPs;
|
||||||
|
|
@ -96,6 +100,7 @@ public class UnifiedGenotyperEngine {
|
||||||
// the standard filter to use for calls below the confidence threshold but above the emit threshold
|
// the standard filter to use for calls below the confidence threshold but above the emit threshold
|
||||||
private static final Set<String> filter = new HashSet<String>(1);
|
private static final Set<String> filter = new HashSet<String>(1);
|
||||||
|
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
private final boolean BAQEnabledOnCMDLine;
|
private final boolean BAQEnabledOnCMDLine;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -113,6 +118,7 @@ public class UnifiedGenotyperEngine {
|
||||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
|
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
|
||||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
|
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
|
||||||
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
|
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
|
||||||
|
genomeLocParser = toolkit.getGenomeLocParser();
|
||||||
this.samples = new TreeSet<String>(samples);
|
this.samples = new TreeSet<String>(samples);
|
||||||
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
|
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
|
||||||
this.UAC = UAC.clone();
|
this.UAC = UAC.clone();
|
||||||
|
|
@ -122,10 +128,10 @@ public class UnifiedGenotyperEngine {
|
||||||
this.annotationEngine = engine;
|
this.annotationEngine = engine;
|
||||||
|
|
||||||
N = 2 * this.samples.size();
|
N = 2 * this.samples.size();
|
||||||
log10AlleleFrequencyPriorsSNPs = new double[N+1];
|
log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
|
||||||
log10AlleleFrequencyPriorsIndels = new double[N+1];
|
log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
|
||||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, GenotypeLikelihoodsCalculationModel.Model.SNP);
|
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
|
||||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
|
||||||
genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||||
genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||||
|
|
||||||
|
|
@ -152,7 +158,6 @@ public class UnifiedGenotyperEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
||||||
|
|
||||||
if ( vc == null )
|
if ( vc == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -214,14 +219,7 @@ public class UnifiedGenotyperEngine {
|
||||||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, MultiallelicGenotypeLikelihoods> GLs = new HashMap<String, MultiallelicGenotypeLikelihoods>();
|
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
|
||||||
|
|
||||||
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
|
|
||||||
|
|
||||||
if ( refAllele != null )
|
|
||||||
return createVariantContextFromLikelihoods(refContext, refAllele, GLs);
|
|
||||||
else
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||||
|
|
@ -256,136 +254,170 @@ public class UnifiedGenotyperEngine {
|
||||||
return new VariantCallContext(vc, false);
|
return new VariantCallContext(vc, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
|
public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||||
// no-call everyone for now
|
return calculateGenotypes(null, null, null, null, vc, model);
|
||||||
List<Allele> noCall = new ArrayList<Allele>();
|
|
||||||
noCall.add(Allele.NO_CALL);
|
|
||||||
|
|
||||||
Set<Allele> alleles = new LinkedHashSet<Allele>();
|
|
||||||
alleles.add(refAllele);
|
|
||||||
boolean addedAltAlleles = false;
|
|
||||||
|
|
||||||
GenotypesContext genotypes = GenotypesContext.create();
|
|
||||||
for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) {
|
|
||||||
if ( !addedAltAlleles ) {
|
|
||||||
addedAltAlleles = true;
|
|
||||||
// ordering important to maintain consistency
|
|
||||||
for (Allele a: GL.getAlleles()) {
|
|
||||||
alleles.add(a);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||||
//GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
|
|
||||||
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(GL.getLikelihoods());
|
|
||||||
attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth());
|
|
||||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
|
||||||
|
|
||||||
genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||||
}
|
|
||||||
|
|
||||||
GenomeLoc loc = refContext.getLocus();
|
|
||||||
int endLoc = calculateEndPos(alleles, refAllele, loc);
|
|
||||||
|
|
||||||
return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make();
|
|
||||||
}
|
|
||||||
|
|
||||||
// private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine
|
|
||||||
private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
|
||||||
|
|
||||||
// initialize the data for this thread if that hasn't been done yet
|
// initialize the data for this thread if that hasn't been done yet
|
||||||
if ( afcm.get() == null ) {
|
if ( afcm.get() == null ) {
|
||||||
log10AlleleFrequencyPosteriors.set(new double[N+1]);
|
log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
|
||||||
|
log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
|
||||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// don't try to genotype too many alternate alleles
|
||||||
|
if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) {
|
||||||
|
logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
// estimate our confidence in a reference call and return
|
// estimate our confidence in a reference call and return
|
||||||
if ( vc.getNSamples() == 0 )
|
if ( vc.getNSamples() == 0 ) {
|
||||||
|
if ( limitedContext )
|
||||||
|
return null;
|
||||||
return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ?
|
return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ?
|
||||||
estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), false, 1.0) :
|
estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), false, 1.0) :
|
||||||
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
|
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
|
||||||
|
}
|
||||||
|
|
||||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||||
|
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||||
|
|
||||||
// find the most likely frequency
|
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
||||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
|
boolean bestGuessIsRef = true;
|
||||||
|
|
||||||
|
// which alternate allele has the highest MLE AC?
|
||||||
|
int indexOfHighestAlt = -1;
|
||||||
|
int alleleCountOfHighestAlt = -1;
|
||||||
|
|
||||||
|
// determine which alternate alleles have AF>0
|
||||||
|
boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
|
||||||
|
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||||
|
int indexOfBestAC = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]);
|
||||||
|
|
||||||
|
// if the most likely AC is not 0, then this is a good alternate allele to use
|
||||||
|
if ( indexOfBestAC != 0 ) {
|
||||||
|
altAllelesToUse[i] = true;
|
||||||
|
bestGuessIsRef = false;
|
||||||
|
}
|
||||||
|
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
|
||||||
|
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
|
altAllelesToUse[i] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// keep track of the "best" alternate allele to use
|
||||||
|
if ( indexOfBestAC > alleleCountOfHighestAlt) {
|
||||||
|
alleleCountOfHighestAlt = indexOfBestAC;
|
||||||
|
indexOfHighestAlt = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// calculate p(f>0)
|
// calculate p(f>0)
|
||||||
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get());
|
// TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs
|
||||||
|
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[indexOfHighestAlt]);
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
for (int i = 1; i <= N; i++)
|
for (int i = 1; i <= N; i++)
|
||||||
sum += normalizedPosteriors[i];
|
sum += normalizedPosteriors[i];
|
||||||
double PofF = Math.min(sum, 1.0); // deal with precision errors
|
double PofF = Math.min(sum, 1.0); // deal with precision errors
|
||||||
|
|
||||||
double phredScaledConfidence;
|
double phredScaledConfidence;
|
||||||
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
|
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
|
||||||
if ( Double.isInfinite(phredScaledConfidence) )
|
if ( Double.isInfinite(phredScaledConfidence) )
|
||||||
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
|
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0];
|
||||||
} else {
|
} else {
|
||||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
||||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||||
sum = 0.0;
|
sum = 0.0;
|
||||||
for (int i = 1; i <= N; i++) {
|
for (int i = 1; i <= N; i++) {
|
||||||
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
||||||
break;
|
break;
|
||||||
sum += log10AlleleFrequencyPosteriors.get()[i];
|
sum += log10AlleleFrequencyPosteriors.get()[0][i];
|
||||||
}
|
}
|
||||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
||||||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
|
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) {
|
||||||
// technically, at this point our confidence in a reference call isn't accurately estimated
|
// technically, at this point our confidence in a reference call isn't accurately estimated
|
||||||
// because it didn't take into account samples with no data, so let's get a better estimate
|
// because it didn't take into account samples with no data, so let's get a better estimate
|
||||||
return estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
|
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// strip out any alleles that aren't going to be used in the VariantContext
|
||||||
|
List<Allele> myAlleles;
|
||||||
|
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
||||||
|
myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
|
||||||
|
myAlleles.add(vc.getReference());
|
||||||
|
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||||
|
if ( altAllelesToUse[i] )
|
||||||
|
myAlleles.add(vc.getAlternateAllele(i));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// use all of the alleles if we are given them by the user
|
||||||
|
myAlleles = vc.getAlleles();
|
||||||
|
}
|
||||||
|
|
||||||
|
// start constructing the resulting VC
|
||||||
|
GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
|
||||||
|
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
|
||||||
|
builder.log10PError(phredScaledConfidence/-10.0);
|
||||||
|
if ( ! passesCallThreshold(phredScaledConfidence) )
|
||||||
|
builder.filters(filter);
|
||||||
|
if ( !limitedContext )
|
||||||
|
builder.referenceBaseForIndel(refContext.getBase());
|
||||||
|
|
||||||
// create the genotypes
|
// create the genotypes
|
||||||
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse);
|
||||||
|
|
||||||
// print out stats if we have a writer
|
// print out stats if we have a writer
|
||||||
if ( verboseWriter != null )
|
if ( verboseWriter != null && !limitedContext )
|
||||||
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
|
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
|
||||||
|
|
||||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||||
|
|
||||||
// if the site was downsampled, record that fact
|
// if the site was downsampled, record that fact
|
||||||
if ( rawContext.hasPileupBeenDownsampled() )
|
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
|
||||||
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
|
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
|
||||||
|
|
||||||
|
if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
|
||||||
if ( UAC.COMPUTE_SLOD && bestAFguess != 0 ) {
|
|
||||||
//final boolean DEBUG_SLOD = false;
|
//final boolean DEBUG_SLOD = false;
|
||||||
|
|
||||||
// the overall lod
|
// the overall lod
|
||||||
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
||||||
|
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||||
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
|
||||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||||
|
|
||||||
// the forward lod
|
// the forward lod
|
||||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
||||||
|
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
|
||||||
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
|
||||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||||
|
|
||||||
// the reverse lod
|
// the reverse lod
|
||||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
||||||
|
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
|
||||||
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
|
||||||
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||||
|
|
||||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||||
|
|
@ -401,26 +433,12 @@ public class UnifiedGenotyperEngine {
|
||||||
attributes.put("SB", strandScore);
|
attributes.put("SB", strandScore);
|
||||||
}
|
}
|
||||||
|
|
||||||
GenomeLoc loc = refContext.getLocus();
|
// finish constructing the resulting VC
|
||||||
|
|
||||||
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
|
|
||||||
|
|
||||||
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
|
|
||||||
// strip out the alternate allele if it's a ref call
|
|
||||||
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
|
||||||
myAlleles = new HashSet<Allele>(1);
|
|
||||||
myAlleles.add(vc.getReference());
|
|
||||||
}
|
|
||||||
|
|
||||||
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles);
|
|
||||||
builder.genotypes(genotypes);
|
builder.genotypes(genotypes);
|
||||||
builder.log10PError(phredScaledConfidence/-10.0);
|
|
||||||
if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter);
|
|
||||||
builder.attributes(attributes);
|
builder.attributes(attributes);
|
||||||
builder.referenceBaseForIndel(refContext.getBase());
|
|
||||||
VariantContext vcCall = builder.make();
|
VariantContext vcCall = builder.make();
|
||||||
|
|
||||||
if ( annotationEngine != null ) {
|
if ( annotationEngine != null && !limitedContext ) {
|
||||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||||
ReadBackedPileup pileup = null;
|
ReadBackedPileup pileup = null;
|
||||||
if (rawContext.hasExtendedEventPileup())
|
if (rawContext.hasExtendedEventPileup())
|
||||||
|
|
@ -435,119 +453,6 @@ public class UnifiedGenotyperEngine {
|
||||||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||||
}
|
}
|
||||||
|
|
||||||
// A barebones entry point to the exact model when there is no tracker or stratified contexts available -- only GLs
|
|
||||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
|
||||||
|
|
||||||
// initialize the data for this thread if that hasn't been done yet
|
|
||||||
if ( afcm.get() == null ) {
|
|
||||||
log10AlleleFrequencyPosteriors.set(new double[N+1]);
|
|
||||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
|
||||||
}
|
|
||||||
|
|
||||||
// estimate our confidence in a reference call and return
|
|
||||||
if ( vc.getNSamples() == 0 )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
|
||||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
|
||||||
|
|
||||||
// find the most likely frequency
|
|
||||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
|
|
||||||
|
|
||||||
// calculate p(f>0)
|
|
||||||
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get());
|
|
||||||
double sum = 0.0;
|
|
||||||
for (int i = 1; i <= N; i++)
|
|
||||||
sum += normalizedPosteriors[i];
|
|
||||||
double PofF = Math.min(sum, 1.0); // deal with precision errors
|
|
||||||
|
|
||||||
double phredScaledConfidence;
|
|
||||||
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
|
||||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
|
|
||||||
if ( Double.isInfinite(phredScaledConfidence) )
|
|
||||||
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
|
|
||||||
} else {
|
|
||||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
|
||||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
|
||||||
sum = 0.0;
|
|
||||||
for (int i = 1; i <= N; i++) {
|
|
||||||
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
|
||||||
break;
|
|
||||||
sum += log10AlleleFrequencyPosteriors.get()[i];
|
|
||||||
}
|
|
||||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
|
||||||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
|
|
||||||
// technically, at this point our confidence in a reference call isn't accurately estimated
|
|
||||||
// because it didn't take into account samples with no data, so let's get a better estimate
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// create the genotypes
|
|
||||||
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
|
||||||
|
|
||||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
|
||||||
|
|
||||||
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
|
|
||||||
|
|
||||||
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
|
|
||||||
// strip out the alternate allele if it's a ref call
|
|
||||||
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
|
||||||
myAlleles = new HashSet<Allele>(1);
|
|
||||||
myAlleles.add(vc.getReference());
|
|
||||||
}
|
|
||||||
|
|
||||||
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles);
|
|
||||||
builder.genotypes(genotypes);
|
|
||||||
builder.log10PError(phredScaledConfidence/-10.0);
|
|
||||||
if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter);
|
|
||||||
builder.attributes(attributes);
|
|
||||||
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
return new VariantCallContext(builder.make(), confidentlyCalled(phredScaledConfidence, PofF));
|
|
||||||
}
|
|
||||||
|
|
||||||
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
|
||||||
// TODO - temp fix until we can deal with extended events properly
|
|
||||||
// for indels, stop location is one more than ref allele length
|
|
||||||
boolean isSNP = true, hasNullAltAllele = false;
|
|
||||||
for (Allele a : alleles){
|
|
||||||
if (a.length() != 1) {
|
|
||||||
isSNP = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (Allele a : alleles){
|
|
||||||
if (a.isNull()) {
|
|
||||||
hasNullAltAllele = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// standard deletion: ref allele length = del length. endLoc = startLoc + refAllele.length(), alt allele = null
|
|
||||||
// standard insertion: ref allele length = 0, endLos = startLoc
|
|
||||||
// mixed: want end loc = start Loc for case {A*,AT,T} but say {ATG*,A,T} : want then end loc = start loc + refAllele.length
|
|
||||||
// So, in general, end loc = startLoc + refAllele.length, except in complex substitutions where it's one less
|
|
||||||
//
|
|
||||||
// todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler,
|
|
||||||
// the whole vc processing infrastructure seems too brittle and riddled with special case handling
|
|
||||||
|
|
||||||
|
|
||||||
int endLoc = loc.getStart();
|
|
||||||
if ( !isSNP) {
|
|
||||||
endLoc += refAllele.length();
|
|
||||||
if(!hasNullAltAllele)
|
|
||||||
endLoc--;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return endLoc;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||||
|
|
||||||
Map<String, AlignmentContext> stratifiedContexts = null;
|
Map<String, AlignmentContext> stratifiedContexts = null;
|
||||||
|
|
@ -604,9 +509,12 @@ public class UnifiedGenotyperEngine {
|
||||||
return stratifiedContexts;
|
return stratifiedContexts;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static void clearAFarray(double[] AFs) {
|
protected static void clearAFarray(double[][] AFs) {
|
||||||
for ( int i = 0; i < AFs.length; i++ )
|
for ( int i = 0; i < AFs.length; i++ ) {
|
||||||
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
for ( int j = 0; j < AFs[i].length; j++ ) {
|
||||||
|
AFs[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static double[] binomialProbabilityDepthCache = new double[10000];
|
private final static double[] binomialProbabilityDepthCache = new double[10000];
|
||||||
|
|
@ -676,7 +584,7 @@ public class UnifiedGenotyperEngine {
|
||||||
AFline.append(i + "/" + N + "\t");
|
AFline.append(i + "/" + N + "\t");
|
||||||
AFline.append(String.format("%.2f\t", ((float)i)/N));
|
AFline.append(String.format("%.2f\t", ((float)i)/N));
|
||||||
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
|
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
|
||||||
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
|
if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
|
||||||
AFline.append("0.00000000\t");
|
AFline.append("0.00000000\t");
|
||||||
else
|
else
|
||||||
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i]));
|
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i]));
|
||||||
|
|
@ -689,8 +597,8 @@ public class UnifiedGenotyperEngine {
|
||||||
verboseWriter.println();
|
verboseWriter.println();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean passesEmitThreshold(double conf, int bestAFguess) {
|
protected boolean passesEmitThreshold(double conf, boolean bestGuessIsRef) {
|
||||||
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || !bestGuessIsRef) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean passesCallThreshold(double conf) {
|
protected boolean passesCallThreshold(double conf) {
|
||||||
|
|
@ -744,27 +652,25 @@ public class UnifiedGenotyperEngine {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) {
|
protected static void computeAlleleFrequencyPriors(final int N, final double[][] priors, final double theta) {
|
||||||
// calculate the allele frequency priors for 1-N
|
|
||||||
|
// the dimension here is the number of alternate alleles; with e.g. 2 alternate alleles the prior will be theta^2 / i
|
||||||
|
for (int alleles = 1; alleles <= priors.length; alleles++) {
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
double heterozygosity;
|
|
||||||
|
|
||||||
if (model == GenotypeLikelihoodsCalculationModel.Model.INDEL)
|
|
||||||
heterozygosity = UAC.INDEL_HETEROZYGOSITY;
|
|
||||||
else
|
|
||||||
heterozygosity = UAC.heterozygosity;
|
|
||||||
|
|
||||||
|
// for each i
|
||||||
for (int i = 1; i <= N; i++) {
|
for (int i = 1; i <= N; i++) {
|
||||||
double value = heterozygosity / (double)i;
|
double value = Math.pow(theta, alleles) / (double)i;
|
||||||
priors[i] = Math.log10(value);
|
priors[alleles-1][i] = Math.log10(value);
|
||||||
sum += value;
|
sum += value;
|
||||||
}
|
}
|
||||||
|
|
||||||
// null frequency for AF=0 is (1 - sum(all other frequencies))
|
// null frequency for AF=0 is (1 - sum(all other frequencies))
|
||||||
priors[0] = Math.log10(1.0 - sum);
|
priors[alleles-1][0] = Math.log10(1.0 - sum);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||||
switch( model ) {
|
switch( model ) {
|
||||||
case SNP:
|
case SNP:
|
||||||
return log10AlleleFrequencyPriorsSNPs;
|
return log10AlleleFrequencyPriorsSNPs;
|
||||||
|
|
@ -837,4 +743,88 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
return vc;
|
return vc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param vc variant context with genotype likelihoods
|
||||||
|
* @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use
|
||||||
|
*
|
||||||
|
* @return genotypes
|
||||||
|
*/
|
||||||
|
public GenotypesContext assignGenotypes(VariantContext vc,
|
||||||
|
boolean[] allelesToUse) {
|
||||||
|
|
||||||
|
final GenotypesContext GLs = vc.getGenotypes();
|
||||||
|
|
||||||
|
final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();
|
||||||
|
|
||||||
|
final GenotypesContext calls = GenotypesContext.create();
|
||||||
|
|
||||||
|
for ( int k = GLs.size() - 1; k >= 0; k-- ) {
|
||||||
|
final String sample = sampleIndices.get(k);
|
||||||
|
final Genotype g = GLs.get(sample);
|
||||||
|
if ( !g.hasLikelihoods() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
final double[] likelihoods = g.getLikelihoods().getAsVector();
|
||||||
|
|
||||||
|
// if there is no mass on the likelihoods, then just no-call the sample
|
||||||
|
if ( MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||||
|
calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
|
||||||
|
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
|
||||||
|
|
||||||
|
final int numAltAlleles = allelesToUse.length;
|
||||||
|
|
||||||
|
// start with the assumption that the ideal genotype is homozygous reference
|
||||||
|
Allele maxAllele1 = vc.getReference(), maxAllele2 = vc.getReference();
|
||||||
|
double maxLikelihoodSeen = likelihoods[0];
|
||||||
|
int indexOfMax = 0;
|
||||||
|
|
||||||
|
// keep track of some state
|
||||||
|
Allele firstAllele = vc.getReference();
|
||||||
|
int subtractor = numAltAlleles + 1;
|
||||||
|
int subtractionsMade = 0;
|
||||||
|
|
||||||
|
for ( int i = 1, PLindex = 1; i < likelihoods.length; i++, PLindex++ ) {
|
||||||
|
if ( PLindex == subtractor ) {
|
||||||
|
firstAllele = vc.getAlternateAllele(subtractionsMade);
|
||||||
|
PLindex -= subtractor;
|
||||||
|
subtractor--;
|
||||||
|
subtractionsMade++;
|
||||||
|
|
||||||
|
// we can skip this allele if it's not usable
|
||||||
|
if ( !allelesToUse[subtractionsMade-1] ) {
|
||||||
|
i += subtractor - 1;
|
||||||
|
PLindex += subtractor - 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we don't care about the entry if we've already seen better
|
||||||
|
if ( likelihoods[i] <= maxLikelihoodSeen )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// if it's usable then update the alleles
|
||||||
|
int alleleIndex = subtractionsMade + PLindex - 1;
|
||||||
|
if ( allelesToUse[alleleIndex] ) {
|
||||||
|
maxAllele1 = firstAllele;
|
||||||
|
maxAllele2 = vc.getAlternateAllele(alleleIndex);
|
||||||
|
maxLikelihoodSeen = likelihoods[i];
|
||||||
|
indexOfMax = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||||
|
myAlleles.add(maxAllele1);
|
||||||
|
myAlleles.add(maxAllele2);
|
||||||
|
|
||||||
|
final double qual = GenotypeLikelihoods.getQualFromLikelihoods(indexOfMax, likelihoods);
|
||||||
|
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
||||||
|
}
|
||||||
|
|
||||||
|
return calls;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -164,13 +164,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
@Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
|
@Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
|
||||||
protected double MIN_PHASE_QUALITY = 10.0;
|
protected double MIN_PHASE_QUALITY = 10.0;
|
||||||
|
|
||||||
/**
|
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation. Default is 50.", required=false)
|
||||||
* This argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined.
|
|
||||||
*/
|
|
||||||
@Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations", required=false)
|
|
||||||
protected String FAMILY_STRUCTURE;
|
|
||||||
|
|
||||||
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
|
||||||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||||
|
|
||||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||||
|
|
@ -561,8 +555,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
|
|
||||||
public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; }
|
public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; }
|
||||||
|
|
||||||
public String getFamilyStructure() { return FAMILY_STRUCTURE; }
|
|
||||||
|
|
||||||
public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; }
|
public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; }
|
||||||
|
|
||||||
public TreeSet<VariantStratifier> getStratificationObjects() { return stratificationObjects; }
|
public TreeSet<VariantStratifier> getStratificationObjects() { return stratificationObjects; }
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -7,9 +9,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import java.io.PrintStream;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mendelian violation detection and counting
|
* Mendelian violation detection and counting
|
||||||
|
|
@ -40,12 +44,25 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
|
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
|
||||||
public class MendelianViolationEvaluator extends VariantEvaluator {
|
public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
|
|
||||||
@DataPoint(description = "Number of mendelian variants found")
|
@DataPoint(description = "Number of variants found with at least one family having genotypes")
|
||||||
long nVariants;
|
long nVariants;
|
||||||
|
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall")
|
||||||
|
long nSkipped;
|
||||||
|
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)")
|
||||||
|
long nFamCalled;
|
||||||
|
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.")
|
||||||
|
long nVarFamCalled;
|
||||||
|
@DataPoint(description="Number of variants x families discarded as low quality")
|
||||||
|
long nLowQual;
|
||||||
|
@DataPoint(description="Number of variants x families discarded as no call")
|
||||||
|
long nNoCall;
|
||||||
|
@DataPoint(description="Number of loci with mendelian violations")
|
||||||
|
long nLociViolations;
|
||||||
@DataPoint(description = "Number of mendelian violations found")
|
@DataPoint(description = "Number of mendelian violations found")
|
||||||
long nViolations;
|
long nViolations;
|
||||||
|
|
||||||
@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
|
|
||||||
|
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
|
||||||
long KidHomRef_ParentHomVar;
|
long KidHomRef_ParentHomVar;
|
||||||
@DataPoint(description = "number of child het calls where the parent was hom ref")
|
@DataPoint(description = "number of child het calls where the parent was hom ref")
|
||||||
long KidHet_ParentsHomRef;
|
long KidHet_ParentsHomRef;
|
||||||
|
|
@ -53,11 +70,65 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
long KidHet_ParentsHomVar;
|
long KidHet_ParentsHomVar;
|
||||||
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
||||||
long KidHomVar_ParentHomRef;
|
long KidHomVar_ParentHomRef;
|
||||||
|
*/
|
||||||
|
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR")
|
||||||
|
long mvRefRef_Var;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET")
|
||||||
|
long mvRefRef_Het;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR")
|
||||||
|
long mvRefHet_Var;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR")
|
||||||
|
long mvRefVar_Var;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF")
|
||||||
|
long mvRefVar_Ref;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF")
|
||||||
|
long mvVarHet_Ref;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF")
|
||||||
|
long mvVarVar_Ref;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET")
|
||||||
|
long mvVarVar_Het;
|
||||||
|
|
||||||
|
|
||||||
|
/*@DataPoint(description ="Number of inherited var alleles from het parents")
|
||||||
|
long nInheritedVar;
|
||||||
|
@DataPoint(description ="Number of inherited ref alleles from het parents")
|
||||||
|
long nInheritedRef;*/
|
||||||
|
|
||||||
|
@DataPoint(description="Number of HomRef/HomRef/HomRef trios")
|
||||||
|
long HomRefHomRef_HomRef;
|
||||||
|
@DataPoint(description="Number of Het/Het/Het trios")
|
||||||
|
long HetHet_Het;
|
||||||
|
@DataPoint(description="Number of Het/Het/HomRef trios")
|
||||||
|
long HetHet_HomRef;
|
||||||
|
@DataPoint(description="Number of Het/Het/HomVar trios")
|
||||||
|
long HetHet_HomVar;
|
||||||
|
@DataPoint(description="Number of HomVar/HomVar/HomVar trios")
|
||||||
|
long HomVarHomVar_HomVar;
|
||||||
|
@DataPoint(description="Number of HomRef/HomVar/Het trios")
|
||||||
|
long HomRefHomVAR_Het;
|
||||||
|
@DataPoint(description="Number of ref alleles inherited from het/het parents")
|
||||||
|
long HetHet_inheritedRef;
|
||||||
|
@DataPoint(description="Number of var alleles inherited from het/het parents")
|
||||||
|
long HetHet_inheritedVar;
|
||||||
|
@DataPoint(description="Number of ref alleles inherited from homRef/het parents")
|
||||||
|
long HomRefHet_inheritedRef;
|
||||||
|
@DataPoint(description="Number of var alleles inherited from homRef/het parents")
|
||||||
|
long HomRefHet_inheritedVar;
|
||||||
|
@DataPoint(description="Number of ref alleles inherited from homVar/het parents")
|
||||||
|
long HomVarHet_inheritedRef;
|
||||||
|
@DataPoint(description="Number of var alleles inherited from homVar/het parents")
|
||||||
|
long HomVarHet_inheritedVar;
|
||||||
|
|
||||||
MendelianViolation mv;
|
MendelianViolation mv;
|
||||||
|
PrintStream mvFile;
|
||||||
|
Map<String,Set<Sample>> families;
|
||||||
|
|
||||||
public void initialize(VariantEvalWalker walker) {
|
public void initialize(VariantEvalWalker walker) {
|
||||||
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
//Changed by Laurent Francioli - 2011-06-07
|
||||||
|
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
||||||
|
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
|
||||||
|
families = walker.getSampleDB().getFamilies();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean enabled() {
|
public boolean enabled() {
|
||||||
|
|
@ -75,110 +146,48 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
|
|
||||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
||||||
if (mv.setAlleles(vc)) {
|
|
||||||
|
if(mv.countViolations(families,vc)>0){
|
||||||
|
nLociViolations++;
|
||||||
|
nViolations += mv.getViolationsCount();
|
||||||
|
mvRefRef_Var += mv.getParentsRefRefChildVar();
|
||||||
|
mvRefRef_Het += mv.getParentsRefRefChildHet();
|
||||||
|
mvRefHet_Var += mv.getParentsRefHetChildVar();
|
||||||
|
mvRefVar_Var += mv.getParentsRefVarChildVar();
|
||||||
|
mvRefVar_Ref += mv.getParentsRefVarChildRef();
|
||||||
|
mvVarHet_Ref += mv.getParentsVarHetChildRef();
|
||||||
|
mvVarVar_Ref += mv.getParentsVarVarChildRef();
|
||||||
|
mvVarVar_Het += mv.getParentsVarVarChildHet();
|
||||||
|
|
||||||
|
}
|
||||||
|
HomRefHomRef_HomRef += mv.getRefRefRef();
|
||||||
|
HetHet_Het += mv.getHetHetHet();
|
||||||
|
HetHet_HomRef += mv.getHetHetHomRef();
|
||||||
|
HetHet_HomVar += mv.getHetHetHomVar();
|
||||||
|
HomVarHomVar_HomVar += mv.getVarVarVar();
|
||||||
|
HomRefHomVAR_Het += mv.getRefVarHet();
|
||||||
|
HetHet_inheritedRef += mv.getParentsHetHetInheritedRef();
|
||||||
|
HetHet_inheritedVar += mv.getParentsHetHetInheritedVar();
|
||||||
|
HomRefHet_inheritedRef += mv.getParentsRefHetInheritedRef();
|
||||||
|
HomRefHet_inheritedVar += mv.getParentsRefHetInheritedVar();
|
||||||
|
HomVarHet_inheritedRef += mv.getParentsVarHetInheritedRef();
|
||||||
|
HomVarHet_inheritedVar += mv.getParentsVarHetInheritedVar();
|
||||||
|
|
||||||
|
if(mv.getFamilyCalledCount()>0){
|
||||||
nVariants++;
|
nVariants++;
|
||||||
|
nFamCalled += mv.getFamilyCalledCount();
|
||||||
Genotype momG = vc.getGenotype(mv.getSampleMom());
|
nLowQual += mv.getFamilyLowQualsCount();
|
||||||
Genotype dadG = vc.getGenotype(mv.getSampleDad());
|
nNoCall += mv.getFamilyNoCallCount();
|
||||||
Genotype childG = vc.getGenotype(mv.getSampleChild());
|
nVarFamCalled += mv.getVarFamilyCalledCount();
|
||||||
|
|
||||||
if (mv.isViolation()) {
|
|
||||||
nViolations++;
|
|
||||||
|
|
||||||
String label;
|
|
||||||
if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) {
|
|
||||||
label = "KidHomRef_ParentHomVar";
|
|
||||||
KidHomRef_ParentHomVar++;
|
|
||||||
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) {
|
|
||||||
label = "KidHet_ParentsHomRef";
|
|
||||||
KidHet_ParentsHomRef++;
|
|
||||||
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) {
|
|
||||||
label = "KidHet_ParentsHomVar";
|
|
||||||
KidHet_ParentsHomVar++;
|
|
||||||
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
|
|
||||||
label = "KidHomVar_ParentHomRef";
|
|
||||||
KidHomVar_ParentHomRef++;
|
|
||||||
} else {
|
|
||||||
throw new ReviewedStingException("BUG: unexpected child genotype class " + childG);
|
|
||||||
}
|
}
|
||||||
|
else{
|
||||||
return "MendelViolation=" + label;
|
nSkipped++;
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return null; // we don't capture any intersting sites
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
return null;
|
||||||
private double getQThreshold() {
|
|
||||||
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
|
||||||
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
|
|
||||||
//return 0.0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TrioStructure trio;
|
return null; // we don't capture any interesting sites
|
||||||
double mendelianViolationQualThreshold;
|
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
|
||||||
|
|
||||||
public static class TrioStructure {
|
|
||||||
public String mom, dad, child;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static TrioStructure parseTrioDescription(String family) {
|
|
||||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
|
||||||
if (m.matches()) {
|
|
||||||
TrioStructure trio = new TrioStructure();
|
|
||||||
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
|
||||||
trio.mom = m.group(1);
|
|
||||||
trio.dad = m.group(2);
|
|
||||||
trio.child = m.group(3);
|
|
||||||
return trio;
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void initialize(VariantEvalWalker walker) {
|
|
||||||
trio = parseTrioDescription(walker.getFamilyStructure());
|
|
||||||
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean includeGenotype(Genotype g) {
|
|
||||||
return g.getLog10PError() > getQThreshold() && g.isCalled();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) {
|
|
||||||
return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles());
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isViolation(VariantContext vc, TrioStructure trio ) {
|
|
||||||
return isViolation(vc, vc.getGenotype(trio.mom), vc.getGenotype(trio.dad), vc.getGenotype(trio.child) );
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA) {
|
|
||||||
//VariantContext momVC = vc.subContextFromGenotypes(momG);
|
|
||||||
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
|
|
||||||
int i = 0;
|
|
||||||
Genotype childG = new Genotype("kidG", childA);
|
|
||||||
for (Allele momAllele : momA) {
|
|
||||||
for (Allele dadAllele : dadA) {
|
|
||||||
if (momAllele.isCalled() && dadAllele.isCalled()) {
|
|
||||||
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
|
|
||||||
if (childG.sameGenotype(possibleChild)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,9 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
@ -41,7 +42,6 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -180,7 +180,7 @@ import java.util.*;
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class SelectVariants extends RodWalker<Integer, Integer> {
|
public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||||
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -282,6 +282,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
|
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
|
||||||
private double fractionRandom = 0;
|
private double fractionRandom = 0;
|
||||||
|
|
||||||
|
@Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false)
|
||||||
|
private double fractionGenotypes = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||||
* When specified one or more times, a particular type of variant is selected.
|
* When specified one or more times, a particular type of variant is selected.
|
||||||
|
|
@ -325,7 +328,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
private boolean DISCORDANCE_ONLY = false;
|
private boolean DISCORDANCE_ONLY = false;
|
||||||
private boolean CONCORDANCE_ONLY = false;
|
private boolean CONCORDANCE_ONLY = false;
|
||||||
|
|
||||||
private Set<MendelianViolation> mvSet = new HashSet<MendelianViolation>();
|
private MendelianViolation mv;
|
||||||
|
|
||||||
|
|
||||||
/* variables used by the SELECT RANDOM modules */
|
/* variables used by the SELECT RANDOM modules */
|
||||||
|
|
@ -344,6 +347,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
private PrintStream outMVFileStream = null;
|
private PrintStream outMVFileStream = null;
|
||||||
|
|
||||||
|
//Random number generator for the genotypes to remove
|
||||||
|
private Random randomGenotypes = new Random();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||||
|
|
@ -380,8 +385,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
for ( String sample : samples )
|
for ( String sample : samples )
|
||||||
logger.info("Including sample '" + sample + "'");
|
logger.info("Including sample '" + sample + "'");
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
||||||
if (TYPES_TO_INCLUDE.isEmpty()) {
|
if (TYPES_TO_INCLUDE.isEmpty()) {
|
||||||
|
|
||||||
|
|
@ -421,29 +424,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
|
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
|
||||||
|
|
||||||
if (MENDELIAN_VIOLATIONS) {
|
if (MENDELIAN_VIOLATIONS) {
|
||||||
if ( FAMILY_STRUCTURE_FILE != null) {
|
mv = new MendelianViolation(MENDELIAN_VIOLATION_QUAL_THRESHOLD,false,true);
|
||||||
try {
|
|
||||||
for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) {
|
|
||||||
MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
|
||||||
if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom()))
|
|
||||||
mvSet.add(mv);
|
|
||||||
}
|
|
||||||
} catch ( FileNotFoundException e ) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
|
|
||||||
}
|
|
||||||
if (outMVFile != null)
|
|
||||||
try {
|
|
||||||
outMVFileStream = new PrintStream(outMVFile);
|
|
||||||
}
|
|
||||||
catch (FileNotFoundException e) {
|
|
||||||
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
|
||||||
}
|
|
||||||
else
|
|
||||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
|
||||||
}
|
|
||||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
|
||||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
|
||||||
MENDELIAN_VIOLATIONS = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SELECT_RANDOM_NUMBER = numRandom > 0;
|
SELECT_RANDOM_NUMBER = numRandom > 0;
|
||||||
|
|
@ -479,26 +460,26 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (VariantContext vc : vcs) {
|
for (VariantContext vc : vcs) {
|
||||||
if (MENDELIAN_VIOLATIONS) {
|
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
|
||||||
boolean foundMV = false;
|
break;
|
||||||
for (MendelianViolation mv : mvSet) {
|
|
||||||
if (mv.isViolation(vc)) {
|
if (outMVFile != null){
|
||||||
foundMV = true;
|
for( String familyId : mv.getViolationFamilies()){
|
||||||
//System.out.println(vc.toString());
|
for(Sample sample : this.getSampleDB().getFamily(familyId)){
|
||||||
if (outMVFile != null)
|
if(sample.getParents().size() > 0){
|
||||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||||
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
||||||
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
||||||
mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(),
|
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
|
||||||
vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
|
||||||
vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
|
||||||
vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() );
|
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!foundMV)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (DISCORDANCE_ONLY) {
|
if (DISCORDANCE_ONLY) {
|
||||||
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
|
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
|
||||||
if (!isDiscordant(vc, compVCs))
|
if (!isDiscordant(vc, compVCs))
|
||||||
|
|
@ -629,6 +610,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Override
|
@Override
|
||||||
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer treeReduce(Integer lhs, Integer rhs) {
|
||||||
|
return lhs + rhs;
|
||||||
|
}
|
||||||
|
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
logger.info(result + " records processed.");
|
logger.info(result + " records processed.");
|
||||||
|
|
||||||
|
|
@ -657,9 +643,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||||
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
||||||
|
|
||||||
|
GenotypesContext newGC = sub.getGenotypes();
|
||||||
|
|
||||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||||
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||||
builder.genotypes(VariantContextUtils.stripPLs(vc.getGenotypes()));
|
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||||
|
|
||||||
|
//Remove a fraction of the genotypes if needed
|
||||||
|
if(fractionGenotypes>0){
|
||||||
|
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||||
|
for ( Genotype genotype : newGC ) {
|
||||||
|
//Set genotype to no call if it falls in the fraction.
|
||||||
|
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||||
|
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||||
|
alleles.add(Allele.create((byte)'.'));
|
||||||
|
alleles.add(Allele.create((byte)'.'));
|
||||||
|
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
genotypes.add(genotype);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
newGC = GenotypesContext.create(genotypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
builder.genotypes(newGC);
|
||||||
|
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
for (String sample : sub.getSampleNames()) {
|
for (String sample : sub.getSampleNames()) {
|
||||||
|
|
|
||||||
|
|
@ -1,147 +1,394 @@
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: carneiro
|
* User: carneiro / lfran
|
||||||
* Date: 3/9/11
|
* Date: 3/9/11
|
||||||
* Time: 12:38 PM
|
* Time: 12:38 PM
|
||||||
|
*
|
||||||
|
* Class for the identification and tracking of mendelian violation. It can be used in 2 distinct ways:
|
||||||
|
* - Either using an instance of the MendelianViolation class to track mendelian violations for each of the families while
|
||||||
|
* walking over the variants
|
||||||
|
* - Or using the static methods to directly get information about mendelian violation in a family at a given locus
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
public class MendelianViolation {
|
public class MendelianViolation {
|
||||||
String sampleMom;
|
//List of families with violations
|
||||||
String sampleDad;
|
private List<String> violationFamilies;
|
||||||
String sampleChild;
|
|
||||||
|
|
||||||
List allelesMom;
|
//Call information
|
||||||
List allelesDad;
|
private int nocall = 0;
|
||||||
List allelesChild;
|
private int familyCalled = 0;
|
||||||
|
private int varFamilyCalled = 0;
|
||||||
|
private int lowQual = 0;
|
||||||
|
|
||||||
double minGenotypeQuality;
|
private boolean allCalledOnly = true;
|
||||||
|
|
||||||
|
//Stores occurrences of inheritance
|
||||||
|
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
|
||||||
|
|
||||||
|
private int violations_total=0;
|
||||||
|
|
||||||
|
private double minGenotypeQuality;
|
||||||
|
|
||||||
|
private boolean abortOnSampleNotFound;
|
||||||
|
|
||||||
|
//Number of families with genotype information for all members
|
||||||
|
public int getFamilyCalledCount(){
|
||||||
|
return familyCalled;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Number of families with genotype information for all members
|
||||||
|
public int getVarFamilyCalledCount(){
|
||||||
|
return varFamilyCalled;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Number of families missing genotypes for one or more of their members
|
||||||
|
public int getFamilyNoCallCount(){
|
||||||
|
return nocall;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Number of families with genotypes below the set quality threshold
|
||||||
|
public int getFamilyLowQualsCount(){
|
||||||
|
return lowQual;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getViolationsCount(){
|
||||||
|
return violations_total;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of alt alleles inherited from het parents (no violation)
|
||||||
|
public int getParentHetInheritedVar(){
|
||||||
|
return getParentsHetHetInheritedVar() + getParentsRefHetInheritedVar() + getParentsVarHetInheritedVar();
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of ref alleles inherited from het parents (no violation)
|
||||||
|
public int getParentHetInheritedRef(){
|
||||||
|
return getParentsHetHetInheritedRef() + getParentsRefHetInheritedRef() + getParentsVarHetInheritedRef();
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of HomRef/HomRef/HomRef trios
|
||||||
|
public int getRefRefRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of HomVar/HomVar/HomVar trios
|
||||||
|
public int getVarVarVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of HomRef/HomVar/Het trios
|
||||||
|
public int getRefVarHet(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
|
||||||
|
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of Het/Het/Het trios
|
||||||
|
public int getHetHetHet(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of Het/Het/HomRef trios
|
||||||
|
public int getHetHetHomRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of Het/Het/HomVar trios
|
||||||
|
public int getHetHetHomVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of ref alleles inherited from Het/Het parents (no violation)
|
||||||
|
public int getParentsHetHetInheritedRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||||
|
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||||
|
//return parentsHetHet_childRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of var alleles inherited from Het/Het parents (no violation)
|
||||||
|
public int getParentsHetHetInheritedVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||||
|
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||||
|
//return parentsHetHet_childVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of ref alleles inherited from HomRef/Het parents (no violation)
|
||||||
|
public int getParentsRefHetInheritedRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||||
|
//return parentsHomRefHet_childRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of var alleles inherited from HomRef/Het parents (no violation)
|
||||||
|
public int getParentsRefHetInheritedVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||||
|
//return parentsHomRefHet_childVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of ref alleles inherited from HomVar/Het parents (no violation)
|
||||||
|
public int getParentsVarHetInheritedRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||||
|
//return parentsHomVarHet_childRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of var alleles inherited from HomVar/Het parents (no violation)
|
||||||
|
public int getParentsVarHetInheritedVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||||
|
//return parentsHomVarHet_childVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
|
||||||
|
public int getParentsRefRefChildVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HOM_REF -> HET
|
||||||
|
public int getParentsRefRefChildHet(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HET -> HOM_VAR
|
||||||
|
public int getParentsRefHetChildVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
|
||||||
|
public int getParentsRefVarChildVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
|
||||||
|
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
|
||||||
|
public int getParentsRefVarChildRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||||
|
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_VAR/HET -> HOM_REF
|
||||||
|
public int getParentsVarHetChildRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||||
|
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
|
||||||
|
public int getParentsVarVarChildRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
|
||||||
|
public int getParentsVarVarChildHet(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_VAR/? -> HOM_REF
|
||||||
|
public int getParentVarChildRef(){
|
||||||
|
return getParentsRefVarChildRef() + getParentsVarHetChildRef() +getParentsVarVarChildRef();
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/? -> HOM_VAR
|
||||||
|
public int getParentRefChildVar(){
|
||||||
|
return getParentsRefVarChildVar() + getParentsRefHetChildVar() +getParentsRefRefChildVar();
|
||||||
|
}
|
||||||
|
|
||||||
|
//Returns a String containing all trios where a Mendelian violation was observed.
|
||||||
|
//The String is formatted "mom1+dad1=child1,mom2+dad2=child2,..."
|
||||||
|
public String getViolationFamiliesString(){
|
||||||
|
if(violationFamilies.isEmpty())
|
||||||
|
return "";
|
||||||
|
|
||||||
|
Iterator<String> it = violationFamilies.iterator();
|
||||||
|
String violationFams = it.next();
|
||||||
|
while(it.hasNext()){
|
||||||
|
violationFams += ","+it.next();
|
||||||
|
}
|
||||||
|
return violationFams;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getViolationFamilies(){
|
||||||
|
return violationFamilies;
|
||||||
|
}
|
||||||
|
|
||||||
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||||
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
|
||||||
|
|
||||||
public String getSampleMom() {
|
|
||||||
return sampleMom;
|
|
||||||
}
|
|
||||||
public String getSampleDad() {
|
|
||||||
return sampleDad;
|
|
||||||
}
|
|
||||||
public String getSampleChild() {
|
|
||||||
return sampleChild;
|
|
||||||
}
|
|
||||||
public double getMinGenotypeQuality() {
|
public double getMinGenotypeQuality() {
|
||||||
return minGenotypeQuality;
|
return minGenotypeQuality;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Constructor
|
||||||
* @param sampleMomP - sample name of mom
|
|
||||||
* @param sampleDadP - sample name of dad
|
|
||||||
* @param sampleChildP - sample name of child
|
|
||||||
*/
|
|
||||||
public MendelianViolation (String sampleMomP, String sampleDadP, String sampleChildP) {
|
|
||||||
sampleMom = sampleMomP;
|
|
||||||
sampleDad = sampleDadP;
|
|
||||||
sampleChild = sampleChildP;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param family - the sample names string "mom+dad=child"
|
|
||||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||||
*/
|
|
||||||
public MendelianViolation(String family, double minGenotypeQualityP) {
|
|
||||||
minGenotypeQuality = minGenotypeQualityP;
|
|
||||||
|
|
||||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
|
||||||
if (m.matches()) {
|
|
||||||
sampleMom = m.group(1);
|
|
||||||
sampleDad = m.group(2);
|
|
||||||
sampleChild = m.group(3);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An alternative to the more general constructor if you want to get the Sample information from the engine yourself.
|
|
||||||
* @param sample - the sample object extracted from the sample metadata YAML file given to the engine.
|
|
||||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
|
|
||||||
*/
|
|
||||||
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
|
|
||||||
sampleMom = sample.getMother().getID();
|
|
||||||
sampleDad = sample.getFather().getID();
|
|
||||||
sampleChild = sample.getID();
|
|
||||||
minGenotypeQuality = minGenotypeQualityP;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
|
|
||||||
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
|
|
||||||
* before evaluating it for mendelian violation, you can call setAlleles and then isViolation().
|
|
||||||
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
|
|
||||||
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
|
|
||||||
*/
|
|
||||||
public boolean setAlleles (VariantContext vc)
|
|
||||||
{
|
|
||||||
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
|
|
||||||
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
|
|
||||||
Genotype gChild = vc.getGenotypes(sampleChild).get(sampleChild);
|
|
||||||
|
|
||||||
if (gMom == null || gDad == null || gChild == null)
|
|
||||||
throw new IllegalArgumentException(String.format("Variant %s:%d didn't contain genotypes for all family members: mom=%s dad=%s child=%s", vc.getChr(), vc.getStart(), sampleMom, sampleDad, sampleChild));
|
|
||||||
|
|
||||||
if (gMom.isNoCall() || gDad.isNoCall() || gChild.isNoCall() ||
|
|
||||||
gMom.getPhredScaledQual() < minGenotypeQuality ||
|
|
||||||
gDad.getPhredScaledQual() < minGenotypeQuality ||
|
|
||||||
gChild.getPhredScaledQual() < minGenotypeQuality ) {
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
allelesMom = gMom.getAlleles();
|
|
||||||
allelesDad = gDad.getAlleles();
|
|
||||||
allelesChild = gChild.getAlleles();
|
|
||||||
return !allelesMom.isEmpty() && !allelesDad.isEmpty() && !allelesChild.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
*
|
||||||
|
*/
|
||||||
|
public MendelianViolation(double minGenotypeQualityP) {
|
||||||
|
this(minGenotypeQualityP,true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||||
|
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
|
||||||
|
*/
|
||||||
|
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound) {
|
||||||
|
minGenotypeQuality = minGenotypeQualityP;
|
||||||
|
this.abortOnSampleNotFound = abortOnSampleNotFound;
|
||||||
|
violationFamilies = new ArrayList<String>();
|
||||||
|
createInheritanceMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||||
|
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
|
||||||
|
* @param completeTriosOnly - whether only complete trios are considered or parent/child pairs are too.
|
||||||
|
*/
|
||||||
|
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound, boolean completeTriosOnly) {
|
||||||
|
minGenotypeQuality = minGenotypeQualityP;
|
||||||
|
this.abortOnSampleNotFound = abortOnSampleNotFound;
|
||||||
|
violationFamilies = new ArrayList<String>();
|
||||||
|
createInheritanceMap();
|
||||||
|
allCalledOnly = completeTriosOnly;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param families the families to be checked for Mendelian violations
|
||||||
* @param vc the variant context to extract the genotypes and alleles for mom, dad and child.
|
* @param vc the variant context to extract the genotypes and alleles for mom, dad and child.
|
||||||
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public boolean isViolation(VariantContext vc)
|
|
||||||
{
|
|
||||||
return setAlleles(vc) && isViolation();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return whether or not there is a mendelian violation at the site.
|
* @return whether or not there is a mendelian violation at the site.
|
||||||
*/
|
*/
|
||||||
public boolean isViolation() {
|
public int countViolations(Map<String, Set<Sample>> families, VariantContext vc){
|
||||||
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
|
|
||||||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0)))
|
//Reset counts
|
||||||
return false;
|
nocall = 0;
|
||||||
return true;
|
lowQual = 0;
|
||||||
|
familyCalled = 0;
|
||||||
|
varFamilyCalled = 0;
|
||||||
|
violations_total=0;
|
||||||
|
violationFamilies.clear();
|
||||||
|
clearInheritanceMap();
|
||||||
|
|
||||||
|
for(Set<Sample> family : families.values()){
|
||||||
|
Iterator<Sample> sampleIterator = family.iterator();
|
||||||
|
Sample sample;
|
||||||
|
while(sampleIterator.hasNext()){
|
||||||
|
sample = sampleIterator.next();
|
||||||
|
if(sample.getParents().size() > 0)
|
||||||
|
updateViolations(sample.getFamilyID(),sample.getMaternalID(), sample.getPaternalID(), sample.getID() ,vc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return violations_total;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isViolation(Sample mother, Sample father, Sample child, VariantContext vc){
|
||||||
|
|
||||||
|
//Reset counts
|
||||||
|
nocall = 0;
|
||||||
|
lowQual = 0;
|
||||||
|
familyCalled = 0;
|
||||||
|
varFamilyCalled = 0;
|
||||||
|
violations_total=0;
|
||||||
|
violationFamilies.clear();
|
||||||
|
clearInheritanceMap();
|
||||||
|
updateViolations(mother.getFamilyID(),mother.getID(),father.getID(),child.getID(),vc);
|
||||||
|
return violations_total>0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void updateViolations(String familyId, String motherId, String fatherId, String childId, VariantContext vc){
|
||||||
|
|
||||||
|
int count;
|
||||||
|
Genotype gMom = vc.getGenotype(motherId);
|
||||||
|
Genotype gDad = vc.getGenotype(fatherId);
|
||||||
|
Genotype gChild = vc.getGenotype(childId);
|
||||||
|
|
||||||
|
if (gMom == null || gDad == null || gChild == null){
|
||||||
|
if(abortOnSampleNotFound)
|
||||||
|
throw new IllegalArgumentException(String.format("Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s family=%s", vc.getChr(), vc.getStart(), familyId, motherId, fatherId, childId));
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
//Count No calls
|
||||||
|
if(allCalledOnly && (!gMom.isCalled() || !gDad.isCalled() || !gChild.isCalled())){
|
||||||
|
nocall++;
|
||||||
|
}
|
||||||
|
else if (!gMom.isCalled() && !gDad.isCalled() || !gChild.isCalled()){
|
||||||
|
nocall++;
|
||||||
|
}
|
||||||
|
//Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned
|
||||||
|
else if (minGenotypeQuality>0 && (gMom.getPhredScaledQual() < minGenotypeQuality ||
|
||||||
|
gDad.getPhredScaledQual() < minGenotypeQuality ||
|
||||||
|
gChild.getPhredScaledQual() < minGenotypeQuality )) {
|
||||||
|
lowQual++;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
//Count all families per loci called
|
||||||
|
familyCalled++;
|
||||||
|
//If the family is all homref, not too interesting
|
||||||
|
if(!(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef()))
|
||||||
|
{
|
||||||
|
varFamilyCalled++;
|
||||||
|
if(isViolation(gMom, gDad, gChild)){
|
||||||
|
violationFamilies.add(familyId);
|
||||||
|
violations_total++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
count = inheritance.get(gMom.getType()).get(gDad.getType()).get(gChild.getType());
|
||||||
|
inheritance.get(gMom.getType()).get(gDad.getType()).put(gChild.getType(),count+1);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isViolation(Genotype gMom, Genotype gDad, Genotype gChild) {
|
||||||
|
//1 parent is no "call
|
||||||
|
if(!gMom.isCalled()){
|
||||||
|
return (gDad.isHomRef() && gChild.isHomVar()) || (gDad.isHomVar() && gChild.isHomRef());
|
||||||
|
}
|
||||||
|
else if(!gDad.isCalled()){
|
||||||
|
return (gMom.isHomRef() && gChild.isHomVar()) || (gMom.isHomVar() && gChild.isHomRef());
|
||||||
|
}
|
||||||
|
//Both parents have genotype information
|
||||||
|
return !(gMom.getAlleles().contains(gChild.getAlleles().get(0)) && gDad.getAlleles().contains(gChild.getAlleles().get(1)) ||
|
||||||
|
gMom.getAlleles().contains(gChild.getAlleles().get(1)) && gDad.getAlleles().contains(gChild.getAlleles().get(0)));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createInheritanceMap(){
|
||||||
|
|
||||||
|
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||||
|
for(Genotype.Type mType : Genotype.Type.values()){
|
||||||
|
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||||
|
for(Genotype.Type dType : Genotype.Type.values()){
|
||||||
|
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
|
||||||
|
for(Genotype.Type cType : Genotype.Type.values()){
|
||||||
|
inheritance.get(mType).get(dType).put(cType, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void clearInheritanceMap(){
|
||||||
|
for(Genotype.Type mType : Genotype.Type.values()){
|
||||||
|
for(Genotype.Type dType : Genotype.Type.values()){
|
||||||
|
for(Genotype.Type cType : Genotype.Type.values()){
|
||||||
|
inheritance.get(mType).get(dType).put(cType, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the likelihood ratio for a mendelian violation
|
* @return the likelihood ratio for a mendelian violation
|
||||||
*/
|
*/
|
||||||
public double violationLikelihoodRatio(VariantContext vc) {
|
public double violationLikelihoodRatio(VariantContext vc, String motherId, String fatherId, String childId) {
|
||||||
double[] logLikAssignments = new double[27];
|
double[] logLikAssignments = new double[27];
|
||||||
// the matrix to set up is
|
// the matrix to set up is
|
||||||
// MOM DAD CHILD
|
// MOM DAD CHILD
|
||||||
|
|
@ -152,9 +399,9 @@ public class MendelianViolation {
|
||||||
// AA AB | AB
|
// AA AB | AB
|
||||||
// |- BB
|
// |- BB
|
||||||
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||||
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
double[] momGL = vc.getGenotype(motherId).getLikelihoods().getAsVector();
|
||||||
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
double[] dadGL = vc.getGenotype(fatherId).getLikelihoods().getAsVector();
|
||||||
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
double[] childGL = vc.getGenotype(childId).getLikelihoods().getAsVector();
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||||
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||||
|
|
|
||||||
|
|
@ -27,11 +27,42 @@ public class SampleDBUnitTest extends BaseTest {
|
||||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));
|
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));
|
||||||
|
|
||||||
|
private static final Set<Sample> testPEDFamilyF2 = new HashSet<Sample>(Arrays.asList(
|
||||||
|
new Sample("s2", "fam2", "d2", "m2", Gender.FEMALE, Affection.AFFECTED),
|
||||||
|
new Sample("d2", "fam2", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||||
|
new Sample("m2", "fam2", null, null, Gender.FEMALE, Affection.UNKNOWN)
|
||||||
|
));
|
||||||
|
|
||||||
|
private static final Set<Sample> testPEDFamilyF3 = new HashSet<Sample>(Arrays.asList(
|
||||||
|
new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED),
|
||||||
|
new Sample("d1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN),
|
||||||
|
new Sample("m1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN)
|
||||||
|
));
|
||||||
|
|
||||||
private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
|
private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
|
||||||
new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
||||||
new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
||||||
new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));
|
new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));
|
||||||
|
|
||||||
|
private static final HashMap<String, Set<Sample>> testGetFamilies = new HashMap<String,Set<Sample>>();
|
||||||
|
static {
|
||||||
|
testGetFamilies.put("fam1", testPEDSamples);
|
||||||
|
testGetFamilies.put("fam2", testPEDFamilyF2);
|
||||||
|
testGetFamilies.put("fam3", testPEDFamilyF3);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final Set<Sample> testKidsWithParentsFamilies2 = new HashSet<Sample>(Arrays.asList(
|
||||||
|
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||||
|
new Sample("kid3", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED),
|
||||||
|
new Sample("kid2", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED)));
|
||||||
|
|
||||||
|
private static final HashSet<String> testGetPartialFamiliesIds = new HashSet<String>(Arrays.asList("kid","s1"));
|
||||||
|
private static final HashMap<String, Set<Sample>> testGetPartialFamilies = new HashMap<String,Set<Sample>>();
|
||||||
|
static {
|
||||||
|
testGetPartialFamilies.put("fam1", new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED))));
|
||||||
|
testGetPartialFamilies.put("fam3", new HashSet<Sample>(Arrays.asList(new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED))));
|
||||||
|
}
|
||||||
|
|
||||||
private static final String testPEDString =
|
private static final String testPEDString =
|
||||||
String.format("%s%n%s%n%s",
|
String.format("%s%n%s%n%s",
|
||||||
"fam1 kid dad mom 1 2",
|
"fam1 kid dad mom 1 2",
|
||||||
|
|
@ -46,6 +77,18 @@ public class SampleDBUnitTest extends BaseTest {
|
||||||
"fam3 s1 d1 m1 2 2",
|
"fam3 s1 d1 m1 2 2",
|
||||||
"fam2 s2 d2 m2 2 2");
|
"fam2 s2 d2 m2 2 2");
|
||||||
|
|
||||||
|
private static final String testPEDMultipleFamilies2 =
|
||||||
|
String.format("%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s",
|
||||||
|
"fam1 kid dad mom 1 2",
|
||||||
|
"fam1 dad 0 0 1 1",
|
||||||
|
"fam1 mom 0 0 2 2",
|
||||||
|
"fam4 kid4 dad4 0 1 2",
|
||||||
|
"fam4 dad4 0 0 1 1",
|
||||||
|
"fam5 kid2 dad2 mom2 1 2",
|
||||||
|
"fam5 kid3 dad2 mom2 1 2",
|
||||||
|
"fam5 dad2 0 0 1 1",
|
||||||
|
"fam5 mom2 0 0 2 2");
|
||||||
|
|
||||||
private static final String testPEDStringInconsistentGender =
|
private static final String testPEDStringInconsistentGender =
|
||||||
"fam1 kid 0 0 2 2";
|
"fam1 kid 0 0 2 2";
|
||||||
|
|
||||||
|
|
@ -138,6 +181,25 @@ public class SampleDBUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
|
Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test()
|
||||||
|
public void getFamilies(){
|
||||||
|
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
|
||||||
|
SampleDB db = builder.getFinalSampleDB();
|
||||||
|
Assert.assertEquals(db.getFamilies(),testGetFamilies);
|
||||||
|
Assert.assertEquals(db.getFamilies(null),testGetFamilies);
|
||||||
|
Assert.assertEquals(db.getFamilies(testGetPartialFamiliesIds),testGetPartialFamilies);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test()
|
||||||
|
public void testGetChildrenWithParents()
|
||||||
|
{
|
||||||
|
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies2));
|
||||||
|
SampleDB db = builder.getFinalSampleDB();
|
||||||
|
Assert.assertEquals(db.getChildrenWithParents(), testKidsWithParentsFamilies2);
|
||||||
|
Assert.assertEquals(db.getChildrenWithParents(false), testKidsWithParentsFamilies2);
|
||||||
|
Assert.assertEquals(db.getChildrenWithParents(true), new HashSet<Sample>(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED))));
|
||||||
|
}
|
||||||
|
|
||||||
@Test()
|
@Test()
|
||||||
public void loadFamilyIDs() {
|
public void loadFamilyIDs() {
|
||||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
|
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeSuite;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
||||||
|
public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
|
|
||||||
|
static double[] AA1, AB1, BB1;
|
||||||
|
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
|
||||||
|
static final int numSamples = 3;
|
||||||
|
static double[][] priors = new double[2][2*numSamples+1]; // flat priors
|
||||||
|
|
||||||
|
@BeforeSuite
|
||||||
|
public void before() {
|
||||||
|
AA1 = new double[]{0.0, -20.0, -20.0};
|
||||||
|
AB1 = new double[]{-20.0, 0.0, -20.0};
|
||||||
|
BB1 = new double[]{-20.0, -20.0, 0.0};
|
||||||
|
AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0};
|
||||||
|
AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0};
|
||||||
|
AC2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0};
|
||||||
|
BB2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0};
|
||||||
|
BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0};
|
||||||
|
CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0};
|
||||||
|
}
|
||||||
|
|
||||||
|
private class GetGLsTest extends TestDataProvider {
|
||||||
|
GenotypesContext GLs;
|
||||||
|
int numAltAlleles;
|
||||||
|
String name;
|
||||||
|
|
||||||
|
private GetGLsTest(String name, int numAltAlleles, Genotype... arg) {
|
||||||
|
super(GetGLsTest.class, name);
|
||||||
|
GLs = GenotypesContext.create(arg);
|
||||||
|
this.name = name;
|
||||||
|
this.numAltAlleles = numAltAlleles;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("%s input=%s", super.toString(), GLs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Genotype createGenotype(String name, double[] gls) {
|
||||||
|
return new Genotype(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), Genotype.NO_LOG10_PERROR, gls);
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "getGLs")
|
||||||
|
public Object[][] createGLsData() {
|
||||||
|
|
||||||
|
// bi-allelic case
|
||||||
|
new GetGLsTest("B0", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AA3", AA1));
|
||||||
|
new GetGLsTest("B1", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AB", AB1));
|
||||||
|
new GetGLsTest("B2", 1, createGenotype("AA1", AA1), createGenotype("BB", BB1), createGenotype("AA2", AA1));
|
||||||
|
new GetGLsTest("B3a", 1, createGenotype("AB", AB1), createGenotype("AA", AA1), createGenotype("BB", BB1));
|
||||||
|
new GetGLsTest("B3b", 1, createGenotype("AB1", AB1), createGenotype("AB2", AB1), createGenotype("AB3", AB1));
|
||||||
|
new GetGLsTest("B4", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("AA", AA1));
|
||||||
|
new GetGLsTest("B5", 1, createGenotype("BB1", BB1), createGenotype("AB", AB1), createGenotype("BB2", BB1));
|
||||||
|
new GetGLsTest("B6", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("BB3", BB1));
|
||||||
|
|
||||||
|
// tri-allelic case
|
||||||
|
new GetGLsTest("B1C0", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AB", AB2));
|
||||||
|
new GetGLsTest("B0C1", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AC", AC2));
|
||||||
|
new GetGLsTest("B1C1a", 2, createGenotype("AA", AA2), createGenotype("AB", AB2), createGenotype("AC", AC2));
|
||||||
|
new GetGLsTest("B1C1b", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("BC", BC2));
|
||||||
|
new GetGLsTest("B2C1", 2, createGenotype("AB1", AB2), createGenotype("AB2", AB2), createGenotype("AC", AC2));
|
||||||
|
new GetGLsTest("B3C2a", 2, createGenotype("AB", AB2), createGenotype("BC1", BC2), createGenotype("BC2", BC2));
|
||||||
|
new GetGLsTest("B3C2b", 2, createGenotype("AB", AB2), createGenotype("BB", BB2), createGenotype("CC", CC2));
|
||||||
|
|
||||||
|
return GetGLsTest.getTests(GetGLsTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(dataProvider = "getGLs")
|
||||||
|
public void testGLs(GetGLsTest cfg) {
|
||||||
|
|
||||||
|
final double[][] log10AlleleFrequencyLikelihoods = new double[2][2*numSamples+1];
|
||||||
|
final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1];
|
||||||
|
for ( int i = 0; i < 2; i++ ) {
|
||||||
|
for ( int j = 0; j < 2*numSamples+1; j++ ) {
|
||||||
|
log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||||
|
log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
|
||||||
|
|
||||||
|
int nameIndex = 1;
|
||||||
|
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||||
|
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
|
||||||
|
int calculatedAlleleCount = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors[allele]);
|
||||||
|
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -7,7 +7,6 @@ import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
// ********************************************************************************** //
|
// ********************************************************************************** //
|
||||||
|
|
@ -29,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSamplePilot1() {
|
public void testMultiSamplePilot1() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||||
Arrays.asList("286f0de92e4ce57986ba861390c6019d"));
|
Arrays.asList("a2d3839c4ebb390b0012d495e4e53b3a"));
|
||||||
executeTest("test MultiSample Pilot1", spec);
|
executeTest("test MultiSample Pilot1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -45,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testWithAllelesPassedIn2() {
|
public void testWithAllelesPassedIn2() {
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||||
Arrays.asList("d0593483e85a7d815f4c5ee6db284d2a"));
|
Arrays.asList("43e7a17d95b1a0cf72e669657794d802"));
|
||||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -53,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testSingleSamplePilot2() {
|
public void testSingleSamplePilot2() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("3ccce5d909f8f128e496f6841836e5f7"));
|
Arrays.asList("ae29b9c9aacce8046dc780430540cd62"));
|
||||||
executeTest("test SingleSample Pilot2", spec);
|
executeTest("test SingleSample Pilot2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864";
|
private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCompressedOutput() {
|
public void testCompressedOutput() {
|
||||||
|
|
@ -84,7 +83,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||||
|
|
||||||
String md5 = "95614280c565ad90f8c000376fef822c";
|
String md5 = "32a34362dff51d8b73a3335048516d82";
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||||
|
|
@ -165,8 +164,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testHeterozyosity() {
|
public void testHeterozyosity() {
|
||||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||||
e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" );
|
e.put( 0.01, "2cb2544739e01f6c08fd820112914317" );
|
||||||
e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" );
|
e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" );
|
||||||
|
|
||||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
@ -276,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("6e182a58472ea17c8b0eb01f80562fbd"));
|
Arrays.asList("45633d905136c86e9d3f90ce613255e5"));
|
||||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -286,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||||
Arrays.asList("f93f8a35b47bcf96594ada55e2312c73"));
|
Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead"));
|
||||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -295,8 +294,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||||
Arrays.asList("9be28cb208d8b0314d2bc2696e2fd8d4"));
|
Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51"));
|
||||||
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
|
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
|
|
@ -291,6 +291,17 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testVEMendelianViolationEvaluator() {
|
||||||
|
String vcfFile = "/MendelianViolationEval.vcf";
|
||||||
|
String pedFile = "/MendelianViolationEval.ped";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST",
|
||||||
|
1,
|
||||||
|
Arrays.asList("66e72c887124f40933d32254b2dd44a3"));
|
||||||
|
executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCompVsEvalAC() {
|
public void testCompVsEvalAC() {
|
||||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||||
|
|
|
||||||
|
|
@ -115,4 +115,26 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
executeTest("testUsingDbsnpName--" + testFile, spec);
|
executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParallelization() {
|
||||||
|
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||||
|
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||||
|
WalkerTestSpec spec;
|
||||||
|
|
||||||
|
spec = new WalkerTestSpec(
|
||||||
|
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
|
||||||
|
1,
|
||||||
|
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
|
||||||
|
);
|
||||||
|
executeTest("testParallelization (2 threads)--" + testfile, spec);
|
||||||
|
|
||||||
|
spec = new WalkerTestSpec(
|
||||||
|
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
|
||||||
|
1,
|
||||||
|
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
|
||||||
|
);
|
||||||
|
|
||||||
|
executeTest("testParallelization (4 threads)--" + testfile, spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,10 @@ class DataProcessingPipeline extends QScript {
|
||||||
@Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false)
|
@Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false)
|
||||||
var defaultPlatform: String = ""
|
var defaultPlatform: String = ""
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Input(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false)
|
||||||
|
var testMode: Boolean = false
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Global Variables
|
* Global Variables
|
||||||
|
|
@ -335,6 +339,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.known ++= qscript.indels
|
this.known ++= qscript.indels
|
||||||
this.consensusDeterminationModel = cleanModelEnum
|
this.consensusDeterminationModel = cleanModelEnum
|
||||||
this.compress = 0
|
this.compress = 0
|
||||||
|
this.noPGTag = qscript.testMode;
|
||||||
this.scatterCount = nContigs
|
this.scatterCount = nContigs
|
||||||
this.analysisName = queueLogDir + outBam + ".clean"
|
this.analysisName = queueLogDir + outBam + ".clean"
|
||||||
this.jobName = queueLogDir + outBam + ".clean"
|
this.jobName = queueLogDir + outBam + ".clean"
|
||||||
|
|
@ -360,6 +365,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.out = outBam
|
this.out = outBam
|
||||||
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
|
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
|
||||||
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
|
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
|
||||||
|
this.no_pg_tag = qscript.testMode
|
||||||
this.scatterCount = nContigs
|
this.scatterCount = nContigs
|
||||||
this.isIntermediate = false
|
this.isIntermediate = false
|
||||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,10 @@ class PacbioProcessingPipeline extends QScript {
|
||||||
@Input(shortName="bwastring", required=false)
|
@Input(shortName="bwastring", required=false)
|
||||||
var bwastring: String = ""
|
var bwastring: String = ""
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Input(shortName = "test", fullName = "test_mode", required = false)
|
||||||
|
var testMode: Boolean = false
|
||||||
|
|
||||||
val queueLogDir: String = ".qlog/"
|
val queueLogDir: String = ".qlog/"
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
|
|
@ -170,6 +174,7 @@ class PacbioProcessingPipeline extends QScript {
|
||||||
this.input_file :+= inBam
|
this.input_file :+= inBam
|
||||||
this.recal_file = inRecalFile
|
this.recal_file = inRecalFile
|
||||||
this.out = outBam
|
this.out = outBam
|
||||||
|
this.no_pg_tag = testMode
|
||||||
this.isIntermediate = false
|
this.isIntermediate = false
|
||||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||||
|
|
@ -177,7 +182,6 @@ class PacbioProcessingPipeline extends QScript {
|
||||||
}
|
}
|
||||||
|
|
||||||
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
||||||
this.resources = R
|
|
||||||
this.recal_file = inRecalFile
|
this.recal_file = inRecalFile
|
||||||
this.output_dir = outPath
|
this.output_dir = outPath
|
||||||
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
package org.broadinstitute.sting.queue.pipeline
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.testng.annotations.Test
|
||||||
|
import org.broadinstitute.sting.BaseTest
|
||||||
|
|
||||||
|
class DataProcessingPipelineTest {
|
||||||
|
@Test
|
||||||
|
def testSimpleBAM {
|
||||||
|
val projectName = "test1"
|
||||||
|
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
|
||||||
|
val spec = new PipelineTestSpec
|
||||||
|
spec.name = "DataProcessingPipeline"
|
||||||
|
spec.args = Array(
|
||||||
|
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
|
||||||
|
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||||
|
" -i " + BaseTest.testDir + "exampleBAM.bam",
|
||||||
|
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
|
||||||
|
" -nv ",
|
||||||
|
" -test ",
|
||||||
|
" -p " + projectName).mkString
|
||||||
|
spec.fileMD5s += testOut -> "1f85e76de760167a77ed1d9ab4da2936"
|
||||||
|
PipelineTest.executeTest(spec)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testBWAPEBAM {
|
||||||
|
val projectName = "test2"
|
||||||
|
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
|
||||||
|
val spec = new PipelineTestSpec
|
||||||
|
spec.name = "DataProcessingPipeline"
|
||||||
|
spec.args = Array(
|
||||||
|
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
|
||||||
|
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||||
|
" -i " + BaseTest.testDir + "exampleBAM.bam",
|
||||||
|
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
|
||||||
|
" -nv ",
|
||||||
|
" -test ",
|
||||||
|
" -bwa /home/unix/carneiro/bin/bwa",
|
||||||
|
" -bwape ",
|
||||||
|
" -p " + projectName).mkString
|
||||||
|
spec.fileMD5s += testOut -> "57416a0abdf9524bc92834d466529708"
|
||||||
|
PipelineTest.executeTest(spec)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
package org.broadinstitute.sting.queue.pipeline
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.testng.annotations.Test
|
||||||
|
import org.broadinstitute.sting.BaseTest
|
||||||
|
|
||||||
|
class PacbioProcessingPipelineTest {
|
||||||
|
@Test
|
||||||
|
def testBAM {
|
||||||
|
val testOut = "exampleBAM.recal.bam"
|
||||||
|
val spec = new PipelineTestSpec
|
||||||
|
spec.name = "pacbioProcessingPipeline"
|
||||||
|
spec.args = Array(
|
||||||
|
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala",
|
||||||
|
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||||
|
" -i " + BaseTest.testDir + "exampleBAM.bam",
|
||||||
|
" -blasr ",
|
||||||
|
" -test ",
|
||||||
|
" -D " + BaseTest.testDir + "exampleDBSNP.vcf").mkString
|
||||||
|
spec.fileMD5s += testOut -> "f0adce660b55cb91d5f987f9a145471e"
|
||||||
|
PipelineTest.executeTest(spec)
|
||||||
|
}
|
||||||
|
}
|
||||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,282 @@
|
||||||
|
##fileformat=VCFv4.1
|
||||||
|
##FILTER=<ID=NC,Description="Inconsistent Genotype Submission For At Least One Sample">
|
||||||
|
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
|
||||||
|
##INFO=<ID=ASP,Number=0,Type=Flag,Description="Is Assembly specific. This is set if the variant only maps to one assembly">
|
||||||
|
##INFO=<ID=ASS,Number=0,Type=Flag,Description="In acceptor splice site FxnCode = 73">
|
||||||
|
##INFO=<ID=CDA,Number=0,Type=Flag,Description="Variation is interrogated in a clinical diagnostic assay">
|
||||||
|
##INFO=<ID=CFL,Number=0,Type=Flag,Description="Has Assembly conflict. This is for weight 1 and 2 variant that maps to different chromosomes on different assemblies.">
|
||||||
|
##INFO=<ID=CLN,Number=0,Type=Flag,Description="Variant is Clinical(LSDB,OMIM,TPA,Diagnostic)">
|
||||||
|
##INFO=<ID=DSS,Number=0,Type=Flag,Description="In donor splice-site FxnCode = 75">
|
||||||
|
##INFO=<ID=G5,Number=0,Type=Flag,Description=">5% minor allele frequency in 1+ populations">
|
||||||
|
##INFO=<ID=G5A,Number=0,Type=Flag,Description=">5% minor allele frequency in each and all populations">
|
||||||
|
##INFO=<ID=GCF,Number=0,Type=Flag,Description="Has Genotype Conflict Same (rs, ind), different genotype. N/N is not included.">
|
||||||
|
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:) and each pair is delimited by a vertical bar (|)">
|
||||||
|
##INFO=<ID=GMAF,Number=1,Type=Float,Description="Global Minor Allele Frequency [0, 0.5]; global population is 1000GenomesProject phase 1 genotype data from 629 individuals, released in the 08-04-2010 dataset">
|
||||||
|
##INFO=<ID=GNO,Number=0,Type=Flag,Description="Genotypes available. The variant has individual genotype (in SubInd table).">
|
||||||
|
##INFO=<ID=HD,Number=0,Type=Flag,Description="Marker is on high density genotyping kit (50K density or greater). The variant may have phenotype associations present in dbGaP.">
|
||||||
|
##INFO=<ID=INT,Number=0,Type=Flag,Description="In Intron FxnCode = 6">
|
||||||
|
##INFO=<ID=KGPROD,Number=0,Type=Flag,Description="1000 Genome production phase">
|
||||||
|
##INFO=<ID=KGPilot1,Number=0,Type=Flag,Description="1000 Genome discovery(pilot1) 2009">
|
||||||
|
##INFO=<ID=KGPilot123,Number=0,Type=Flag,Description="1000 Genome discovery all pilots 2010(1,2,3)">
|
||||||
|
##INFO=<ID=KGVAL,Number=0,Type=Flag,Description="1000 Genome validated by second method">
|
||||||
|
##INFO=<ID=LSD,Number=0,Type=Flag,Description="Submitted from a locus-specific database">
|
||||||
|
##INFO=<ID=MTP,Number=0,Type=Flag,Description="Microattribution/third-party annotation(TPA:GWAS,PAGE)">
|
||||||
|
##INFO=<ID=MUT,Number=0,Type=Flag,Description="Is mutation (journal citation, explicit fact): a low frequency variation that is cited in journal and other reputable sources">
|
||||||
|
##INFO=<ID=NOC,Number=0,Type=Flag,Description="Contig allele not present in variant allele list. The reference sequence allele at the mapped position is not present in the variant allele list, adjusted for orientation.">
|
||||||
|
##INFO=<ID=NOV,Number=0,Type=Flag,Description="Rs cluster has non-overlapping allele sets. True when rs set has more than 2 alleles from different submissions and these sets share no alleles in common.">
|
||||||
|
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
|
||||||
|
##INFO=<ID=NSF,Number=0,Type=Flag,Description="Has non-synonymous frameshift A coding region variation where one allele in the set changes all downstream amino acids. FxnClass = 44">
|
||||||
|
##INFO=<ID=NSM,Number=0,Type=Flag,Description="Has non-synonymous missense A coding region variation where one allele in the set changes protein peptide. FxnClass = 42">
|
||||||
|
##INFO=<ID=NSN,Number=0,Type=Flag,Description="Has non-synonymous nonsense A coding region variation where one allele in the set changes to STOP codon (TER). FxnClass = 41">
|
||||||
|
##INFO=<ID=OM,Number=0,Type=Flag,Description="Has OMIM/OMIA">
|
||||||
|
##INFO=<ID=OTH,Number=0,Type=Flag,Description="Has other variant with exactly the same set of mapped positions on NCBI refernce assembly.">
|
||||||
|
##INFO=<ID=PH1,Number=0,Type=Flag,Description="Phase 1 genotyped: filtered, non-redundant">
|
||||||
|
##INFO=<ID=PH2,Number=0,Type=Flag,Description="Phase 2 genotyped: filtered, non-redundant">
|
||||||
|
##INFO=<ID=PH3,Number=0,Type=Flag,Description="Phase 3 genotyped: filtered, non-redundant">
|
||||||
|
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
|
||||||
|
##INFO=<ID=PMC,Number=0,Type=Flag,Description="Links exist to PubMed Central article">
|
||||||
|
##INFO=<ID=R3,Number=0,Type=Flag,Description="In 3' gene region FxnCode = 13">
|
||||||
|
##INFO=<ID=R5,Number=0,Type=Flag,Description="In 5' gene region FxnCode = 15">
|
||||||
|
##INFO=<ID=REF,Number=0,Type=Flag,Description="Has reference A coding region variation where one allele in the set is identical to the reference sequence. FxnCode = 8">
|
||||||
|
##INFO=<ID=RSPOS,Number=1,Type=Integer,Description="Chr position reported in dbSNP">
|
||||||
|
##INFO=<ID=RV,Number=0,Type=Flag,Description="RS orientation is reversed">
|
||||||
|
##INFO=<ID=S3D,Number=0,Type=Flag,Description="Has 3D structure - SNP3D table">
|
||||||
|
##INFO=<ID=SAO,Number=1,Type=Integer,Description="Variant Allele Origin: 0 - unspecified, 1 - Germline, 2 - Somatic, 3 - Both">
|
||||||
|
##INFO=<ID=SCS,Number=1,Type=Integer,Description="Variant Clinical Significance, 0 - unknown, 1 - untested, 2 - non-pathogenic, 3 - probable-non-pathogenic, 4 - probable-pathogenic, 5 - pathogenic, 6 - drug-response, 7 - histocompatibility, 255 - other">
|
||||||
|
##INFO=<ID=SLO,Number=0,Type=Flag,Description="Has SubmitterLinkOut - From SNP->SubSNP->Batch.link_out">
|
||||||
|
##INFO=<ID=SSR,Number=1,Type=Integer,Description="Variant Suspect Reason Code, 0 - unspecified, 1 - Paralog, 2 - byEST, 3 - Para_EST, 4 - oldAlign, 5 - other">
|
||||||
|
##INFO=<ID=SYN,Number=0,Type=Flag,Description="Has synonymous A coding region variation where one allele in the set does not change the encoded amino acid. FxnCode = 3">
|
||||||
|
##INFO=<ID=TPA,Number=0,Type=Flag,Description="Provisional Third Party Annotation(TPA) (currently rs from PHARMGKB who will give phenotype data)">
|
||||||
|
##INFO=<ID=U3,Number=0,Type=Flag,Description="In 3' UTR Location is in an untranslated region (UTR). FxnCode = 53">
|
||||||
|
##INFO=<ID=U5,Number=0,Type=Flag,Description="In 5' UTR Location is in an untranslated region (UTR). FxnCode = 55">
|
||||||
|
##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
|
||||||
|
##INFO=<ID=VLD,Number=0,Type=Flag,Description="Is Validated. This bit is set if the variant has 2+ minor allele count based on frequency or genotype data.">
|
||||||
|
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property">
|
||||||
|
##INFO=<ID=WGT,Number=1,Type=Integer,Description="Weight, 00 - unmapped, 1 - weight 1, 2 - weight 2, 3 - weight 3 or more">
|
||||||
|
##INFO=<ID=WTD,Number=0,Type=Flag,Description="Is Withdrawn by submitter If one member ss is withdrawn by submitter, then this bit is set. If all member ss' are withdrawn, then the rs is deleted to SNPHistory">
|
||||||
|
##INFO=<ID=dbSNPBuildID,Number=1,Type=Integer,Description="First dbSNP Build for RS">
|
||||||
|
##LeftAlignVariants="analysis_type=LeftAlignVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=00-All.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub filter_mismatching_base_and_quals=false"
|
||||||
|
##contig=<ID=chr1,length=249250621,assembly=b37>
|
||||||
|
##phasing=partial
|
||||||
|
##reference=GRCh37.3
|
||||||
|
##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta
|
||||||
|
##source=dbSNP
|
||||||
|
##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf
|
||||||
|
#CHROM POS ID REF ALT QUAL FILTER INFO
|
||||||
|
chr1 10144 rs144773400 TA T . PASS ASP;RSPOS=10145;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10228 rs143255646 TA T . PASS ASP;RSPOS=10229;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10234 rs145599635 C T . PASS ASP;RSPOS=10234;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10248 rs148908337 A T . PASS ASP;RSPOS=10248;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10254 rs140194106 TA T . PASS ASP;RSPOS=10255;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10291 rs145427775 C T . PASS ASP;RSPOS=10291;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10327 rs112750067 T C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10327;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 10329 rs150969722 AC A . PASS ASP;RSPOS=10330;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10351 rs145072688 CTA C,CA . PASS ASP;RSPOS=10352;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10382 rs147093981 AAC A,AC . PASS ASP;RSPOS=10383;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10433 rs56289060 A AC . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10433;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 10439 rs112766696 AC A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10440;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 10439 rs138941843 AC A . PASS ASP;RSPOS=10440;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 10440 rs112155239 C A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10440;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 10492 rs55998931 C T . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.0617001828153565;RSPOS=10492;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040000000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 10519 rs62636508 G C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10519;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 10583 rs58108140 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.270566727605119;KGPilot123;RSPOS=10583;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 10611 rs189107123 C G . PASS KGPilot123;RSPOS=10611;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 10828 rs10218492 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10828;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
|
||||||
|
chr1 10904 rs10218493 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10904;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=119
|
||||||
|
chr1 10927 rs10218527 A G . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10927;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
|
||||||
|
chr1 10938 rs28853987 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10938;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 11014 rs28484712 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11014;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 11022 rs28775022 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11022;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 11081 rs10218495 G T . PASS CFL;GENEINFO=LOC100652771:100652771;GNO;RSPOS=11081;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=119
|
||||||
|
chr1 11863 rs187669455 C A . PASS RSPOS=11863;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 13302 rs180734498 C T . PASS KGPilot123;RSPOS=13302;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 13327 rs144762171 G C . PASS ASP;KGPilot123;RSPOS=13327;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 13684 rs71260404 C T . PASS GENEINFO=LOC100652771:100652771;GNO;RSPOS=13684;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
|
||||||
|
chr1 13980 rs151276478 T C . PASS ASP;KGPilot123;RSPOS=13980;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 14889 rs142444908 G A . PASS ASP;RSPOS=14889;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 14907 rs79585140 A G . PASS GNO;RSPOS=14907;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 14930 rs75454623 A G . PASS GNO;RSPOS=14930;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 14976 rs71252251 G A . PASS ASP;GNO;RSPOS=14976;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
|
||||||
|
chr1 15061 rs71268703 T TG . PASS ASP;GNO;RSPOS=15061;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
|
||||||
|
chr1 15118 rs71252250 A G . PASS ASP;GNO;RSPOS=15118;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
|
||||||
|
chr1 15211 rs144718396 T G . PASS ASP;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 15211 rs78601809 T G . PASS ASP;GNO;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 16257 rs78588380 G C . PASS ASP;GNO;RSPOS=16257;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 16378 rs148220436 T C . PASS ASP;RSPOS=16378;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 16495 rs141130360 G C . PASS ASP;RSPOS=16495;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 16497 rs150723783 A G . PASS ASP;RSPOS=16497;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 17519 rs192890528 G T . PASS RSPOS=17519;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 19226 rs138930629 T A . PASS ASP;RSPOS=19226;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 20141 rs56336884 G A . PASS HD;RSPOS=20141;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000400000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 20144 rs143346096 G A . PASS ASP;RSPOS=20144;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 20206 rs71262675 C T . PASS GNO;RSPOS=20206;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
|
||||||
|
chr1 20245 rs71262674 G A . PASS GMAF=0.256398537477148;GNO;RSPOS=20245;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
|
||||||
|
chr1 20304 rs71262673 G C . PASS GMAF=0.338208409506399;GNO;RSPOS=20304;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
|
||||||
|
chr1 26999 rs147506580 A G . PASS ASP;RSPOS=26999;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 29436 rs2462493 G A . PASS GNO;RSPOS=29436;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=100
|
||||||
|
chr1 30923 rs140337953 G T . PASS ASP;KGPilot123;RSPOS=30923;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 33487 rs77459554 C T . PASS ASP;GNO;RSPOS=33487;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 33495 rs75468675 C T . PASS ASP;GNO;RSPOS=33495;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 33505 rs75627161 T C . PASS ASP;GNO;RSPOS=33505;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 33508 rs75609629 A T . PASS ASP;GNO;RSPOS=33508;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 33521 rs76098219 T A . PASS GNO;RSPOS=33521;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 33593 rs557585 G A . PASS RSPOS=33593;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=83
|
||||||
|
chr1 33648 rs62028204 G T . PASS RSPOS=33648;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 33656 rs113821789 T C . PASS RSPOS=33656;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 51476 rs187298206 T C . PASS KGPilot123;RSPOS=51476;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 51479 rs116400033 T A . PASS ASP;G5;G5A;GMAF=0.113802559414991;KGPilot123;RSPOS=51479;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070010000100;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 51803 rs62637812 T C . PASS GMAF=0.468921389396709;RSPOS=51803;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040000000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 51898 rs76402894 C A . PASS GMAF=0.0731261425959781;GNO;RSPOS=51898;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 51914 rs190452223 T G . PASS KGPilot123;RSPOS=51914;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 51928 rs78732933 G A . PASS GNO;RSPOS=51928;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 51935 rs181754315 C T . PASS KGPilot123;RSPOS=51935;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 51954 rs185832753 G C . PASS KGPilot123;RSPOS=51954;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 52058 rs62637813 G C . PASS GMAF=0.0342778793418647;KGPilot123;RSPOS=52058;SAO=0;SSR=1;VC=SNV;VLD;VP=050000000000040010000140;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 52144 rs190291950 T A . PASS KGPilot123;RSPOS=52144;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 52238 rs150021059 T G . PASS ASP;KGPilot123;RSPOS=52238;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 54353 rs140052487 C A . PASS ASP;KGPilot123;RSPOS=54353;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 54421 rs146477069 A G . PASS ASP;KGPilot123;RSPOS=54421;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 54490 rs141149254 G A . PASS ASP;KGPilot123;RSPOS=54490;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 54676 rs2462492 C T . PASS ASP;GMAF=0.191956124314442;GNO;HD;KGPilot123;RSPOS=54676;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040510000100;WGT=0;dbSNPBuildID=100
|
||||||
|
chr1 54753 rs143174675 T G . PASS ASP;KGPilot123;RSPOS=54753;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 54788 rs59861892 CC C,CCT . PASS ASP;RSPOS=54789;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 54795 rs58014817 T A . PASS ASP;RSPOS=54795;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 55164 rs3091274 C A . PASS G5;G5A;GMAF=0.145338208409506;GNO;KGPilot123;RSPOS=55164;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030110000100;WGT=0;dbSNPBuildID=103
|
||||||
|
chr1 55299 rs10399749 C T . PASS G5;G5A;GMAF=0.278793418647166;GNO;KGPilot123;PH2;RSPOS=55299;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030112000100;WGT=0;dbSNPBuildID=119
|
||||||
|
chr1 55302 rs3091273 C T . PASS RSPOS=55302;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
|
||||||
|
chr1 55313 rs182462964 A T . PASS KGPilot123;RSPOS=55313;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 55322 rs3107974 C T . PASS RSPOS=55322;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
|
||||||
|
chr1 55326 rs3107975 T C . PASS GNO;HD;KGPilot123;RSPOS=55326;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=103
|
||||||
|
chr1 55330 rs185215913 G A . PASS KGPilot123;RSPOS=55330;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 55367 rs190850374 G A . PASS KGPilot123;RSPOS=55367;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 55388 rs182711216 C T . PASS KGPilot123;RSPOS=55388;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 55394 rs2949420 T A . PASS GNO;KGPilot123;PH2;RSPOS=55394;SAO=0;SSR=0;VC=SNV;VP=050000000000000112000100;WGT=0;dbSNPBuildID=101
|
||||||
|
chr1 55416 rs193242050 G A . PASS KGPilot123;RSPOS=55416;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 55427 rs183189405 T C . PASS KGPilot123;RSPOS=55427;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 55545 rs28396308 C T . PASS GNO;RSPOS=55545;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 55816 rs187434873 G A . PASS KGPilot123;RSPOS=55816;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 55850 rs191890754 C G . PASS KGPilot123;RSPOS=55850;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 55852 rs184233019 G C . PASS KGPilot123;RSPOS=55852;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 56644 rs143342222 A C . PASS ASP;KGPilot123;RSPOS=56644;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 57952 rs189727433 A C . PASS KGPilot123;RSPOS=57952;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 58771 rs140128481 T C . PASS ASP;RSPOS=58771;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 58814 rs114420996 G A . PASS ASP;G5;GMAF=0.0982632541133455;KGPilot123;RSPOS=58814;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 59040 rs149755937 T C . PASS ASP;KGPilot123;RSPOS=59040;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 60718 rs78395614 G A . PASS CFL;GNO;RSPOS=60718;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 60726 rs192328835 C A . PASS KGPilot123;RSPOS=60726;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 60791 rs76199781 A G . PASS CFL;GNO;RSPOS=60791;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 61442 rs74970982 A G . PASS CFL;GMAF=0.076782449725777;GNO;KGPilot123;RSPOS=61442;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 61462 rs56992750 T A . PASS CFL;G5;G5A;GMAF=0.0383912248628885;GNO;KGPilot123;RSPOS=61462;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008030110000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 61480 rs75526266 G C . PASS CFL;GNO;RSPOS=61480;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 61499 rs75719746 G A . PASS CFL;GNO;RSPOS=61499;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 61743 rs184286948 G C . PASS KGPilot123;RSPOS=61743;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 61920 rs62637820 G A . PASS CFL;GMAF=0.0255941499085923;RSPOS=61920;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 61987 rs76735897 A G . PASS CFL;GMAF=0.292961608775137;GNO;KGPilot123;RSPOS=61987;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 61989 rs77573425 G C . PASS CFL;GMAF=0.309414990859232;GNO;KGPilot123;RSPOS=61989;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 61993 rs190553843 C T . PASS KGPilot123;RSPOS=61993;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 62156 rs181864839 C T . PASS KGPilot123;RSPOS=62156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 62157 rs10399597 G A . PASS CFL;GMAF=0.00228519195612431;KGPilot123;RSPOS=62157;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=119
|
||||||
|
chr1 62162 rs140556834 G A . PASS ASP;KGPilot123;RSPOS=62162;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 62203 rs28402963 T C . PASS CFL;KGPilot123;RSPOS=62203;SAO=0;SSR=0;VC=SNV;VP=050000000008000010000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 62271 rs28599927 A G . PASS CFL;GMAF=0.138482632541133;RSPOS=62271;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 63268 rs75478250 T C . PASS CFL;GNO;RSPOS=63268;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 63276 rs185977555 G A . PASS KGPilot123;RSPOS=63276;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 63297 rs188886746 G A . PASS KGPilot123;RSPOS=63297;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 63671 rs116440577 G A . PASS ASP;G5;GMAF=0.170018281535649;KGPilot123;RSPOS=63671;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 63737 rs77426996 TACT T,TCTA . PASS CFL;RSPOS=63738;SAO=0;SSR=0;VC=DIV;VP=050000000008000000000200;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 64649 rs181431124 A C . PASS KGPilot123;RSPOS=64649;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 66008 rs2691286 C G . PASS CFL;GNO;RSPOS=66008;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=100
|
||||||
|
chr1 66162 rs62639105 A T . PASS CFL;GMAF=0.320383912248629;GNO;KGPilot123;RSPOS=66162;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000110000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 66176 rs28552463 T A . PASS CFL;GMAF=0.0484460694698355;KGPilot123;RSPOS=66176;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 66219 rs181028663 A T . PASS KGPilot123;RSPOS=66219;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 66238 rs113961546 T A . PASS CFL;GNO;RSPOS=66238;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 66314 rs28534012 T A . PASS CFL;RSPOS=66314;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 66331 rs186063952 A C . PASS KGPilot123;RSPOS=66331;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 66334 rs28464214 T A . PASS CFL;RSPOS=66334;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 66442 rs192044252 T A . PASS KGPilot123;RSPOS=66442;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 66457 rs13328655 T A . PASS CFL;GMAF=0.0795246800731261;KGPilot123;RSPOS=66457;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=121
|
||||||
|
chr1 66503 rs112350669 T A . PASS CFL;RSPOS=66503;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 66507 rs12401368 T A . PASS CFL;GMAF=0.479890310786106;KGPilot123;RSPOS=66507;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=120
|
||||||
|
chr1 66651 rs2257270 A T . PASS CFL;GNO;RSPOS=66651;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=100
|
||||||
|
chr1 67179 rs149952626 C G . PASS ASP;KGPilot123;RSPOS=67179;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 67181 rs77662731 A G . PASS ASP;G5;G5A;GENEINFO=OR4F5:79501;GMAF=0.0470749542961609;GNO;KGPilot123;RSPOS=67181;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070110000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 67223 rs78676975 C A . PASS ASP;GENEINFO=OR4F5:79501;GNO;RSPOS=67223;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 69428 rs140739101 T G . PASS ASP;RSPOS=69428;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 69453 rs142004627 G A . PASS ASP;RSPOS=69453;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 69476 rs148502021 T C . PASS ASP;RSPOS=69476;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 69496 rs150690004 G A . PASS ASP;RSPOS=69496;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 69511 rs75062661 A G . PASS GENEINFO=OR4F5:79501;GMAF=0.193784277879342;GNO;KGPilot123;RSPOS=69511;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000110000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 69534 rs190717287 T C . PASS KGPilot123;RSPOS=69534;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 69552 rs55874132 G C . PASS GENEINFO=OR4F5:79501;HD;RSPOS=69552;S3D;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050300000000040400000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 69590 rs141776804 T A . PASS ASP;RSPOS=69590;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 69594 rs144967600 T C . PASS ASP;RSPOS=69594;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 72148 rs182862337 C T . PASS KGPilot123;RSPOS=72148;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 73841 rs143773730 C T . PASS ASP;KGPilot123;RSPOS=73841;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 74651 rs62641291 G A . PASS RSPOS=74651;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 74681 rs13328683 G T . PASS CFL;GMAF=0.286106032906764;RSPOS=74681;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=121
|
||||||
|
chr1 74709 rs62641292 T A . PASS CFL;RSPOS=74709;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 74771 rs13328675 A G . PASS CFL;RSPOS=74771;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
|
||||||
|
chr1 74790 rs13328700 C G . PASS CFL;RSPOS=74790;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
|
||||||
|
chr1 74792 rs13328684 G A . PASS CFL;RSPOS=74792;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
|
||||||
|
chr1 77462 rs188023513 G A . PASS KGPilot123;RSPOS=77462;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 77470 rs192898053 T C . PASS KGPilot123;RSPOS=77470;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 77874 rs184538873 G A . PASS KGPilot123;RSPOS=77874;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 77961 rs78385339 G A . PASS GMAF=0.125685557586837;KGPilot123;RSPOS=77961;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040010000100;WGT=0;dbSNPBuildID=131
|
||||||
|
chr1 79033 rs62641298 A G . PASS GMAF=0.438299817184644;GNO;HD;KGPilot123;RSPOS=79033;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 79050 rs62641299 G T . PASS GMAF=0.224405850091408;GNO;KGPilot123;RSPOS=79050;SAO=0;SSR=0;VC=SNV;VP=050000000000000110000100;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 79137 rs143777184 A T . PASS ASP;KGPilot123;RSPOS=79137;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 79417 rs184768190 C T . PASS KGPilot123;RSPOS=79417;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 79418 rs2691296 G C . PASS GMAF=0.0178244972577697;RSPOS=79418;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=100
|
||||||
|
chr1 79538 rs2691295 C T . PASS RSPOS=79538;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=100
|
||||||
|
chr1 79772 rs147215883 C G . PASS ASP;KGPilot123;RSPOS=79772;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 79872 rs189224661 T G . PASS KGPilot123;RSPOS=79872;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 80323 rs3942603 G C . PASS CFL;GNO;RSPOS=80323;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 80386 rs3878915 C A . PASS GMAF=0.0118829981718464;RSPOS=80386;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 80454 rs144226842 G C . PASS ASP;KGPilot123;RSPOS=80454;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 81836 rs2259560 A T . PASS ASP;GNO;RSPOS=81836;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=100
|
||||||
|
chr1 81949 rs181567186 T C . PASS KGPilot123;RSPOS=81949;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 81962 rs4030308 T TAA . PASS ASP;RSPOS=81962;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 82102 rs4030307 C T . PASS ASP;RSPOS=82102;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 82103 rs2020400 T C . PASS ASP;RSPOS=82103;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
|
||||||
|
chr1 82126 rs1815133 C T . PASS ASP;RSPOS=82126;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
|
||||||
|
chr1 82133 rs4030306 CA C,CAAAAAAAAAAAAAAA . PASS ASP;RSPOS=82136;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 82154 rs4477212 A G . PASS ASP;HD;RSPOS=82154;SAO=0;SSR=0;VC=SNV;VP=050000000004000400000100;WGT=0;dbSNPBuildID=111
|
||||||
|
chr1 82162 rs1815132 C A . PASS ASP;GMAF=0.0351919561243144;GNO;RSPOS=82162;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=92
|
||||||
|
chr1 82163 rs139113303 G A . PASS ASP;KGPilot123;RSPOS=82163;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 82196 rs112844054 A T . PASS ASP;RSPOS=82196;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
|
||||||
|
chr1 82249 rs1851945 A G . PASS ASP;GMAF=0.0452468007312614;KGPilot123;RSPOS=82249;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=92
|
||||||
|
chr1 82282 rs3871775 G A . PASS ASP;RSPOS=82282;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 82303 rs3871776 T C . PASS ASP;RSPOS=82303;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 82316 rs4030305 A C . PASS ASP;GNO;RSPOS=82316;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 82609 rs149189449 C G . PASS ASP;KGPilot123;RSPOS=82609;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
|
||||||
|
chr1 82676 rs185237834 T G . PASS KGPilot123;RSPOS=82676;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 82734 rs4030331 T C . PASS ASP;GMAF=0.261882998171846;KGPilot123;RSPOS=82734;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 82957 rs189774606 C T . PASS KGPilot123;RSPOS=82957;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 83084 rs181193408 T A . PASS KGPilot123;RSPOS=83084;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 83088 rs186081601 G C . PASS KGPilot123;RSPOS=83088;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 83107 rs4405097 G C . PASS ASP;RSPOS=83107;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=111
|
||||||
|
chr1 83119 rs4030324 AA A,ATAAC . PASS ASP;RSPOS=83120;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
|
||||||
|
chr1 83771 rs189906733 T G . PASS KGPilot123;RSPOS=83771;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 83786 rs58520670 T TA . PASS ASP;RSPOS=83794;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 83815 rs58857344 GAGAA G . PASS ASP;RSPOS=83827;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 83826 rs71281475 AAAGA A,AAA . PASS ASP;GNO;RSPOS=83827;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
|
||||||
|
chr1 83855 rs59596480 GAA G . PASS ASP;RSPOS=83857;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 83872 rs59556914 AA A,AAGA . PASS ASP;RSPOS=83873;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 83884 rs59586754 GAAA G . PASS ASP;RSPOS=83885;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 83897 rs61330047 GAA G . PASS ASP;RSPOS=83899;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 83901 rs58254183 GAAAGAA G . PASS ASP;RSPOS=83903;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 83921 rs61338823 GAA G . PASS ASP;RSPOS=83923;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 83930 rs71281474 AG A,AGA . PASS ASP;GNO;RSPOS=83931;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
|
||||||
|
chr1 83934 rs59235392 AG A,AGAAA . PASS ASP;RSPOS=83935;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 83977 rs180759811 A G . PASS KGPilot123;RSPOS=83977;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 84002 rs28850140 G A . PASS ASP;GMAF=0.138939670932358;KGPilot123;RSPOS=84002;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=125
|
||||||
|
chr1 84010 rs186443818 G A . PASS KGPilot123;RSPOS=84010;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 84018 rs61352176 GAA G . PASS ASP;RSPOS=84020;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
|
||||||
|
chr1 84079 rs190867312 T C . PASS KGPilot123;RSPOS=84079;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 84139 rs183605470 A T . PASS KGPilot123;RSPOS=84139;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 84156 rs188652299 A C . PASS KGPilot123;RSPOS=84156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 84244 rs191297051 A C . PASS KGPilot123;RSPOS=84244;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 84295 rs183209871 G A . PASS KGPilot123;RSPOS=84295;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 84346 rs187855973 T C . PASS KGPilot123;RSPOS=84346;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 84453 rs191379015 C G . PASS KGPilot123;RSPOS=84453;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
chr1 84705 rs183470350 T G . PASS KGPilot123;RSPOS=84705;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
100000 1 0
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
100000 1 11
|
||||||
|
0 chr1 (null)
|
||||||
|
0 100000 0
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue