649 lines
29 KiB
Java
649 lines
29 KiB
Java
package org.broadinstitute.sting.oneoffprojects.walkers;
|
|
|
|
import org.broad.tribble.util.variantcontext.Allele;
|
|
import org.broad.tribble.util.variantcontext.Genotype;
|
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
|
import org.broad.tribble.vcf.VCFHeader;
|
|
import org.broad.tribble.vcf.VCFHeaderLine;
|
|
import org.broadinstitute.sting.commandline.Argument;
|
|
import org.broadinstitute.sting.commandline.Output;
|
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
|
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
|
import org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator;
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
import org.broadinstitute.sting.utils.SampleUtils;
|
|
import org.broadinstitute.sting.utils.StingException;
|
|
import org.broadinstitute.sting.utils.collections.Pair;
|
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
|
import org.broadinstitute.sting.utils.genotype.vcf.StandardVCFWriter;
|
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
|
|
import java.io.PrintStream;
|
|
import java.util.*;
|
|
|
|
|
|
/**
|
|
* Takes in a VCF file for a trio (and optionally, bam files for some or all members) and classifies mendelian violations
|
|
* as deNovo events, or opposite homozygotes, and includes additional information pertaining to event QC (size of
|
|
* homozygous region in child and/or parents, whether a site is likely tri-allelic or not, etc)
|
|
*
|
|
* @Author chartl
|
|
* @Date Jun 8, 2010
|
|
*/
|
|
public class MendelianViolationClassifier extends LocusWalker<MendelianViolationClassifier.MendelianViolation, VCFWriter> {
|
|
@Output
|
|
PrintStream out;
|
|
@Argument(shortName="f",fullName="familyPattern",required=true,doc="Pattern for the family structure (usage: mom+dad=child)")
|
|
String familyStr = null;
|
|
@Argument(shortName="ob",fullName="outputBed",required=true,doc="Output file to write the homozygous region information to")
|
|
PrintStream bedOutput = null;
|
|
@Argument(fullName="deNovoTriAllelicQ",required=false,doc="Cutoff for quality scores of 3rd allele at denovo mendelian violations to remove it from the denovo set; e.g. Q40 = need Q40 evidence for 3rd allele to toss out deNovo")
|
|
int deNovoTriQ = 20;
|
|
@Argument(fullName="deNovoParentalAllele",required=false,doc="Range for the parental allele at denovo sites to be kept in denovo set, e.g. 0.4-0.6 will toss out denovo sites with parental allele proportions of <0.4 and >0.6")
|
|
String deNovoParentalAllele = "-0.1-1.1";
|
|
@Argument(fullName="oppositeHomozygoteTriAllelicQ",required=false,doc="Cutoff for quality scores of 3rd allele at opposite homozygote sites to remove it from the violation set")
|
|
int opHomTriQ = 20;
|
|
@Argument(fullName="oppositeHomozygoteAlleleProportion",required=false,doc="Range for the parental allele in the parents at opposite homozygote sites for it to be kept in violation set")
|
|
String opHomAlleleProp = "-0.1-1.1";
|
|
|
|
|
|
/*
|
|
*********** PRIVATE CLASSES
|
|
*/
|
|
|
|
public class ExtendedTrioStructure extends MendelianViolationEvaluator.TrioStructure {
|
|
public HashMap<String,HomozygosityRegion> homozygousRegions;
|
|
public HashMap<String,Integer> homozygousRegionCounts;
|
|
public HashMap<String,MendelianInfoKey> regionKeys;
|
|
|
|
public ExtendedTrioStructure(String family) {
|
|
MendelianViolationEvaluator.TrioStructure struct = MendelianViolationEvaluator.parseTrioDescription(family);
|
|
this.child = struct.child;
|
|
this.mom = struct.mom;
|
|
this.dad = struct.dad;
|
|
homozygousRegions = new HashMap<String,HomozygosityRegion>(3);
|
|
homozygousRegionCounts = new HashMap<String,Integer>(3);
|
|
homozygousRegions.put(child,null);
|
|
homozygousRegions.put(mom,null);
|
|
homozygousRegions.put(dad,null);
|
|
homozygousRegionCounts.put(child,0);
|
|
homozygousRegionCounts.put(mom,0);
|
|
homozygousRegionCounts.put(dad,0);
|
|
regionKeys = new HashMap<String,MendelianInfoKey>(3);
|
|
regionKeys.put(child,MendelianInfoKey.ChildHomozygosityRegion);
|
|
regionKeys.put(mom,MendelianInfoKey.MotherHomozygosityRegion);
|
|
regionKeys.put(dad,MendelianInfoKey.FatherHomozygosityRegion);
|
|
}
|
|
|
|
public void updateHomozygosityRegions(MendelianViolation v, PrintStream output) {
|
|
if ( ! v.siteIsFiltered() ) {
|
|
ArrayList<HomozygosityRegion> brokenRegions = new ArrayList<HomozygosityRegion>(3);
|
|
// can only enter or break regions at unfiltered calls
|
|
for( Map.Entry<String, Genotype> memberGenotype : v.getUnderlyingGenotypes().entrySet() ) {
|
|
// for each family member
|
|
if ( homozygousRegions.get(memberGenotype.getKey()) == null ) {
|
|
// currently in a heterozygous region, update if possible
|
|
if ( memberGenotype.getValue().isHom() ) {
|
|
homozygousRegionCounts.put(memberGenotype.getKey(),homozygousRegionCounts.get(memberGenotype.getKey())+1);
|
|
homozygousRegions.put(memberGenotype.getKey(),new HomozygosityRegion(v.getLocus()));
|
|
if ( v.type != MendelianViolationType.NONE ) {
|
|
v.addAttribute(regionKeys.get(memberGenotype.getKey()).getKey(),homozygousRegionCounts.get(memberGenotype.getKey()));
|
|
}
|
|
}
|
|
} else {
|
|
// potentially breaking a homozygous region
|
|
if ( memberGenotype.getValue().isHom() ) {
|
|
// no break, update the region
|
|
HomozygosityRegion r = homozygousRegions.get(memberGenotype.getKey());
|
|
r.lastSeen = v.getLocus();
|
|
r.callsWithinRegion++;
|
|
if ( v.type != MendelianViolationType.NONE && ! v.violationIsFiltered() ) {
|
|
v.addAttribute(regionKeys.get(memberGenotype.getKey()).getKey(),homozygousRegionCounts.get(memberGenotype.getKey()));
|
|
if ( v.type == MendelianViolationType.DE_NOVO_SNP ) {
|
|
r.deNovoSNPsInRegion++;
|
|
} else if ( v.type == MendelianViolationType.OPPOSITE_HOMOZYGOTE ) {
|
|
r.oppositeHomsInRegion++;
|
|
}
|
|
}
|
|
} else if ( memberGenotype.getValue().isHet() ) {
|
|
// explicitly check for hets -- no calls are not counted -- this breaks a region so we print it
|
|
homozygousRegions.get(memberGenotype.getKey()).finalize(v.getLocus(),memberGenotype.getKey(),homozygousRegionCounts.get(memberGenotype.getKey()));
|
|
brokenRegions.add(homozygousRegions.get(memberGenotype.getKey()));
|
|
homozygousRegions.put(memberGenotype.getKey(),null);
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( brokenRegions.size() > 0 ) {
|
|
Collections.sort(brokenRegions);
|
|
}
|
|
|
|
for ( HomozygosityRegion r : brokenRegions ) {
|
|
output.printf("%s%n",r);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
public class HomozygosityRegion implements Comparable{
|
|
public GenomeLoc regionStart;
|
|
public GenomeLoc lastSeen;
|
|
public GenomeLoc endedBy;
|
|
public int callsWithinRegion;
|
|
public int oppositeHomsInRegion;
|
|
public int deNovoSNPsInRegion;
|
|
private String parent;
|
|
private int id;
|
|
|
|
public HomozygosityRegion(GenomeLoc start) {
|
|
regionStart = start;
|
|
lastSeen = start;
|
|
endedBy = null;
|
|
callsWithinRegion = 0;
|
|
oppositeHomsInRegion = 0;
|
|
deNovoSNPsInRegion = 0;
|
|
}
|
|
|
|
public void finalize(GenomeLoc regionEnd,String parent, int id) {
|
|
endedBy = regionEnd;
|
|
this.parent = parent;
|
|
this.id = id;
|
|
}
|
|
|
|
private int getSizeLowerBound() {
|
|
return lastSeen.distance(regionStart);
|
|
}
|
|
|
|
private int getSizeOfFirstHomToFirstHet() {
|
|
return endedBy.distance(regionStart);
|
|
}
|
|
|
|
public String toString() {
|
|
return String.format("%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d",regionStart.getContig(),regionStart.getStart(),
|
|
lastSeen.getStart(),endedBy.getStart(),parent,id,getSizeLowerBound(),getSizeOfFirstHomToFirstHet(),
|
|
callsWithinRegion,oppositeHomsInRegion,deNovoSNPsInRegion);
|
|
}
|
|
|
|
public int compareTo(Object o) {
|
|
if ( ! ( o instanceof HomozygosityRegion) ) {
|
|
return Integer.MIN_VALUE;
|
|
}
|
|
|
|
return this.regionStart.compareTo(((HomozygosityRegion) o).regionStart);
|
|
}
|
|
|
|
public String getContigStr() {
|
|
return regionStart.getContig();
|
|
}
|
|
}
|
|
|
|
public class MendelianViolation {
|
|
private VariantContext trio;
|
|
public MendelianViolationType type;
|
|
private HashMap<String,Object> newAttributes;
|
|
private HashMap<String,Integer> homozygosityRegions;
|
|
private boolean filtered = false;
|
|
|
|
public MendelianViolation(VariantContext context, MendelianViolationType violationType) {
|
|
trio = context;
|
|
type = violationType;
|
|
newAttributes = new HashMap<String,Object>();
|
|
newAttributes.put(MendelianInfoKey.ViolationType.getKey(),type);
|
|
homozygosityRegions = new HashMap<String,Integer>(3);
|
|
}
|
|
|
|
public void addAttribute(String key, Object val) {
|
|
newAttributes.put(key,val);
|
|
}
|
|
|
|
public VariantContext toVariantContext() {
|
|
newAttributes.putAll(trio.getAttributes());
|
|
return new VariantContext(trio.getName(), trio.getChr(), trio.getStart(), trio.getEnd(),trio.getAlleles(),trio.getGenotypes(),trio.getNegLog10PError(),trio.filtersWereApplied()?trio.getFilters():null,newAttributes);
|
|
}
|
|
|
|
public boolean siteIsFiltered() {
|
|
return trio.isFiltered();
|
|
}
|
|
|
|
public void addRegions(HashMap<String,Integer> regionIDsByName ) {
|
|
for ( Map.Entry<String,Integer> e : regionIDsByName.entrySet() ) {
|
|
setRegion(e.getKey(),e.getValue());
|
|
}
|
|
}
|
|
|
|
public void setRegion(String parent, int regionID) {
|
|
homozygosityRegions.put(parent,regionID);
|
|
}
|
|
|
|
public boolean isInPreviousRegions(Map<String,Integer> otherRegions) {
|
|
for ( String s : otherRegions.keySet() ) {
|
|
if ( homozygosityRegions.get(s) >= otherRegions.get(s) ) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
public Map<String,Genotype> getUnderlyingGenotypes() {
|
|
return trio.getGenotypes();
|
|
}
|
|
|
|
public GenomeLoc getLocus() {
|
|
return VariantContextUtils.getLocation(trio);
|
|
}
|
|
|
|
public byte getRefBase() {
|
|
return trio.getReference().getBases()[0];
|
|
}
|
|
|
|
public Object getAttribute(String key) {
|
|
if ( newAttributes.keySet().contains(key) ) {
|
|
return newAttributes.get(key);
|
|
} else {
|
|
return trio.getAttribute(key);
|
|
}
|
|
}
|
|
|
|
public void filter() {
|
|
filtered = true;
|
|
newAttributes.put(MendelianInfoKey.ViolationType.getKey(),"Filtered_"+newAttributes.get(MendelianInfoKey.ViolationType.getKey()));
|
|
}
|
|
|
|
public String getType() {
|
|
return filtered ? "Filtered_"+type.toString() : type.toString();
|
|
}
|
|
|
|
public boolean violationIsFiltered() {
|
|
return filtered;
|
|
}
|
|
}
|
|
|
|
public class Range {
|
|
private double upper;
|
|
private double lower;
|
|
private double epsilon = 10e-3;
|
|
|
|
Range(String rangeStr) {
|
|
String rs = rangeStr.substring(0); // don't clobber original string
|
|
boolean startIsNegative = rangeStr.startsWith("-");
|
|
if ( startIsNegative ) {
|
|
rs = rs.substring(1);
|
|
}
|
|
String[] lu = rs.split("-");
|
|
lower = startIsNegative ? -1*Double.parseDouble(lu[0]) : Double.parseDouble(lu[0]);
|
|
upper = Double.parseDouble(lu[1]);
|
|
//System.out.printf("Lower: %.2f, Upper: %.2f",lower,upper);
|
|
}
|
|
|
|
public boolean contains(double p) {
|
|
return p > lower-epsilon && p < upper+epsilon;
|
|
}
|
|
}
|
|
|
|
/*
|
|
*************** PRIVATE ENUMS
|
|
*/
|
|
|
|
public enum MendelianViolationType {
|
|
OPPOSITE_HOMOZYGOTE("oppositeHomozygote"),
|
|
DE_NOVO_SNP("deNovoSNP"),
|
|
NONE("none");
|
|
|
|
private String infoString;
|
|
|
|
MendelianViolationType(String typeName) {
|
|
infoString=typeName;
|
|
}
|
|
|
|
public String toString() {
|
|
return infoString;
|
|
}
|
|
}
|
|
|
|
public enum MendelianInfoKey {
|
|
ViolationType("MVT","String",1,"\"The Mendelian violation type\""),
|
|
ParentalDeletion("deletedParent","String",1,"\"The parent from whom the child (putatively) inherited a deletion at opposite homozygous sites\""),
|
|
TriAllelicQuality("TriAlQ","Integer",1,"\"The variant quality of the third allele at putative tri-allelic sites\""),
|
|
TriAllelicBase("TriAlB","String",1,"\"The third allele at putative tri-allelic sites\""),
|
|
MotherHomozygosityRegion("MHR","String",1,"\"An identifier for the mother's homozygosity region where the violation is located\""),
|
|
FatherHomozygosityRegion("FHR","String",1,"\"An identifier for the father's homozygosity region where the violation is located\""),
|
|
ChildHomozygosityRegion("CHR","Integer",1,"\"An identifier for the child's homozygosity region where the violation is located\""),
|
|
ProportionOfParentAllele("PropParent","Float",1,"\"The proportion of bases in the child that were the parent allele at deNovo SNP sites\""),
|
|
/* ***************************************** UNUSED ************************************************ */
|
|
NumCallsInRegion("REGION_NCALLS","Integer",1,"\"The number of unfiltered SNP calls found in the homozygosity region\""),
|
|
ChildHomozygosityRegionSize("CHRS","Integer",1,"\"The size of the region of homozygosity in the child in which the opposite homozygote is located\""),
|
|
OppositeHomozygotesInRegion("CHROH","Integer",1,"\"The number of opposite-homozygotes located in the region of homozygosity\""),
|
|
ParentHomozygosityRegionSize("PHRS","Integer",1,"\"The size of the parental homozygosity region where the deNovo SNP is located\""),
|
|
DeNovoSNPsInRegion("PHRDN","Integer",1,"\"The number of deNovo SNP events located in the same region of parental homozygosity where the deNovo SNP is located\"");
|
|
|
|
|
|
String keyName;
|
|
String valueType;
|
|
String valueDescription;
|
|
int numFields;
|
|
|
|
MendelianInfoKey(String keyStr,String infoType, int fields, String description) {
|
|
keyName = keyStr;
|
|
valueType = infoType;
|
|
valueDescription = description;
|
|
numFields = fields;
|
|
}
|
|
|
|
public String toString() {
|
|
return String.format("%s,%s,%d,%s",keyName,valueType,numFields,valueDescription);
|
|
}
|
|
|
|
public String getKey() {
|
|
return keyName;
|
|
}
|
|
}
|
|
|
|
/*
|
|
**************** PRIVATE DATA
|
|
*/
|
|
private ExtendedTrioStructure trioStructure;
|
|
private UnifiedGenotyperEngine engine;
|
|
private Range deNovoRange;
|
|
private Range opHomRange;
|
|
|
|
/*
|
|
***************** INITIALIZE
|
|
*/
|
|
public void initialize() {
|
|
trioStructure = new ExtendedTrioStructure(familyStr);
|
|
deNovoRange = new Range(deNovoParentalAllele);
|
|
opHomRange = new Range(opHomAlleleProp);
|
|
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
|
uac.MIN_BASE_QUALTY_SCORE = 10;
|
|
uac.MIN_MAPPING_QUALTY_SCORE = 10;
|
|
uac.STANDARD_CONFIDENCE_FOR_CALLING = Math.min(deNovoTriQ,opHomTriQ);
|
|
engine = new UnifiedGenotyperEngine(getToolkit(),uac);
|
|
logger.info("Mom: "+trioStructure.mom+" Dad: "+trioStructure.dad+" Child: "+trioStructure.child);
|
|
bedOutput.printf("%s%n",getBedFileHeader());
|
|
}
|
|
|
|
/*
|
|
*********** REDUCE INIT
|
|
*/
|
|
public VCFWriter reduceInit() {
|
|
VCFWriter writer = new StandardVCFWriter(out);
|
|
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
|
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
|
hInfo.add(new VCFHeaderLine("source", "MendelianViolationClassifier"));
|
|
for ( MendelianInfoKey key : EnumSet.allOf(MendelianInfoKey.class) ) {
|
|
hInfo.add( new VCFHeaderLine("INFO",key.toString()));
|
|
}
|
|
VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
|
|
writer.writeHeader(vcfHeader);
|
|
|
|
return writer;
|
|
}
|
|
|
|
/*
|
|
***************** FILTER
|
|
*/
|
|
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
return tracker != null;
|
|
}
|
|
|
|
/*
|
|
*************** MAP
|
|
*/
|
|
public MendelianViolation map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
return assessViolation(tracker.getVariantContext(ref,"trio", EnumSet.of(VariantContext.Type.SNP),ref.getLocus(),true),tracker,ref,context);
|
|
}
|
|
|
|
private boolean isComplete(VariantContext vc) {
|
|
for ( Genotype g : vc.getGenotypes().values() ) {
|
|
if ( g.isNoCall() || g.isFiltered() ) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
private MendelianViolation assessViolation(VariantContext varContext, RefMetaDataTracker tracker, ReferenceContext reference, AlignmentContext context) {
|
|
MendelianViolation violation;
|
|
if ( varContext != null ) {
|
|
if ( isComplete(varContext) && MendelianViolationEvaluator.isViolation(varContext,trioStructure) ) {
|
|
if ( isDeNovo(varContext) ) {
|
|
violation = assessDeNovo(varContext,tracker,reference,context);
|
|
} else if ( isOppositeHomozygote(varContext) ) {
|
|
violation = assessOppositeHomozygote(varContext,tracker,reference,context);
|
|
} else {
|
|
throw new StingException("Mendelian violation that is neither deNovo nor opposite homozygote. Should never see this.");
|
|
}
|
|
} else {
|
|
violation = new MendelianViolation(varContext,MendelianViolationType.NONE);
|
|
}
|
|
|
|
} else {
|
|
violation = null;
|
|
}
|
|
|
|
return violation;
|
|
}
|
|
|
|
// Note, mendelian violation is guaranteed at this point; deNovo only happens at child het sites
|
|
private boolean isDeNovo(VariantContext trio) {
|
|
return trio.getGenotype(trioStructure.child).isHet();
|
|
}
|
|
|
|
// Note, mendelian violation is guaranteed at this point; technically we do not have to check this...but we are
|
|
private boolean isOppositeHomozygote(VariantContext trio) {
|
|
if ( trio.getGenotype(trioStructure.child).isHet() ) { // not valid at child het sites
|
|
return false;
|
|
} else if ( trio.getHetCount() > 1 ) { // child is not het, so if this is 2, mom and dad are both het, invalid
|
|
return false;
|
|
} else if ( trio.getGenotype(trioStructure.dad) == null || trio.getGenotype(trioStructure.mom) == null ) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
************ ASSESS DE NOVO AND OPPOSITE HOMOZYGOTES
|
|
*/
|
|
|
|
private MendelianViolation assessDeNovo(VariantContext trio, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
MendelianViolation violation = new MendelianViolation(trio,MendelianViolationType.DE_NOVO_SNP);
|
|
|
|
// look for mis-genotyped sites by examining the proportion of the parent-allele bases in the child --
|
|
// as a spoeedup we do this only at non-filtered sites
|
|
|
|
if ( ! trio.isFiltered() ) {
|
|
Allele parental = trio.getGenotype(trioStructure.mom).getAllele(0); // guaranteed homozygous
|
|
if ( parental.getBases().length < 1 ) {
|
|
throw new StingException("Parental bases have length zero at "+trio.toString());
|
|
}
|
|
|
|
Map<String,StratifiedAlignmentContext> splitContext = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
|
|
Double proportion = getAlleleProportion(parental,splitContext.get(trioStructure.child));
|
|
if ( proportion != null ) {
|
|
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(), proportion);
|
|
if ( ! deNovoRange.contains(proportion) ) {
|
|
//System.out.println("Filtering deNovo by proportion: is "+proportion+" should be in range "+deNovoRange.lower+"-"+deNovoRange.upper);
|
|
violation.filter();
|
|
}
|
|
}
|
|
|
|
Pair<Allele,Integer> triAl = getTriAllelicQuality(tracker,ref,trio,splitContext);
|
|
if ( triAl != null ) {
|
|
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
|
|
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
|
|
if ( triAl.second >= deNovoTriQ ) {
|
|
violation.filter();
|
|
}
|
|
}
|
|
|
|
} else {
|
|
violation.filter();
|
|
}
|
|
|
|
return violation;
|
|
}
|
|
|
|
private MendelianViolation assessOppositeHomozygote(VariantContext trio, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
MendelianViolation violation = new MendelianViolation(trio,MendelianViolationType.OPPOSITE_HOMOZYGOTE);
|
|
logger.debug(getParentalDeletion(trio));
|
|
violation.addAttribute(MendelianInfoKey.ParentalDeletion.getKey(),getParentalDeletion(trio));
|
|
|
|
// look for tri-allelic sites mis-called as hom -- as a speedup we do this only at non-filtered, non genotype error sites
|
|
|
|
if ( ! trio.isFiltered() ) {
|
|
Map<String,StratifiedAlignmentContext> splitCon = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
|
|
Pair<Allele,Integer> triAl = getTriAllelicQuality(tracker, ref, trio, splitCon);
|
|
if ( triAl != null ) {
|
|
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
|
|
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
|
|
if ( triAl.second >= opHomTriQ ) {
|
|
violation.filter();
|
|
}
|
|
}
|
|
|
|
Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.child).getAllele(0),splitCon.get(trioStructure.child));
|
|
Double motherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.mom));
|
|
Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.dad).getAllele(0),splitCon.get(trioStructure.dad));
|
|
if ( childProp != null ) {
|
|
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(),childProp);
|
|
if ( ! opHomRange.contains(childProp) ) {
|
|
violation.filter();
|
|
}
|
|
}
|
|
|
|
if ( motherProp != null && ! opHomRange.contains(motherProp) ) {
|
|
violation.filter();
|
|
}
|
|
|
|
if ( fatherProp != null && ! opHomRange.contains(fatherProp) ) {
|
|
violation.filter();
|
|
}
|
|
} else {
|
|
violation.filter();
|
|
}
|
|
|
|
return violation;
|
|
}
|
|
|
|
private Double getAlleleProportion(Allele a, StratifiedAlignmentContext context) {
|
|
int numParental = 0;
|
|
int total = 0;
|
|
if ( context != null ) {
|
|
for ( PileupElement e : context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()) {
|
|
if ( e.getQual() >= 10 && e.getMappingQual() >= 10 ) {
|
|
total++;
|
|
if ( e.getBase() == a.getBases()[0]) {
|
|
numParental ++;
|
|
}
|
|
}
|
|
}
|
|
return ( (double) numParental )/total;
|
|
} else {
|
|
return null;
|
|
}
|
|
|
|
|
|
}
|
|
|
|
private Pair<Allele,Integer> getTriAllelicQuality(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext var, Map<String,StratifiedAlignmentContext> strat) {
|
|
int conf = 0;
|
|
Allele alt = null;
|
|
for ( Map.Entry<String,StratifiedAlignmentContext> sEntry : strat.entrySet() ) {
|
|
VariantCallContext call = engine.runGenotyper(tracker, ref, sEntry.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE));
|
|
if ( call != null && call.confidentlyCalled && call.vc != null ) {
|
|
if ( call.vc.isSNP() ) {
|
|
if ( ! call.vc.getAlternateAllele(0).basesMatch(var.getAlternateAllele(0))) {
|
|
if ( alt == null ) {
|
|
alt = call.vc.getAlternateAllele(0);
|
|
conf = (int) Math.floor(10*call.vc.getNegLog10PError());
|
|
} else {
|
|
conf += (int) Math.floor(10*call.vc.getNegLog10PError());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( alt == null ) {
|
|
return null;
|
|
} else {
|
|
return new Pair<Allele,Integer>(alt,conf);
|
|
}
|
|
}
|
|
|
|
private String getParentalDeletion(VariantContext trio) {
|
|
// case 1, mom and dad are both hom, so missing is the one whose alleles don't match the child
|
|
if ( trio.getGenotype(trioStructure.mom).isHom() && trio.getGenotype(trioStructure.dad).isHom() ) {
|
|
if ( trio.getGenotype(trioStructure.mom).isHomRef() == trio.getGenotype(trioStructure.child).isHomRef() ) {
|
|
// mom and child both hom ref or hom var
|
|
return trioStructure.dad;
|
|
} else if ( trio.getGenotype(trioStructure.dad).isHomRef() == trio.getGenotype(trioStructure.child).isHomRef() ) {
|
|
return trioStructure.mom;
|
|
} else {
|
|
// child matches neither parent
|
|
return "genotypeError";
|
|
}
|
|
} else { // case 2, either mom or dad is het - the hom must be the missing allele
|
|
return trio.getGenotype(trioStructure.mom).isHet() ? trioStructure.dad : trioStructure.mom;
|
|
}
|
|
}
|
|
|
|
/*
|
|
*************** REDUCE
|
|
*/
|
|
public VCFWriter reduce(MendelianViolation variant, VCFWriter writer) {
|
|
if ( variant != null ) {
|
|
trioStructure.updateHomozygosityRegions(variant,bedOutput);
|
|
writer.add(variant.toVariantContext(),variant.getRefBase());
|
|
}
|
|
|
|
return writer;
|
|
}
|
|
|
|
/*
|
|
********** ON TRAVERSAL DONE
|
|
*/
|
|
public void onTraversalDone(VCFWriter writer) {
|
|
Map<String,HomozygosityRegion> regions = trioStructure.homozygousRegions;
|
|
Map<String,Integer> counts = trioStructure.homozygousRegionCounts;
|
|
List<HomozygosityRegion> to_print = new ArrayList<HomozygosityRegion>(3);
|
|
for ( Map.Entry<String,HomozygosityRegion> entryRegion : regions.entrySet() ) {
|
|
if ( entryRegion.getValue() != null ) {
|
|
logger.info("---------------- REGION NOT FINALIZED -----------------");
|
|
logger.info(String.format("%s,%s,%s,%d,%d",entryRegion.getKey(),entryRegion.getValue().regionStart,entryRegion.getValue().lastSeen,
|
|
entryRegion.getValue().deNovoSNPsInRegion,entryRegion.getValue().oppositeHomsInRegion));
|
|
int chr_end = getToolkit().getSAMFileHeader().getSequenceDictionary().getSequence(entryRegion.getValue().getContigStr()).getSequenceLength();
|
|
entryRegion.getValue().endedBy = GenomeLocParser.createGenomeLoc(entryRegion.getValue().getContigStr(),chr_end,chr_end);
|
|
to_print.add(entryRegion.getValue());
|
|
}
|
|
}
|
|
|
|
Collections.sort(to_print);
|
|
for ( HomozygosityRegion hr : to_print ) {
|
|
bedOutput.printf("%s%n",hr);
|
|
}
|
|
}
|
|
|
|
/*
|
|
***************** STATIC METHODS
|
|
*/
|
|
|
|
public static String getBedFileHeader() {
|
|
return String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s","Chrom","first_seen_hom","last_seen_hom","first_seen_het",
|
|
"sample_name","region_id","homozygous_region_size", "size_to_first_het","calls_within_region",
|
|
"opposite_homozygotes_in_region","deNovo_SNPs_in_region");
|
|
}
|
|
}
|