diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java new file mode 100644 index 000000000..f07017724 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java @@ -0,0 +1,269 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; +import org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; + +import java.util.*; + + +/** + * Takes in a VCF file for a trio (and optionally, bam files for some or all members) and classifies mendelian violations + * as deNovo events, or opposite homozygotes, and includes additional information pertaining to event QC (size of + * homozygous region in child and/or parents, whether a site is likely tri-allelic or not, etc) + * + * @Author chartl + * @Date Jun 8, 2010 + */ +public class MendelianViolationClassifier extends LocusWalker { + @Argument(shortName="f",fullName="familyPattern",required=true,doc="Pattern for the family structure (usage: mom+dad=child)") + String familyStr = null; + + /* + ****** PRIVATE CLASSES + */ + + public class ExtendedTrioStructure extends MendelianViolationEvaluator.TrioStructure { + public HashMap homozygousRegions; + public HashMap homozygousRegionCounts; + + public ExtendedTrioStructure(String family) { + MendelianViolationEvaluator.TrioStructure struct = MendelianViolationEvaluator.parseTrioDescription(family); + this.child = struct.child; + this.mom = struct.mom; + this.dad = struct.dad; + homozygousRegions = new HashMap(3); + homozygousRegionCounts = new HashMap(3); + homozygousRegions.put(child,null); + homozygousRegions.put(mom,null); + homozygousRegions.put(dad,null); + homozygousRegionCounts.put(child,0); + homozygousRegionCounts.put(mom,0); + homozygousRegionCounts.put(dad,0); + } + } + + public class HomozygosityRegion { + public GenomeLoc regionStart; + public int callsWithinRegion; + + public HomozygosityRegion(GenomeLoc start) { + regionStart = start; + callsWithinRegion = 0; + } + } + + public class MendelianViolation { + private VariantContext trio; + public MendelianViolationType type; + private HashMap newAttributes; + + public MendelianViolation(VariantContext context, MendelianViolationType violationType) { + trio = context; + type = violationType; + newAttributes = new HashMap(); + newAttributes.put(MendelianInfoKey.ViolationType.getKey(),type); + } + + public void addAttribute(String key, Object val) { + newAttributes.put(key,val); + } + + public VariantContext toVariantContext() { + newAttributes.putAll(trio.getAttributes()); + return new VariantContext(trio.getName(),trio.getLocation(),trio.getAlleles(),trio.getGenotypes(),trio.getNegLog10PError(),trio.getFilters(),newAttributes); + } + + public boolean siteIsFiltered() { + return trio.isFiltered(); + } + + } + + /* + ********** PRIVATE ENUMS + */ + + public enum MendelianViolationType { + OPPOSITE_HOMOZYGOTE("oppositeHomozygote"), + DE_NOVO_SNP("deNovoSNP"), + NONE("none"); + + private String infoString; + + MendelianViolationType(String typeName) { + infoString=typeName; + } + + public String toString() { + return infoString; + } + } + + public enum MendelianInfoKey { + ViolationType("MVT","String","\"The Mendelian violation type\""), + ParentalDeletion("deletedParent","String","\"The parent from whom the child (putatively) inherited a deletion at opposite homozygous sites\""), + ChildHomozygosityRegion("CHR","Integer","\"An identifier for the region of homozygosity in the child in which the opposite homozygote is located\""), + ChildHomozygosityRegionSize("CHRS","Integer","\"The size of the region of homozygosity in the child in which the opposite homozygote is located\""), + OppositeHomozygotesInRegion("CHROH","Integer","\"The number of opposite-homozygotes located in the region of homozygosity\""), + TriAllelicQuality("TriAlQ","Integer","\"The variant quality of the third allele at putative tri-allelic sites\""), + TriAllelicBase("TriAlB","String","\"The third allele at putative tri-allelic sites\""), + NumCallsInRegion("REGION_NCALLS","Integer","\"The number of SNP calls found in the homozygosity region\""), + ParentHomozygosityRegion("PHR","String","\"An identifier for the parent, and the region number, for the homozygosity region where the deNovo SNP is located\""), + ParentHomozygosityRegionSize("PHRS","Integer","\"The size of the parental homozygosity region where the deNovo SNP is located\""), + DeNovoSNPsInRegion("PHRDN","Integer","\"The number of deNovo SNP events located in the same region of parental homozygosity where the deNovo SNP is located\""), + ProportionOfParentAllele("PropParent","Double","\"The proportion of bases in the child that were the parent allele at deNovo SNP sites\""); + + + String keyName; + String valueType; + String valueDescription; + + MendelianInfoKey(String keyStr,String infoType,String description) { + keyName = keyStr; + valueType = infoType; + valueDescription = description; + } + + public String toString() { + return String.format("%s,%s,%s",keyName,valueType,valueDescription); + } + + public String getKey() { + return keyName; + } + } + + /* + *********** PRIVATE DATA + */ + private ExtendedTrioStructure trioStructure; + private UnifiedGenotyperEngine engine; + private ArrayList contextBuffer; + + /* + *********** INITIALIZE + */ + public void initialize() { + trioStructure = new ExtendedTrioStructure(familyStr); + contextBuffer = new ArrayList(5000); + UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); + uac.MIN_BASE_QUALTY_SCORE = 10; + uac.MIN_MAPPING_QUALTY_SCORE = 10; + uac.STANDARD_CONFIDENCE_FOR_CALLING = 30; + engine = new UnifiedGenotyperEngine(getToolkit(),uac); + } + + /* + *********** REDUCE INIT + */ + public VCFWriter reduceInit() { + VCFWriter writer = new VCFWriter(out); + Set hInfo = new HashSet(); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.add(new VCFHeaderLine("source", "OppositeHomozygoteClassifier")); + for ( MendelianInfoKey key : EnumSet.allOf(MendelianInfoKey.class) ) { + hInfo.add( new VCFHeaderLine("INFO",key.toString())); + } + VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit())); + writer.writeHeader(vcfHeader); + + return writer; + } + + /* + *********** MAP + */ + public MendelianViolation map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) { + return null; + } + + return assessViolation(tracker.getVariantContext(ref,"trio", EnumSet.of(VariantContext.Type.SNP),ref.getLocus(),true),tracker,ref,context); + } + + private MendelianViolation assessViolation(VariantContext varContext, RefMetaDataTracker tracker, ReferenceContext reference, AlignmentContext context) { + MendelianViolation violation; + if ( MendelianViolationEvaluator.isViolation(varContext,trioStructure) ) { + if ( isDeNovo(varContext) ) { + violation = assessDeNovo(varContext,tracker,reference,context); + } else if ( isOppositeHomozygote(varContext) ) { + violation = assessOppositeHomozygote(varContext,tracker,reference,context); + } else { + throw new StingException("Mendelian violation that is neither deNovo nor opposite homozygote. Should never see this."); + } + } else { + violation = new MendelianViolation(varContext,MendelianViolationType.NONE); + } + + return violation; + } + + // Note, mendelian violation is guaranteed at this point; deNovo only happens at child het sites + private boolean isDeNovo(VariantContext trio) { + return trio.getGenotype(trioStructure.child).isHet(); + } + + // Note, mendelian violation is guaranteed at this point; technically we do not have to check this...but we are + private boolean isOppositeHomozygote(VariantContext trio) { + if ( trio.getGenotype(trioStructure.child).isHet() ) { // not valid at child het sites + return false; + } else if ( trio.getHetCount() > 1 ) { // child is not het, so if this is 2, mom and dad are both het, invalid + return false; + } else if ( trio.getGenotype(trioStructure.dad) == null || trio.getGenotype(trioStructure.mom) == null ) { + return false; + } + + return true; + } + + private MendelianViolation assessDeNovo(VariantContext trio, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + // todo -- implement me + return new MendelianViolation(trio,MendelianViolationType.DE_NOVO_SNP); + } + + private MendelianViolation assessOppositeHomozygote(VariantContext trio, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + // todo -- implement me + return new MendelianViolation(trio,MendelianViolationType.OPPOSITE_HOMOZYGOTE); + } + + /* + *********** REDUCE + */ + public VCFWriter reduce(MendelianViolation variant, VCFWriter writer) { + if ( variant != null ) { + if ( ! variant.siteIsFiltered() ) { + // todo -- fill me in + } else { /* just add filtered variants to output if they're wanted */ + contextBuffer.add(variant.toVariantContext()); + } + } + + return writer; + } + + /* + ********** ON TRAVERSAL DONE + */ + public void onTraversalDone(VCFWriter writer) { + while ( contextBuffer.size() > 0 ) { + VariantContext vc = contextBuffer.remove(0); + writer.addRecord(VariantContextAdaptors.toVCF(vc,vc.getReference().getBases()[0])); + } + } +}