Committing changes to comp overlap for indels. Passes all integration tests; minor changes to MVC walker.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3618 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9b8775180e
commit
75d4736600
|
|
@ -41,8 +41,11 @@ public class CompOverlap extends VariantEvaluator {
|
|||
@DataPoint(name = "% concordant", description = "the concordance rate")
|
||||
double concordantRate = 0.0;
|
||||
|
||||
private boolean expectingIndels = false;
|
||||
|
||||
public CompOverlap(VariantEvalWalker parent) {
|
||||
super(parent);
|
||||
expectingIndels = parent.dels;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
@ -84,10 +87,10 @@ public class CompOverlap extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean compIsGood = comp != null && comp.isSNP() && comp.isNotFiltered();
|
||||
boolean evalIsGood = eval != null && eval.isSNP();
|
||||
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||
boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ;
|
||||
|
||||
if (compIsGood) nCompSNPs++; // count the number of comp events
|
||||
if ( compIsGood ) nCompSNPs++; // count the number of comp events
|
||||
if (evalIsGood) nEvalSNPs++; // count the number of eval events
|
||||
|
||||
if (compIsGood && evalIsGood) {
|
||||
|
|
@ -99,4 +102,6 @@ public class CompOverlap extends VariantEvaluator {
|
|||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,10 +48,8 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
String deNovoParentalAllele = "-0.1-1.1";
|
||||
@Argument(fullName="oppositeHomozygoteTriAllelicQ",required=false,doc="Cutoff for quality scores of 3rd allele at opposite homozygote sites to remove it from the violation set")
|
||||
int opHomTriQ = 20;
|
||||
@Argument(fullName="oppositeHomozygoteParentAllele",required=false,doc="Range for the parental allele in the parents at opposite homozygote sites for it to be kept in violation set")
|
||||
String opHomParentAllele = "-0.1-1.1";
|
||||
@Argument(fullName="oppositeHomozygoteChildAllele",required=false,doc="Range for the parental allele in the child at opposite homozygote sites for it to be kept in violation set")
|
||||
String opHomChildAllele = "-0.1-1.1";
|
||||
@Argument(fullName="oppositeHomozygoteAlleleProportion",required=false,doc="Range for the parental allele in the parents at opposite homozygote sites for it to be kept in violation set")
|
||||
String opHomAlleleProp = "-0.1-1.1";
|
||||
|
||||
|
||||
/*
|
||||
|
|
@ -104,7 +102,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
HomozygosityRegion r = homozygousRegions.get(memberGenotype.getKey());
|
||||
r.lastSeen = v.getLocus();
|
||||
r.callsWithinRegion++;
|
||||
if ( v.type != MendelianViolationType.NONE && ! v.type.isFiltered() ) {
|
||||
if ( v.type != MendelianViolationType.NONE && ! v.violationIsFiltered() ) {
|
||||
v.addAttribute(regionKeys.get(memberGenotype.getKey()).getKey(),homozygousRegionCounts.get(memberGenotype.getKey()));
|
||||
if ( v.type == MendelianViolationType.DE_NOVO_SNP ) {
|
||||
r.deNovoSNPsInRegion++;
|
||||
|
|
@ -189,6 +187,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
public MendelianViolationType type;
|
||||
private HashMap<String,Object> newAttributes;
|
||||
private HashMap<String,Integer> homozygosityRegions;
|
||||
private boolean filtered = false;
|
||||
|
||||
public MendelianViolation(VariantContext context, MendelianViolationType violationType) {
|
||||
trio = context;
|
||||
|
|
@ -250,6 +249,19 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
return trio.getAttribute(key);
|
||||
}
|
||||
}
|
||||
|
||||
public void filter() {
|
||||
filtered = true;
|
||||
newAttributes.put(MendelianInfoKey.ViolationType.getKey(),"Filtered_"+newAttributes.get(MendelianInfoKey.ViolationType.getKey()));
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return filtered ? "Filtered_"+type.toString() : type.toString();
|
||||
}
|
||||
|
||||
public boolean violationIsFiltered() {
|
||||
return filtered;
|
||||
}
|
||||
}
|
||||
|
||||
public class Range {
|
||||
|
|
@ -284,22 +296,13 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
NONE("none");
|
||||
|
||||
private String infoString;
|
||||
private Boolean isFiltered = false;
|
||||
|
||||
MendelianViolationType(String typeName) {
|
||||
infoString=typeName;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return isFiltered ? "Filtered_"+infoString : infoString;
|
||||
}
|
||||
|
||||
public void filter() {
|
||||
isFiltered = true;
|
||||
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
return isFiltered;
|
||||
return infoString;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -345,8 +348,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
private ExtendedTrioStructure trioStructure;
|
||||
private UnifiedGenotyperEngine engine;
|
||||
private Range deNovoRange;
|
||||
private Range opHomParentRange;
|
||||
private Range opHomChildRange;
|
||||
private Range opHomRange;
|
||||
|
||||
/*
|
||||
***************** INITIALIZE
|
||||
|
|
@ -354,8 +356,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
public void initialize() {
|
||||
trioStructure = new ExtendedTrioStructure(familyStr);
|
||||
deNovoRange = new Range(deNovoParentalAllele);
|
||||
opHomParentRange = new Range(opHomParentAllele);
|
||||
opHomChildRange = new Range(opHomChildAllele);
|
||||
opHomRange = new Range(opHomAlleleProp);
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
uac.MIN_BASE_QUALTY_SCORE = 10;
|
||||
uac.MIN_MAPPING_QUALTY_SCORE = 10;
|
||||
|
|
@ -372,7 +373,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
VCFWriter writer = new VCFWriter(out);
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("source", "OppositeHomozygoteClassifier"));
|
||||
hInfo.add(new VCFHeaderLine("source", "MendelianViolationClassifier"));
|
||||
for ( MendelianInfoKey key : EnumSet.allOf(MendelianInfoKey.class) ) {
|
||||
hInfo.add( new VCFHeaderLine("INFO",key.toString()));
|
||||
}
|
||||
|
|
@ -467,7 +468,8 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
if ( proportion != null ) {
|
||||
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(), proportion);
|
||||
if ( ! deNovoRange.contains(proportion) ) {
|
||||
violation.type.filter();
|
||||
//System.out.println("Filtering deNovo by proportion: is "+proportion+" should be in range "+deNovoRange.lower+"-"+deNovoRange.upper);
|
||||
violation.filter();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -476,12 +478,12 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
|
||||
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
|
||||
if ( triAl.second >= deNovoTriQ ) {
|
||||
violation.type.filter();
|
||||
violation.filter();
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
violation.type.filter();
|
||||
violation.filter();
|
||||
}
|
||||
|
||||
return violation;
|
||||
|
|
@ -501,29 +503,29 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
|
||||
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
|
||||
if ( triAl.second >= opHomTriQ ) {
|
||||
violation.type.filter();
|
||||
violation.filter();
|
||||
}
|
||||
}
|
||||
|
||||
Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.child));
|
||||
Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.child).getAllele(0),splitCon.get(trioStructure.child));
|
||||
Double motherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.mom));
|
||||
Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.dad));
|
||||
Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.dad).getAllele(0),splitCon.get(trioStructure.dad));
|
||||
if ( childProp != null ) {
|
||||
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(),childProp);
|
||||
if ( ! opHomChildRange.contains(childProp) ) {
|
||||
violation.type.filter();
|
||||
if ( ! opHomRange.contains(childProp) ) {
|
||||
violation.filter();
|
||||
}
|
||||
}
|
||||
|
||||
if ( motherProp != null && ! opHomParentRange.contains(motherProp) ) {
|
||||
violation.type.filter();
|
||||
if ( motherProp != null && ! opHomRange.contains(motherProp) ) {
|
||||
violation.filter();
|
||||
}
|
||||
|
||||
if ( fatherProp != null && ! opHomParentRange.contains(fatherProp) ) {
|
||||
violation.type.filter();
|
||||
if ( fatherProp != null && ! opHomRange.contains(fatherProp) ) {
|
||||
violation.filter();
|
||||
}
|
||||
} else {
|
||||
violation.type.filter();
|
||||
violation.filter();
|
||||
}
|
||||
|
||||
return violation;
|
||||
|
|
|
|||
|
|
@ -1,198 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||
*
|
||||
* @Author chartl
|
||||
* @Date Jun 7, 2010
|
||||
*/
|
||||
public class OppositeHomozygoteClassifier extends RodWalker<VariantContext,VCFWriter> {
|
||||
@Argument(shortName="f",fullName="familyPattern",required=true,doc="Pattern for the family structure (usage: mom+dad=child)")
|
||||
String familyStr = null;
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
private TrioStructure trioStructure; /* holds sample names of mom dad child */
|
||||
private ArrayList<VariantContext> contextBuffer; /* holds contexts until they're ready to be printed */
|
||||
private GenomeLoc homozygousRegionStartChild; /* start of the homozygous region for child */
|
||||
private int callsWithinHomozygousRegion; /* number of calls in the current homozygous child region */
|
||||
private int childHomozygousRegionCounter; /* holds number of child homozygous regions */
|
||||
|
||||
public void initialize() {
|
||||
trioStructure = parseTrioDescription(familyStr);
|
||||
homozygousRegionStartChild = null;
|
||||
childHomozygousRegionCounter = 0;
|
||||
callsWithinHomozygousRegion = 0;
|
||||
contextBuffer = new ArrayList<VariantContext>(500);
|
||||
}
|
||||
|
||||
public static class TrioStructure {
|
||||
public String mom, dad, child;
|
||||
}
|
||||
|
||||
public static TrioStructure parseTrioDescription(String family) {
|
||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
||||
if (m.matches()) {
|
||||
TrioStructure trio = new TrioStructure();
|
||||
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
||||
trio.mom = m.group(1);
|
||||
trio.dad = m.group(2);
|
||||
trio.child = m.group(3);
|
||||
return trio;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
||||
}
|
||||
}
|
||||
|
||||
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
VariantContext trioVariants = tracker.getVariantContext(ref,"trio", EnumSet.allOf(VariantContext.Type.class), ref.getLocus(), true);
|
||||
// for this to work we need mismatching parents, one a homozyote, and a child homozygote
|
||||
|
||||
if ( trioVariants == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
//System.out.println(": "+trioVariants.getGenotype(trioStructure.mom)+" dad: "+trioVariants.getGenotype(trioStructure.dad)+" child: "+trioVariants.getGenotype(trioStructure.child));
|
||||
if ( isOppositeHomozygoteSite(trioVariants) && ! trioVariants.isFiltered()) {
|
||||
// find out who the homozygote is in the parents
|
||||
if ( trioVariants.getGenotype(trioStructure.mom).isHom() ) {
|
||||
return assessVariant(trioStructure.mom,trioStructure.dad,trioStructure.child,trioVariants,ref,context);
|
||||
} else if ( trioVariants.getGenotype(trioStructure.dad).isHom() ) {
|
||||
return assessVariant(trioStructure.dad,trioStructure.mom,trioStructure.child,trioVariants,ref,context);
|
||||
}
|
||||
}
|
||||
|
||||
return trioVariants;
|
||||
}
|
||||
|
||||
private boolean isOppositeHomozygoteSite(VariantContext trio) {
|
||||
if ( trio.getGenotype(trioStructure.child).isHet() ) { // not valid at child het sites
|
||||
return false;
|
||||
} else if ( trio.getHetCount() > 1 ) { // child is not het, so if this is 2, mom and dad are both het, invalid
|
||||
return false;
|
||||
} else if ( trio.getGenotype(trioStructure.dad) == null || trio.getGenotype(trioStructure.mom) == null ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private VariantContext assessVariant(String homParent, String otherParent, String child, VariantContext variCon, ReferenceContext refCon, AlignmentContext aliCon) {
|
||||
// see if the child matches the hom parent
|
||||
HashMap<String,Object> attributes = new HashMap<String,Object>(variCon.getAttributes());
|
||||
//System.out.println(refCon.getLocus()+" homParent: "+variCon.getGenotype(homParent).getGenotypeString()+" otherParent: "+variCon.getGenotype(otherParent).getGenotypeString()+" child: "+variCon.getGenotype(child).getGenotypeString());
|
||||
//do child and hom parent NOT match genotypes?
|
||||
if ( variCon.getGenotype(child).isHomRef() && variCon.getGenotype(homParent).isHomVar() ||
|
||||
variCon.getGenotype(child).isHomVar() && variCon.getGenotype(homParent).isHomRef() ) {
|
||||
// check for genotyping error (other must be het, or opposite of first parent)
|
||||
if ( variCon.getGenotype(otherParent).isHet() || variCon.getGenotype(otherParent).isHomRef() != variCon.getGenotype(homParent).isHomRef() ) {
|
||||
attributes.put("opHom",homParent);
|
||||
} else {
|
||||
attributes.put("opHom","genotypeError");
|
||||
}
|
||||
} else if ( variCon.getGenotype(otherParent).isHom() && variCon.getGenotype(otherParent).isHomRef() != variCon.getGenotype(homParent).isHomRef() ) {
|
||||
// is other parent both homozygous and different?
|
||||
attributes.put("opHom",otherParent);
|
||||
}
|
||||
// todo -- assessment of site based on alignment contest (tri allelic? etc)
|
||||
return new VariantContext(variCon.getName(), variCon.getLocation(), variCon.getAlleles(), variCon.getGenotypes(), variCon.getNegLog10PError(), variCon.getFilters(), attributes);
|
||||
}
|
||||
|
||||
public VCFWriter reduceInit() {
|
||||
VCFWriter writer = new VCFWriter(out);
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("source", "OppositeHomozygoteClassifier"));
|
||||
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
||||
//hInfo.add(new VCFHeaderLine("opHom","Child tentatively inheritied the NULL ALLELE from this parent"));
|
||||
// todo -- add info field annotation lines: "opHom", "CHR", "CHRS"
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
|
||||
writer.writeHeader(vcfHeader);
|
||||
|
||||
return writer;
|
||||
}
|
||||
|
||||
public VCFWriter reduce(VariantContext variCon, VCFWriter writer) {
|
||||
if ( variCon == null ) {
|
||||
return writer;
|
||||
}
|
||||
|
||||
if ( homozygosityRegionIsBroken(variCon) && contextBuffer.size() > 0 ) {
|
||||
outputBufferedRecords(contextBuffer,variCon,writer);
|
||||
} else if ( ! variCon.isFiltered() && ! variCon.getGenotype(trioStructure.child).isNoCall() ) {
|
||||
callsWithinHomozygousRegion++;
|
||||
}
|
||||
|
||||
if ( variCon.hasAttribute("opHom") ) {
|
||||
//writer.addRecord(VariantContextAdaptors.toVCF(variCon, variCon.getReference().getBases()[0]));
|
||||
contextBuffer.add(variCon);
|
||||
}
|
||||
|
||||
if ( homozygousRegionStartChild == null ) {
|
||||
if ( variCon.getGenotype(trioStructure.child).isHom() && ! variCon.isFiltered() ) {
|
||||
homozygousRegionStartChild = variCon.getLocation();
|
||||
}
|
||||
}
|
||||
|
||||
return writer;
|
||||
|
||||
}
|
||||
|
||||
private boolean homozygosityRegionIsBroken(VariantContext context) {
|
||||
// check to see if either the parent or child homozygosity regions have been broken
|
||||
if ( homozygousRegionStartChild != null && context.getGenotype(trioStructure.child).isHet() && ! context.isFiltered() ) {
|
||||
// NOTE: NO CALLS DO NOT BREAK REGIONS OF HOMOZYGOSITY
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private void outputBufferedRecords(List<VariantContext> bufCon, VariantContext varCon, VCFWriter writer) {
|
||||
// the buffered contexts all share one feature -- come from the same child homozygosity region
|
||||
String regionSize;
|
||||
if ( varCon != null ) {
|
||||
regionSize = Integer.toString(varCon.getLocation().distance(homozygousRegionStartChild));
|
||||
} else {
|
||||
regionSize = "unknown";
|
||||
}
|
||||
for ( VariantContext vc : bufCon ) {
|
||||
HashMap<String,Object> attributes = new HashMap<String,Object>(vc.getAttributes());
|
||||
attributes.put("CHR",childHomozygousRegionCounter);
|
||||
attributes.put("CHRS",regionSize);
|
||||
attributes.put("CHRNCALL",callsWithinHomozygousRegion);
|
||||
attributes.put("CHRNOPHOM",bufCon.size());
|
||||
VariantContext newVC = new VariantContext(vc.getName(),vc.getLocation(),vc.getAlleles(),vc.getGenotypes(),vc.getNegLog10PError(),vc.getFilters(),attributes);
|
||||
writer.addRecord(VariantContextAdaptors.toVCF(newVC,vc.getReference().getBases()[0]));
|
||||
}
|
||||
childHomozygousRegionCounter++;
|
||||
homozygousRegionStartChild = null;
|
||||
callsWithinHomozygousRegion = 0;
|
||||
bufCon.clear();
|
||||
}
|
||||
|
||||
public void onTraversalDone(VCFWriter w) {
|
||||
outputBufferedRecords(contextBuffer,null,w);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue