Committing changes to comp overlap for indels. Passes all integration tests; minor changes to MVC walker.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3618 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-06-23 15:49:13 +00:00
parent 9b8775180e
commit 75d4736600
3 changed files with 43 additions and 234 deletions

View File

@ -41,8 +41,11 @@ public class CompOverlap extends VariantEvaluator {
@DataPoint(name = "% concordant", description = "the concordance rate")
double concordantRate = 0.0;
private boolean expectingIndels = false;
public CompOverlap(VariantEvalWalker parent) {
super(parent);
expectingIndels = parent.dels;
}
public String getName() {
@ -84,10 +87,10 @@ public class CompOverlap extends VariantEvaluator {
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean compIsGood = comp != null && comp.isSNP() && comp.isNotFiltered();
boolean evalIsGood = eval != null && eval.isSNP();
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ;
if (compIsGood) nCompSNPs++; // count the number of comp events
if ( compIsGood ) nCompSNPs++; // count the number of comp events
if (evalIsGood) nEvalSNPs++; // count the number of eval events
if (compIsGood && evalIsGood) {
@ -99,4 +102,6 @@ public class CompOverlap extends VariantEvaluator {
return null; // we don't capture any interesting sites
}
}

View File

@ -48,10 +48,8 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
String deNovoParentalAllele = "-0.1-1.1";
@Argument(fullName="oppositeHomozygoteTriAllelicQ",required=false,doc="Cutoff for quality scores of 3rd allele at opposite homozygote sites to remove it from the violation set")
int opHomTriQ = 20;
@Argument(fullName="oppositeHomozygoteParentAllele",required=false,doc="Range for the parental allele in the parents at opposite homozygote sites for it to be kept in violation set")
String opHomParentAllele = "-0.1-1.1";
@Argument(fullName="oppositeHomozygoteChildAllele",required=false,doc="Range for the parental allele in the child at opposite homozygote sites for it to be kept in violation set")
String opHomChildAllele = "-0.1-1.1";
@Argument(fullName="oppositeHomozygoteAlleleProportion",required=false,doc="Range for the parental allele in the parents at opposite homozygote sites for it to be kept in violation set")
String opHomAlleleProp = "-0.1-1.1";
/*
@ -104,7 +102,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
HomozygosityRegion r = homozygousRegions.get(memberGenotype.getKey());
r.lastSeen = v.getLocus();
r.callsWithinRegion++;
if ( v.type != MendelianViolationType.NONE && ! v.type.isFiltered() ) {
if ( v.type != MendelianViolationType.NONE && ! v.violationIsFiltered() ) {
v.addAttribute(regionKeys.get(memberGenotype.getKey()).getKey(),homozygousRegionCounts.get(memberGenotype.getKey()));
if ( v.type == MendelianViolationType.DE_NOVO_SNP ) {
r.deNovoSNPsInRegion++;
@ -189,6 +187,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
public MendelianViolationType type;
private HashMap<String,Object> newAttributes;
private HashMap<String,Integer> homozygosityRegions;
private boolean filtered = false;
public MendelianViolation(VariantContext context, MendelianViolationType violationType) {
trio = context;
@ -250,6 +249,19 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
return trio.getAttribute(key);
}
}
public void filter() {
filtered = true;
newAttributes.put(MendelianInfoKey.ViolationType.getKey(),"Filtered_"+newAttributes.get(MendelianInfoKey.ViolationType.getKey()));
}
public String getType() {
return filtered ? "Filtered_"+type.toString() : type.toString();
}
public boolean violationIsFiltered() {
return filtered;
}
}
public class Range {
@ -284,22 +296,13 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
NONE("none");
private String infoString;
private Boolean isFiltered = false;
MendelianViolationType(String typeName) {
infoString=typeName;
}
public String toString() {
return isFiltered ? "Filtered_"+infoString : infoString;
}
public void filter() {
isFiltered = true;
}
public boolean isFiltered() {
return isFiltered;
return infoString;
}
}
@ -345,8 +348,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
private ExtendedTrioStructure trioStructure;
private UnifiedGenotyperEngine engine;
private Range deNovoRange;
private Range opHomParentRange;
private Range opHomChildRange;
private Range opHomRange;
/*
***************** INITIALIZE
@ -354,8 +356,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
public void initialize() {
trioStructure = new ExtendedTrioStructure(familyStr);
deNovoRange = new Range(deNovoParentalAllele);
opHomParentRange = new Range(opHomParentAllele);
opHomChildRange = new Range(opHomChildAllele);
opHomRange = new Range(opHomAlleleProp);
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.MIN_BASE_QUALTY_SCORE = 10;
uac.MIN_MAPPING_QUALTY_SCORE = 10;
@ -372,7 +373,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
VCFWriter writer = new VCFWriter(out);
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "OppositeHomozygoteClassifier"));
hInfo.add(new VCFHeaderLine("source", "MendelianViolationClassifier"));
for ( MendelianInfoKey key : EnumSet.allOf(MendelianInfoKey.class) ) {
hInfo.add( new VCFHeaderLine("INFO",key.toString()));
}
@ -467,7 +468,8 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
if ( proportion != null ) {
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(), proportion);
if ( ! deNovoRange.contains(proportion) ) {
violation.type.filter();
//System.out.println("Filtering deNovo by proportion: is "+proportion+" should be in range "+deNovoRange.lower+"-"+deNovoRange.upper);
violation.filter();
}
}
@ -476,12 +478,12 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
if ( triAl.second >= deNovoTriQ ) {
violation.type.filter();
violation.filter();
}
}
} else {
violation.type.filter();
violation.filter();
}
return violation;
@ -501,29 +503,29 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
if ( triAl.second >= opHomTriQ ) {
violation.type.filter();
violation.filter();
}
}
Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.child));
Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.child).getAllele(0),splitCon.get(trioStructure.child));
Double motherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.mom));
Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.dad));
Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.dad).getAllele(0),splitCon.get(trioStructure.dad));
if ( childProp != null ) {
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(),childProp);
if ( ! opHomChildRange.contains(childProp) ) {
violation.type.filter();
if ( ! opHomRange.contains(childProp) ) {
violation.filter();
}
}
if ( motherProp != null && ! opHomParentRange.contains(motherProp) ) {
violation.type.filter();
if ( motherProp != null && ! opHomRange.contains(motherProp) ) {
violation.filter();
}
if ( fatherProp != null && ! opHomParentRange.contains(fatherProp) ) {
violation.type.filter();
if ( fatherProp != null && ! opHomRange.contains(fatherProp) ) {
violation.filter();
}
} else {
violation.type.filter();
violation.filter();
}
return violation;

View File

@ -1,198 +0,0 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
*
* @Author chartl
* @Date Jun 7, 2010
*/
public class OppositeHomozygoteClassifier extends RodWalker<VariantContext,VCFWriter> {
@Argument(shortName="f",fullName="familyPattern",required=true,doc="Pattern for the family structure (usage: mom+dad=child)")
String familyStr = null;
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
private TrioStructure trioStructure; /* holds sample names of mom dad child */
private ArrayList<VariantContext> contextBuffer; /* holds contexts until they're ready to be printed */
private GenomeLoc homozygousRegionStartChild; /* start of the homozygous region for child */
private int callsWithinHomozygousRegion; /* number of calls in the current homozygous child region */
private int childHomozygousRegionCounter; /* holds number of child homozygous regions */
public void initialize() {
trioStructure = parseTrioDescription(familyStr);
homozygousRegionStartChild = null;
childHomozygousRegionCounter = 0;
callsWithinHomozygousRegion = 0;
contextBuffer = new ArrayList<VariantContext>(500);
}
public static class TrioStructure {
public String mom, dad, child;
}
public static TrioStructure parseTrioDescription(String family) {
Matcher m = FAMILY_PATTERN.matcher(family);
if (m.matches()) {
TrioStructure trio = new TrioStructure();
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
trio.mom = m.group(1);
trio.dad = m.group(2);
trio.child = m.group(3);
return trio;
} else {
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
}
}
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null ) {
return null;
}
VariantContext trioVariants = tracker.getVariantContext(ref,"trio", EnumSet.allOf(VariantContext.Type.class), ref.getLocus(), true);
// for this to work we need mismatching parents, one a homozyote, and a child homozygote
if ( trioVariants == null ) {
return null;
}
//System.out.println(": "+trioVariants.getGenotype(trioStructure.mom)+" dad: "+trioVariants.getGenotype(trioStructure.dad)+" child: "+trioVariants.getGenotype(trioStructure.child));
if ( isOppositeHomozygoteSite(trioVariants) && ! trioVariants.isFiltered()) {
// find out who the homozygote is in the parents
if ( trioVariants.getGenotype(trioStructure.mom).isHom() ) {
return assessVariant(trioStructure.mom,trioStructure.dad,trioStructure.child,trioVariants,ref,context);
} else if ( trioVariants.getGenotype(trioStructure.dad).isHom() ) {
return assessVariant(trioStructure.dad,trioStructure.mom,trioStructure.child,trioVariants,ref,context);
}
}
return trioVariants;
}
private boolean isOppositeHomozygoteSite(VariantContext trio) {
if ( trio.getGenotype(trioStructure.child).isHet() ) { // not valid at child het sites
return false;
} else if ( trio.getHetCount() > 1 ) { // child is not het, so if this is 2, mom and dad are both het, invalid
return false;
} else if ( trio.getGenotype(trioStructure.dad) == null || trio.getGenotype(trioStructure.mom) == null ) {
return false;
}
return true;
}
private VariantContext assessVariant(String homParent, String otherParent, String child, VariantContext variCon, ReferenceContext refCon, AlignmentContext aliCon) {
// see if the child matches the hom parent
HashMap<String,Object> attributes = new HashMap<String,Object>(variCon.getAttributes());
//System.out.println(refCon.getLocus()+" homParent: "+variCon.getGenotype(homParent).getGenotypeString()+" otherParent: "+variCon.getGenotype(otherParent).getGenotypeString()+" child: "+variCon.getGenotype(child).getGenotypeString());
//do child and hom parent NOT match genotypes?
if ( variCon.getGenotype(child).isHomRef() && variCon.getGenotype(homParent).isHomVar() ||
variCon.getGenotype(child).isHomVar() && variCon.getGenotype(homParent).isHomRef() ) {
// check for genotyping error (other must be het, or opposite of first parent)
if ( variCon.getGenotype(otherParent).isHet() || variCon.getGenotype(otherParent).isHomRef() != variCon.getGenotype(homParent).isHomRef() ) {
attributes.put("opHom",homParent);
} else {
attributes.put("opHom","genotypeError");
}
} else if ( variCon.getGenotype(otherParent).isHom() && variCon.getGenotype(otherParent).isHomRef() != variCon.getGenotype(homParent).isHomRef() ) {
// is other parent both homozygous and different?
attributes.put("opHom",otherParent);
}
// todo -- assessment of site based on alignment contest (tri allelic? etc)
return new VariantContext(variCon.getName(), variCon.getLocation(), variCon.getAlleles(), variCon.getGenotypes(), variCon.getNegLog10PError(), variCon.getFilters(), attributes);
}
public VCFWriter reduceInit() {
VCFWriter writer = new VCFWriter(out);
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "OppositeHomozygoteClassifier"));
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
//hInfo.add(new VCFHeaderLine("opHom","Child tentatively inheritied the NULL ALLELE from this parent"));
// todo -- add info field annotation lines: "opHom", "CHR", "CHRS"
VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
writer.writeHeader(vcfHeader);
return writer;
}
public VCFWriter reduce(VariantContext variCon, VCFWriter writer) {
if ( variCon == null ) {
return writer;
}
if ( homozygosityRegionIsBroken(variCon) && contextBuffer.size() > 0 ) {
outputBufferedRecords(contextBuffer,variCon,writer);
} else if ( ! variCon.isFiltered() && ! variCon.getGenotype(trioStructure.child).isNoCall() ) {
callsWithinHomozygousRegion++;
}
if ( variCon.hasAttribute("opHom") ) {
//writer.addRecord(VariantContextAdaptors.toVCF(variCon, variCon.getReference().getBases()[0]));
contextBuffer.add(variCon);
}
if ( homozygousRegionStartChild == null ) {
if ( variCon.getGenotype(trioStructure.child).isHom() && ! variCon.isFiltered() ) {
homozygousRegionStartChild = variCon.getLocation();
}
}
return writer;
}
private boolean homozygosityRegionIsBroken(VariantContext context) {
// check to see if either the parent or child homozygosity regions have been broken
if ( homozygousRegionStartChild != null && context.getGenotype(trioStructure.child).isHet() && ! context.isFiltered() ) {
// NOTE: NO CALLS DO NOT BREAK REGIONS OF HOMOZYGOSITY
return true;
}
return false;
}
private void outputBufferedRecords(List<VariantContext> bufCon, VariantContext varCon, VCFWriter writer) {
// the buffered contexts all share one feature -- come from the same child homozygosity region
String regionSize;
if ( varCon != null ) {
regionSize = Integer.toString(varCon.getLocation().distance(homozygousRegionStartChild));
} else {
regionSize = "unknown";
}
for ( VariantContext vc : bufCon ) {
HashMap<String,Object> attributes = new HashMap<String,Object>(vc.getAttributes());
attributes.put("CHR",childHomozygousRegionCounter);
attributes.put("CHRS",regionSize);
attributes.put("CHRNCALL",callsWithinHomozygousRegion);
attributes.put("CHRNOPHOM",bufCon.size());
VariantContext newVC = new VariantContext(vc.getName(),vc.getLocation(),vc.getAlleles(),vc.getGenotypes(),vc.getNegLog10PError(),vc.getFilters(),attributes);
writer.addRecord(VariantContextAdaptors.toVCF(newVC,vc.getReference().getBases()[0]));
}
childHomozygousRegionCounter++;
homozygousRegionStartChild = null;
callsWithinHomozygousRegion = 0;
bufCon.clear();
}
public void onTraversalDone(VCFWriter w) {
outputBufferedRecords(contextBuffer,null,w);
}
}