Committing changes to comp overlap for indels. Passes all integration tests; minor changes to MVC walker.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3618 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9b8775180e
commit
75d4736600
|
|
@ -41,8 +41,11 @@ public class CompOverlap extends VariantEvaluator {
|
||||||
@DataPoint(name = "% concordant", description = "the concordance rate")
|
@DataPoint(name = "% concordant", description = "the concordance rate")
|
||||||
double concordantRate = 0.0;
|
double concordantRate = 0.0;
|
||||||
|
|
||||||
|
private boolean expectingIndels = false;
|
||||||
|
|
||||||
public CompOverlap(VariantEvalWalker parent) {
|
public CompOverlap(VariantEvalWalker parent) {
|
||||||
super(parent);
|
super(parent);
|
||||||
|
expectingIndels = parent.dels;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getName() {
|
public String getName() {
|
||||||
|
|
@ -84,10 +87,10 @@ public class CompOverlap extends VariantEvaluator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
boolean compIsGood = comp != null && comp.isSNP() && comp.isNotFiltered();
|
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||||
boolean evalIsGood = eval != null && eval.isSNP();
|
boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ;
|
||||||
|
|
||||||
if (compIsGood) nCompSNPs++; // count the number of comp events
|
if ( compIsGood ) nCompSNPs++; // count the number of comp events
|
||||||
if (evalIsGood) nEvalSNPs++; // count the number of eval events
|
if (evalIsGood) nEvalSNPs++; // count the number of eval events
|
||||||
|
|
||||||
if (compIsGood && evalIsGood) {
|
if (compIsGood && evalIsGood) {
|
||||||
|
|
@ -99,4 +102,6 @@ public class CompOverlap extends VariantEvaluator {
|
||||||
|
|
||||||
return null; // we don't capture any interesting sites
|
return null; // we don't capture any interesting sites
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -48,10 +48,8 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
String deNovoParentalAllele = "-0.1-1.1";
|
String deNovoParentalAllele = "-0.1-1.1";
|
||||||
@Argument(fullName="oppositeHomozygoteTriAllelicQ",required=false,doc="Cutoff for quality scores of 3rd allele at opposite homozygote sites to remove it from the violation set")
|
@Argument(fullName="oppositeHomozygoteTriAllelicQ",required=false,doc="Cutoff for quality scores of 3rd allele at opposite homozygote sites to remove it from the violation set")
|
||||||
int opHomTriQ = 20;
|
int opHomTriQ = 20;
|
||||||
@Argument(fullName="oppositeHomozygoteParentAllele",required=false,doc="Range for the parental allele in the parents at opposite homozygote sites for it to be kept in violation set")
|
@Argument(fullName="oppositeHomozygoteAlleleProportion",required=false,doc="Range for the parental allele in the parents at opposite homozygote sites for it to be kept in violation set")
|
||||||
String opHomParentAllele = "-0.1-1.1";
|
String opHomAlleleProp = "-0.1-1.1";
|
||||||
@Argument(fullName="oppositeHomozygoteChildAllele",required=false,doc="Range for the parental allele in the child at opposite homozygote sites for it to be kept in violation set")
|
|
||||||
String opHomChildAllele = "-0.1-1.1";
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -104,7 +102,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
HomozygosityRegion r = homozygousRegions.get(memberGenotype.getKey());
|
HomozygosityRegion r = homozygousRegions.get(memberGenotype.getKey());
|
||||||
r.lastSeen = v.getLocus();
|
r.lastSeen = v.getLocus();
|
||||||
r.callsWithinRegion++;
|
r.callsWithinRegion++;
|
||||||
if ( v.type != MendelianViolationType.NONE && ! v.type.isFiltered() ) {
|
if ( v.type != MendelianViolationType.NONE && ! v.violationIsFiltered() ) {
|
||||||
v.addAttribute(regionKeys.get(memberGenotype.getKey()).getKey(),homozygousRegionCounts.get(memberGenotype.getKey()));
|
v.addAttribute(regionKeys.get(memberGenotype.getKey()).getKey(),homozygousRegionCounts.get(memberGenotype.getKey()));
|
||||||
if ( v.type == MendelianViolationType.DE_NOVO_SNP ) {
|
if ( v.type == MendelianViolationType.DE_NOVO_SNP ) {
|
||||||
r.deNovoSNPsInRegion++;
|
r.deNovoSNPsInRegion++;
|
||||||
|
|
@ -189,6 +187,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
public MendelianViolationType type;
|
public MendelianViolationType type;
|
||||||
private HashMap<String,Object> newAttributes;
|
private HashMap<String,Object> newAttributes;
|
||||||
private HashMap<String,Integer> homozygosityRegions;
|
private HashMap<String,Integer> homozygosityRegions;
|
||||||
|
private boolean filtered = false;
|
||||||
|
|
||||||
public MendelianViolation(VariantContext context, MendelianViolationType violationType) {
|
public MendelianViolation(VariantContext context, MendelianViolationType violationType) {
|
||||||
trio = context;
|
trio = context;
|
||||||
|
|
@ -250,6 +249,19 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
return trio.getAttribute(key);
|
return trio.getAttribute(key);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void filter() {
|
||||||
|
filtered = true;
|
||||||
|
newAttributes.put(MendelianInfoKey.ViolationType.getKey(),"Filtered_"+newAttributes.get(MendelianInfoKey.ViolationType.getKey()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getType() {
|
||||||
|
return filtered ? "Filtered_"+type.toString() : type.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean violationIsFiltered() {
|
||||||
|
return filtered;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public class Range {
|
public class Range {
|
||||||
|
|
@ -284,22 +296,13 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
NONE("none");
|
NONE("none");
|
||||||
|
|
||||||
private String infoString;
|
private String infoString;
|
||||||
private Boolean isFiltered = false;
|
|
||||||
|
|
||||||
MendelianViolationType(String typeName) {
|
MendelianViolationType(String typeName) {
|
||||||
infoString=typeName;
|
infoString=typeName;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return isFiltered ? "Filtered_"+infoString : infoString;
|
return infoString;
|
||||||
}
|
|
||||||
|
|
||||||
public void filter() {
|
|
||||||
isFiltered = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isFiltered() {
|
|
||||||
return isFiltered;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -345,8 +348,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
private ExtendedTrioStructure trioStructure;
|
private ExtendedTrioStructure trioStructure;
|
||||||
private UnifiedGenotyperEngine engine;
|
private UnifiedGenotyperEngine engine;
|
||||||
private Range deNovoRange;
|
private Range deNovoRange;
|
||||||
private Range opHomParentRange;
|
private Range opHomRange;
|
||||||
private Range opHomChildRange;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
***************** INITIALIZE
|
***************** INITIALIZE
|
||||||
|
|
@ -354,8 +356,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
trioStructure = new ExtendedTrioStructure(familyStr);
|
trioStructure = new ExtendedTrioStructure(familyStr);
|
||||||
deNovoRange = new Range(deNovoParentalAllele);
|
deNovoRange = new Range(deNovoParentalAllele);
|
||||||
opHomParentRange = new Range(opHomParentAllele);
|
opHomRange = new Range(opHomAlleleProp);
|
||||||
opHomChildRange = new Range(opHomChildAllele);
|
|
||||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||||
uac.MIN_BASE_QUALTY_SCORE = 10;
|
uac.MIN_BASE_QUALTY_SCORE = 10;
|
||||||
uac.MIN_MAPPING_QUALTY_SCORE = 10;
|
uac.MIN_MAPPING_QUALTY_SCORE = 10;
|
||||||
|
|
@ -372,7 +373,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
VCFWriter writer = new VCFWriter(out);
|
VCFWriter writer = new VCFWriter(out);
|
||||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||||
hInfo.add(new VCFHeaderLine("source", "OppositeHomozygoteClassifier"));
|
hInfo.add(new VCFHeaderLine("source", "MendelianViolationClassifier"));
|
||||||
for ( MendelianInfoKey key : EnumSet.allOf(MendelianInfoKey.class) ) {
|
for ( MendelianInfoKey key : EnumSet.allOf(MendelianInfoKey.class) ) {
|
||||||
hInfo.add( new VCFHeaderLine("INFO",key.toString()));
|
hInfo.add( new VCFHeaderLine("INFO",key.toString()));
|
||||||
}
|
}
|
||||||
|
|
@ -467,7 +468,8 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
if ( proportion != null ) {
|
if ( proportion != null ) {
|
||||||
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(), proportion);
|
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(), proportion);
|
||||||
if ( ! deNovoRange.contains(proportion) ) {
|
if ( ! deNovoRange.contains(proportion) ) {
|
||||||
violation.type.filter();
|
//System.out.println("Filtering deNovo by proportion: is "+proportion+" should be in range "+deNovoRange.lower+"-"+deNovoRange.upper);
|
||||||
|
violation.filter();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -476,12 +478,12 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
|
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
|
||||||
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
|
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
|
||||||
if ( triAl.second >= deNovoTriQ ) {
|
if ( triAl.second >= deNovoTriQ ) {
|
||||||
violation.type.filter();
|
violation.filter();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
violation.type.filter();
|
violation.filter();
|
||||||
}
|
}
|
||||||
|
|
||||||
return violation;
|
return violation;
|
||||||
|
|
@ -501,29 +503,29 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
|
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());
|
||||||
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
|
violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second);
|
||||||
if ( triAl.second >= opHomTriQ ) {
|
if ( triAl.second >= opHomTriQ ) {
|
||||||
violation.type.filter();
|
violation.filter();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.child));
|
Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.child).getAllele(0),splitCon.get(trioStructure.child));
|
||||||
Double motherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.mom));
|
Double motherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.mom));
|
||||||
Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.dad));
|
Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.dad).getAllele(0),splitCon.get(trioStructure.dad));
|
||||||
if ( childProp != null ) {
|
if ( childProp != null ) {
|
||||||
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(),childProp);
|
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(),childProp);
|
||||||
if ( ! opHomChildRange.contains(childProp) ) {
|
if ( ! opHomRange.contains(childProp) ) {
|
||||||
violation.type.filter();
|
violation.filter();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( motherProp != null && ! opHomParentRange.contains(motherProp) ) {
|
if ( motherProp != null && ! opHomRange.contains(motherProp) ) {
|
||||||
violation.type.filter();
|
violation.filter();
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( fatherProp != null && ! opHomParentRange.contains(fatherProp) ) {
|
if ( fatherProp != null && ! opHomRange.contains(fatherProp) ) {
|
||||||
violation.type.filter();
|
violation.filter();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
violation.type.filter();
|
violation.filter();
|
||||||
}
|
}
|
||||||
|
|
||||||
return violation;
|
return violation;
|
||||||
|
|
|
||||||
|
|
@ -1,198 +0,0 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
|
||||||
|
|
||||||
import org.broad.tribble.vcf.VCFHeader;
|
|
||||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
|
||||||
*
|
|
||||||
* @Author chartl
|
|
||||||
* @Date Jun 7, 2010
|
|
||||||
*/
|
|
||||||
public class OppositeHomozygoteClassifier extends RodWalker<VariantContext,VCFWriter> {
|
|
||||||
@Argument(shortName="f",fullName="familyPattern",required=true,doc="Pattern for the family structure (usage: mom+dad=child)")
|
|
||||||
String familyStr = null;
|
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
|
||||||
private TrioStructure trioStructure; /* holds sample names of mom dad child */
|
|
||||||
private ArrayList<VariantContext> contextBuffer; /* holds contexts until they're ready to be printed */
|
|
||||||
private GenomeLoc homozygousRegionStartChild; /* start of the homozygous region for child */
|
|
||||||
private int callsWithinHomozygousRegion; /* number of calls in the current homozygous child region */
|
|
||||||
private int childHomozygousRegionCounter; /* holds number of child homozygous regions */
|
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
trioStructure = parseTrioDescription(familyStr);
|
|
||||||
homozygousRegionStartChild = null;
|
|
||||||
childHomozygousRegionCounter = 0;
|
|
||||||
callsWithinHomozygousRegion = 0;
|
|
||||||
contextBuffer = new ArrayList<VariantContext>(500);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class TrioStructure {
|
|
||||||
public String mom, dad, child;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static TrioStructure parseTrioDescription(String family) {
|
|
||||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
|
||||||
if (m.matches()) {
|
|
||||||
TrioStructure trio = new TrioStructure();
|
|
||||||
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
|
||||||
trio.mom = m.group(1);
|
|
||||||
trio.dad = m.group(2);
|
|
||||||
trio.child = m.group(3);
|
|
||||||
return trio;
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
if ( tracker == null ) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
VariantContext trioVariants = tracker.getVariantContext(ref,"trio", EnumSet.allOf(VariantContext.Type.class), ref.getLocus(), true);
|
|
||||||
// for this to work we need mismatching parents, one a homozyote, and a child homozygote
|
|
||||||
|
|
||||||
if ( trioVariants == null ) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
//System.out.println(": "+trioVariants.getGenotype(trioStructure.mom)+" dad: "+trioVariants.getGenotype(trioStructure.dad)+" child: "+trioVariants.getGenotype(trioStructure.child));
|
|
||||||
if ( isOppositeHomozygoteSite(trioVariants) && ! trioVariants.isFiltered()) {
|
|
||||||
// find out who the homozygote is in the parents
|
|
||||||
if ( trioVariants.getGenotype(trioStructure.mom).isHom() ) {
|
|
||||||
return assessVariant(trioStructure.mom,trioStructure.dad,trioStructure.child,trioVariants,ref,context);
|
|
||||||
} else if ( trioVariants.getGenotype(trioStructure.dad).isHom() ) {
|
|
||||||
return assessVariant(trioStructure.dad,trioStructure.mom,trioStructure.child,trioVariants,ref,context);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return trioVariants;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isOppositeHomozygoteSite(VariantContext trio) {
|
|
||||||
if ( trio.getGenotype(trioStructure.child).isHet() ) { // not valid at child het sites
|
|
||||||
return false;
|
|
||||||
} else if ( trio.getHetCount() > 1 ) { // child is not het, so if this is 2, mom and dad are both het, invalid
|
|
||||||
return false;
|
|
||||||
} else if ( trio.getGenotype(trioStructure.dad) == null || trio.getGenotype(trioStructure.mom) == null ) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private VariantContext assessVariant(String homParent, String otherParent, String child, VariantContext variCon, ReferenceContext refCon, AlignmentContext aliCon) {
|
|
||||||
// see if the child matches the hom parent
|
|
||||||
HashMap<String,Object> attributes = new HashMap<String,Object>(variCon.getAttributes());
|
|
||||||
//System.out.println(refCon.getLocus()+" homParent: "+variCon.getGenotype(homParent).getGenotypeString()+" otherParent: "+variCon.getGenotype(otherParent).getGenotypeString()+" child: "+variCon.getGenotype(child).getGenotypeString());
|
|
||||||
//do child and hom parent NOT match genotypes?
|
|
||||||
if ( variCon.getGenotype(child).isHomRef() && variCon.getGenotype(homParent).isHomVar() ||
|
|
||||||
variCon.getGenotype(child).isHomVar() && variCon.getGenotype(homParent).isHomRef() ) {
|
|
||||||
// check for genotyping error (other must be het, or opposite of first parent)
|
|
||||||
if ( variCon.getGenotype(otherParent).isHet() || variCon.getGenotype(otherParent).isHomRef() != variCon.getGenotype(homParent).isHomRef() ) {
|
|
||||||
attributes.put("opHom",homParent);
|
|
||||||
} else {
|
|
||||||
attributes.put("opHom","genotypeError");
|
|
||||||
}
|
|
||||||
} else if ( variCon.getGenotype(otherParent).isHom() && variCon.getGenotype(otherParent).isHomRef() != variCon.getGenotype(homParent).isHomRef() ) {
|
|
||||||
// is other parent both homozygous and different?
|
|
||||||
attributes.put("opHom",otherParent);
|
|
||||||
}
|
|
||||||
// todo -- assessment of site based on alignment contest (tri allelic? etc)
|
|
||||||
return new VariantContext(variCon.getName(), variCon.getLocation(), variCon.getAlleles(), variCon.getGenotypes(), variCon.getNegLog10PError(), variCon.getFilters(), attributes);
|
|
||||||
}
|
|
||||||
|
|
||||||
public VCFWriter reduceInit() {
|
|
||||||
VCFWriter writer = new VCFWriter(out);
|
|
||||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
|
||||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
|
||||||
hInfo.add(new VCFHeaderLine("source", "OppositeHomozygoteClassifier"));
|
|
||||||
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
|
||||||
//hInfo.add(new VCFHeaderLine("opHom","Child tentatively inheritied the NULL ALLELE from this parent"));
|
|
||||||
// todo -- add info field annotation lines: "opHom", "CHR", "CHRS"
|
|
||||||
VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
|
|
||||||
writer.writeHeader(vcfHeader);
|
|
||||||
|
|
||||||
return writer;
|
|
||||||
}
|
|
||||||
|
|
||||||
public VCFWriter reduce(VariantContext variCon, VCFWriter writer) {
|
|
||||||
if ( variCon == null ) {
|
|
||||||
return writer;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( homozygosityRegionIsBroken(variCon) && contextBuffer.size() > 0 ) {
|
|
||||||
outputBufferedRecords(contextBuffer,variCon,writer);
|
|
||||||
} else if ( ! variCon.isFiltered() && ! variCon.getGenotype(trioStructure.child).isNoCall() ) {
|
|
||||||
callsWithinHomozygousRegion++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( variCon.hasAttribute("opHom") ) {
|
|
||||||
//writer.addRecord(VariantContextAdaptors.toVCF(variCon, variCon.getReference().getBases()[0]));
|
|
||||||
contextBuffer.add(variCon);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( homozygousRegionStartChild == null ) {
|
|
||||||
if ( variCon.getGenotype(trioStructure.child).isHom() && ! variCon.isFiltered() ) {
|
|
||||||
homozygousRegionStartChild = variCon.getLocation();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return writer;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean homozygosityRegionIsBroken(VariantContext context) {
|
|
||||||
// check to see if either the parent or child homozygosity regions have been broken
|
|
||||||
if ( homozygousRegionStartChild != null && context.getGenotype(trioStructure.child).isHet() && ! context.isFiltered() ) {
|
|
||||||
// NOTE: NO CALLS DO NOT BREAK REGIONS OF HOMOZYGOSITY
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void outputBufferedRecords(List<VariantContext> bufCon, VariantContext varCon, VCFWriter writer) {
|
|
||||||
// the buffered contexts all share one feature -- come from the same child homozygosity region
|
|
||||||
String regionSize;
|
|
||||||
if ( varCon != null ) {
|
|
||||||
regionSize = Integer.toString(varCon.getLocation().distance(homozygousRegionStartChild));
|
|
||||||
} else {
|
|
||||||
regionSize = "unknown";
|
|
||||||
}
|
|
||||||
for ( VariantContext vc : bufCon ) {
|
|
||||||
HashMap<String,Object> attributes = new HashMap<String,Object>(vc.getAttributes());
|
|
||||||
attributes.put("CHR",childHomozygousRegionCounter);
|
|
||||||
attributes.put("CHRS",regionSize);
|
|
||||||
attributes.put("CHRNCALL",callsWithinHomozygousRegion);
|
|
||||||
attributes.put("CHRNOPHOM",bufCon.size());
|
|
||||||
VariantContext newVC = new VariantContext(vc.getName(),vc.getLocation(),vc.getAlleles(),vc.getGenotypes(),vc.getNegLog10PError(),vc.getFilters(),attributes);
|
|
||||||
writer.addRecord(VariantContextAdaptors.toVCF(newVC,vc.getReference().getBases()[0]));
|
|
||||||
}
|
|
||||||
childHomozygousRegionCounter++;
|
|
||||||
homozygousRegionStartChild = null;
|
|
||||||
callsWithinHomozygousRegion = 0;
|
|
||||||
bufCon.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraversalDone(VCFWriter w) {
|
|
||||||
outputBufferedRecords(contextBuffer,null,w);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue