Compares RBP (read-backed phasing) results to a simple trio phasing model that can phase a child's het site iff both parental genotypes are known and at least one of them is not het [at EACH of the two sites in the pair to be phased]
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5092 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
68729045ca
commit
466f8f8a3c
|
|
@ -0,0 +1,187 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.phasing;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
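* Walks along all variant ROD loci and, for each pair of consecutive het sites of the phased sample on the same contig,
* reports whether a simple no-recombination trio model could phase the pair ("trio_phased", "Het3" or "Missing")
* together with whether the genotype in the phasing track is marked as phased.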
|
||||||
|
*/
|
||||||
|
@Allows(value = {DataSource.REFERENCE})
|
||||||
|
@Requires(value = {DataSource.REFERENCE}, referenceMetaData = {@RMD(name = ComparePhasingToTrioPhasingNoRecombinationWalker.TRIO_ROD_NAME, type = ReferenceOrderedDatum.class), @RMD(name = ComparePhasingToTrioPhasingNoRecombinationWalker.PHASING_ROD_NAME, type = ReferenceOrderedDatum.class)})
|
||||||
|
|
||||||
|
@ReadFilters({ZeroMappingQualityReadFilter.class})
|
||||||
|
// Filter out all reads with zero mapping quality
|
||||||
|
|
||||||
|
public class ComparePhasingToTrioPhasingNoRecombinationWalker extends RodWalker<Integer, Integer> {
|
||||||
|
public final static String TRIO_ROD_NAME = "trio";
|
||||||
|
public final static String PHASING_ROD_NAME = "phasing";
|
||||||
|
|
||||||
|
private final static int NUM_IN_TRIO = 3;
|
||||||
|
|
||||||
|
@Output
|
||||||
|
protected PrintStream out;
|
||||||
|
|
||||||
|
private String phasingSample = null;
|
||||||
|
|
||||||
|
private enum TrioStatus {
|
||||||
|
PRESENT, MISSING, TRIPLE_HET
|
||||||
|
}
|
||||||
|
|
||||||
|
private GenomeLoc prevLoc = null;
|
||||||
|
private TrioStatus prevTrioStatus = TrioStatus.MISSING;
|
||||||
|
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean generateExtendedEvents() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param tracker the meta-data tracker
|
||||||
|
* @param ref the reference base
|
||||||
|
* @param context the context for the given locus
|
||||||
|
* @return statistics of and list of all phased VariantContexts and their base pileup that have gone out of cacheWindow range.
|
||||||
|
*/
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if (tracker == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
GenomeLoc curLoc = ref.getLocus();
|
||||||
|
VariantContext phasingVc = tracker.getVariantContext(ref, PHASING_ROD_NAME, curLoc);
|
||||||
|
if (phasingVc == null || phasingVc.isFiltered())
|
||||||
|
return null;
|
||||||
|
|
||||||
|
Map<String, Genotype> phasingSampleToGt = phasingVc.getGenotypes();
|
||||||
|
if (phasingSampleToGt.size() != 1)
|
||||||
|
throw new UserException("Must provide EXACTLY one sample in " + PHASING_ROD_NAME + " track!");
|
||||||
|
Map.Entry<String, Genotype> phasingSampGt = phasingSampleToGt.entrySet().iterator().next();
|
||||||
|
String sample = phasingSampGt.getKey();
|
||||||
|
if (phasingSample == null)
|
||||||
|
phasingSample = sample;
|
||||||
|
if (!sample.equals(phasingSample))
|
||||||
|
throw new UserException("Must provide EXACTLY one sample!");
|
||||||
|
Genotype phasingGt = phasingSampGt.getValue();
|
||||||
|
if (!phasingGt.isHet())
|
||||||
|
return null;
|
||||||
|
|
||||||
|
VariantContext trioVc = tracker.getVariantContext(ref, TRIO_ROD_NAME, curLoc);
|
||||||
|
boolean useTrioVc = (trioVc != null && !trioVc.isFiltered());
|
||||||
|
|
||||||
|
Genotype sampleGtInTrio = null;
|
||||||
|
if (useTrioVc) {
|
||||||
|
sampleGtInTrio = trioVc.getGenotype(phasingSample);
|
||||||
|
|
||||||
|
if (trioVc.getNSamples() > NUM_IN_TRIO || sampleGtInTrio == null)
|
||||||
|
throw new UserException("Must provide trio data for sample: " + phasingSample);
|
||||||
|
|
||||||
|
if (!new TreeSet<Allele>(phasingGt.getAlleles()).equals(new TreeSet<Allele>(sampleGtInTrio.getAlleles()))) {
|
||||||
|
logger.warn("Locus " + curLoc + " breaks phase, since " + PHASING_ROD_NAME + " and " + TRIO_ROD_NAME + " tracks have different genotypes for " + phasingSample + "!");
|
||||||
|
prevLoc = null;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now, we have a [trio-consistent] het genotype that may be phased or not [and we want to know if it could be phased based on trio information]:
|
||||||
|
int processed = 1;
|
||||||
|
|
||||||
|
TrioStatus currentTrioStatus = TrioStatus.MISSING;
|
||||||
|
if (useTrioVc && trioVc.getNSamples() == NUM_IN_TRIO) {
|
||||||
|
boolean allHet = true;
|
||||||
|
for (int i = 0; i < NUM_IN_TRIO; i++) {
|
||||||
|
if (!trioVc.getGenotype(i).isHet()) {
|
||||||
|
allHet = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allHet)
|
||||||
|
currentTrioStatus = TrioStatus.TRIPLE_HET;
|
||||||
|
else
|
||||||
|
currentTrioStatus = TrioStatus.PRESENT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prevLoc != null && curLoc.onSameContig(prevLoc)) {
|
||||||
|
String trioPhaseStatus;
|
||||||
|
|
||||||
|
if (prevTrioStatus == TrioStatus.TRIPLE_HET || currentTrioStatus == TrioStatus.TRIPLE_HET) {
|
||||||
|
trioPhaseStatus = "Het3";
|
||||||
|
}
|
||||||
|
else if (prevTrioStatus == TrioStatus.MISSING || currentTrioStatus == TrioStatus.MISSING) {
|
||||||
|
trioPhaseStatus = "Missing";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (prevTrioStatus != TrioStatus.PRESENT || currentTrioStatus != TrioStatus.PRESENT)
|
||||||
|
throw new ReviewedStingException("LOGICAL error: prevTrioStatus != TrioStatus.PRESENT || currentTrioStatus != TrioStatus.PRESENT");
|
||||||
|
|
||||||
|
trioPhaseStatus = "trio_phased";
|
||||||
|
}
|
||||||
|
|
||||||
|
out.println(prevLoc + "\t" + curLoc + "\t" + trioPhaseStatus + "\t" + phasingGt.isPhased());
|
||||||
|
}
|
||||||
|
|
||||||
|
prevLoc = curLoc;
|
||||||
|
prevTrioStatus = currentTrioStatus;
|
||||||
|
|
||||||
|
return processed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduce(Integer addIn, Integer runningCount) {
|
||||||
|
if (addIn == null)
|
||||||
|
addIn = 0;
|
||||||
|
|
||||||
|
return runningCount + addIn;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param result the number of reads and VariantContexts seen.
|
||||||
|
*/
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
System.out.println("Processed " + result + " sites.");
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue