Convert de novo SNP caller to run using parent1 and parent2 BAM files (by splitting contexts by reader using getMergedReadGroupsByReaders) instead of geli files providing a large speed-up and obviating the need for large whole-genome geli files.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1738 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-09-29 06:42:21 +00:00
parent 5dab95aa5a
commit 6134f49e3c
1 changed files with 77 additions and 31 deletions

View File

@ -5,10 +5,19 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariationRod;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.Set;
import java.util.List;
import java.util.ArrayList;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: andrewk
@ -17,45 +26,82 @@ import org.broadinstitute.sting.utils.genotype.Variation;
* To change this template use File | Settings | File Templates.
*/
@By(DataSource.REFERENCE)
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="child",type= VariationRod.class), @RMD(name="parent1",type= VariationRod.class), @RMD(name="parent2",type= VariationRod.class)})
//@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=AllelicVariant.class), @RMD(name="dbsnp",type=AllelicVariant.class),@RMD(name="hapmap-chip",type=AllelicVariant.class), @RMD(name="interval",type=IntervalRod.class)})
//@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="dbsnp",type=AllelicVariant.class)})
@Requires(value={DataSource.REFERENCE, DataSource.REFERENCE_BASES, DataSource.READS},referenceMetaData={@RMD(name="child",type= VariationRod.class)})
@Allows({DataSource.READS, DataSource.REFERENCE})
@ReadFilters(ZeroMappingQualityReadFilter.class)
//, @RMD(name="parent1",type= VariationRod.class), @RMD(name="parent2",type= VariationRod.class)})
public class DeNovoSNPWalker extends RefWalker<String, Integer>{
SingleSampleGenotyper SSG;
private List<Set<String>> readGroupSets;
public void initialize() {
SSG = new SingleSampleGenotyper();
SSG.initialize();
readGroupSets = getToolkit().getMergedReadGroupsByReaders();
}
public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
Variation child = (Variation)tracker.lookup("child", null);
Variation parent1 = (Variation)tracker.lookup("parent1", null);
Variation parent2 = (Variation)tracker.lookup("parent2", null);
Variation dbsnp = (Variation)tracker.lookup("dbSNP", null);
if (child != null && parent1 != null && parent2 != null) {
if (!(parent1 instanceof VariantBackedByGenotype) || !(parent2 instanceof VariantBackedByGenotype))
throw new StingException("Both parents ROD tracks must be backed by genotype data. Ensure that your parent rod(s) contain genotyping information");
if (child.isSNP() &&
child.getNegLog10PError() > 5 && // BTR > 5
parent1.isReference() &&
((VariantBackedByGenotype)parent1).getCalledGenotype().getNegLog10PError() > 5 &&
parent2.isReference() &&
((VariantBackedByGenotype)parent2).getCalledGenotype().getNegLog10PError() > 5
if (child != null) {
if (child.isSNP() && child.getNegLog10PError() > 5) { // BTR > 5
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
List<SAMRecord> parent1_reads = new ArrayList<SAMRecord>();
List<SAMRecord> parent2_reads = new ArrayList<SAMRecord>();
List<Integer> parent1_offsets = new ArrayList<Integer>();
List<Integer> parent2_offsets = new ArrayList<Integer>();
assert( reads.size() == offsets.size() ); // should be same number or we're in trouble
int num_reads = reads.size();
for (int i=0; i<num_reads; i++) {
SAMRecord read = reads.get(i);
Integer offset = offsets.get(i);
String readGroup = (String)read.getAttribute("RG");
if (readGroupSets.get(0).contains(readGroup)) {
parent1_reads.add(read);
parent1_offsets.add(offset);
} else if (readGroupSets.get(1).contains(readGroup)) {
parent2_reads.add(read);
parent2_offsets.add(offset);
}
}
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
SSGenotypeCall parent1 = SSG.map(tracker, ref, parent1_subContext);
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
SSGenotypeCall parent2 = SSG.map(tracker, ref, parent2_subContext);
if (!parent1.isVariant(parent1.getReference()) &&
parent1.getNegLog10PError() > 5 &&
!parent2.isVariant(parent2.getReference()) &&
parent2.getNegLog10PError() > 5
) {
double sumConfidences = 0.5 * (0.5 * child.getNegLog10PError() +
Math.min(((VariantBackedByGenotype)parent1).getCalledGenotype().getNegLog10PError(),
((VariantBackedByGenotype)parent2).getCalledGenotype().getNegLog10PError()));
double sumConfidences = 0.5 * (0.5 * child.getNegLog10PError() +
Math.min(parent1.getNegLog10PError(), parent2.getNegLog10PError()));
out.format("%s\t", child.getLocation().getContig());
out.format("%s\t", child.getLocation().getStart());
out.format("%.4f\t", sumConfidences);
out.format("%.4f\t", child.getNegLog10PError());
out.format("%.4f\t", ((VariantBackedByGenotype)parent1).getCalledGenotype().getNegLog10PError());
out.format("%.4f\t", ((VariantBackedByGenotype)parent2).getCalledGenotype().getNegLog10PError());
out.format("%s\t", dbsnp != null);
out.format("%s\t", child.getLocation().getContig());
out.format("%s\t", child.getLocation().getStart());
out.format("%.4f\t", sumConfidences);
out.format("%.4f\t", child.getNegLog10PError());
out.format("%.4f\t", parent1.getNegLog10PError());
out.format("%.4f\t", parent2.getNegLog10PError());
out.format("%s\t", dbsnp != null);
out.format ("%s\t", child.toString());
out.format ("%s\t", parent1.toString());
out.format ("%s", parent2.toString());
if (dbsnp != null)
out.format ("\tDBSNP\t:%s", dbsnp.toString());
out.println();
out.format ("%s\t", child.toString());
out.format ("%s\t", parent1.toString());
out.format ("%s", parent2.toString());
if (dbsnp != null)
out.format ("\tDBSNP\t:%s", dbsnp.toString());
out.println();
}
}
}
@ -67,5 +113,5 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
return 1;
}
public void onTraversalDone(Integer result) {} // Don't print the reduce result
}