package org.broadinstitute.sting.playground.gatk.walkers; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.playground.indels.Matrix; import java.util.*; import java.io.*; // Beta iterative multi-sample caller // j.maguire 6-11-2009 public class MultiSampleCallerAccuracyTest extends MultiSampleCaller { @Argument(required=false, shortName="lod_threshold", doc="") public double LOD_THRESHOLD = 1e-6; @Argument(required=true, shortName="stats_output", doc="") public String STATS_OUTPUT; Matrix n_variants; Matrix n_found; PrintStream stats_output; public void initialize() { this.DISCOVERY_OUTPUT = "/dev/null"; this.INDIVIDUAL_OUTPUT = "/dev/null"; super.initialize(); n_variants = new Matrix(sample_names.size()*2, sample_names.size()*2); n_found = new Matrix(sample_names.size()*2, sample_names.size()*2); for (int i = 0; i < sample_names.size()*2; i++) { for (int j = 0; j < sample_names.size()*2; j++) { n_variants.set(i,j,0); n_found.set(i,j,0); } } try { stats_output = new PrintStream(STATS_OUTPUT); } catch (Exception e) { throw new RuntimeException(e); } } public MultiSampleCallResult map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { HapMapGenotypeROD hapmap = (HapMapGenotypeROD)tracker.lookup("hapmap", null); // Collect all the variants and the normals. ArrayList variant_samples = new ArrayList(); ArrayList reference_samples = new ArrayList(); int n_ref_chromosomes = 0; int n_alt_chromosomes = 0; String reference_genotype = String.format("%c%c", ref, ref); for (int i = 0; i < sample_names.size(); i++) { String true_genotype = hapmap.get(sample_names.get(i)); if (true_genotype == null) { continue; } if (true_genotype.equals(reference_genotype)) { reference_samples.add(sample_names.get(i)); } else { variant_samples.add(sample_names.get(i)); } if (true_genotype.equals(reference_genotype)) { n_ref_chromosomes += 1; } else if (true_genotype.contains(String.format("%c",ref))) { n_ref_chromosomes += 1; n_alt_chromosomes += 1; } else { n_alt_chromosomes += 2; } } // Put together a context. ArrayList working_samples = new ArrayList(); working_samples.addAll(variant_samples); working_samples.addAll(reference_samples); AlignmentContext working_context = filterAlignmentContextBySamples(context, working_samples); // Call. MultiSampleCallResult call_result = super.map(tracker, ref, working_context); EM_Result em_result = call_result.em_result; // Compute Statistics. if (n_variants == null) { System.out.printf("n_variants is null\n"); } if (n_found == null) { System.out.printf("n_found is null\n"); } n_variants.set(n_ref_chromosomes, n_alt_chromosomes, n_variants.get(n_ref_chromosomes, n_alt_chromosomes)+1); if ((call_result.lod > LOD_THRESHOLD) && (n_alt_chromosomes >= 1)) { n_found.set(n_ref_chromosomes, n_alt_chromosomes, n_found.get(n_ref_chromosomes, n_alt_chromosomes)+1); } return null; } private void PrintStats() { stats_output.printf("n_reference_chromosomes n_variant_chromosomes n_sites n_found fraction_found\n"); for (int i = 0; i < sample_names.size()*2; i++) { for (int j = 0; j < sample_names.size()*2; j++) { int N = (int)n_variants.get(i,j); int found = (int)n_found.get(i,j); if (N == 0) { continue; } if (found == 0) { continue; } double fraction_found = 100.0 * (double)found / (double)N; n_variants.set(i,j,0); n_found.set(i,j,0); stats_output.printf("%d %d %d %d %f\n", i, j, N, found, fraction_found); } } } public void onTraversalDone(String sum) { PrintStats(); stats_output.flush(); stats_output.close(); out.println("MultiSampleCallerAccuracyTest done."); return; } public String reduceInit() { return super.reduceInit(); } public String reduce(MultiSampleCallResult record, String sum) { return super.reduce(record, sum); } // END Walker Interface Functions ///////// ///////// // BEGIN Utility Functions // Filter a locus context by sample IDs // (pulls out only reads from the specified samples, and returns them in one context). private AlignmentContext filterAlignmentContextBySamples(AlignmentContext context, List sample_names) { HashSet index = new HashSet(); for (int i = 0; i < sample_names.size(); i++) { index.add(sample_names.get(i)); } ArrayList reads = new ArrayList(); ArrayList offsets = new ArrayList(); for (int i = 0; i < context.getReads().size(); i++) { SAMRecord read = context.getReads().get(i); Integer offset = context.getOffsets().get(i); String RG = (String)(read.getAttribute("RG")); assert(header != null); assert(header.getReadGroup(RG) != null); String sample = header.getReadGroup(RG).getSample(); if (SAMPLE_NAME_REGEX != null) { sample = sample.replaceAll(SAMPLE_NAME_REGEX, "$1"); } if (index.contains(sample)) { reads.add(read); offsets.add(offset); } } return new AlignmentContext(context.getLocation(), reads, offsets); } // END Utility Functions ///////// }