- Optimized implementation of -byReadGroup in DoCWalker

- Added implementation of -bySample in DoCWalker
- Removed CoverageBySample and added a watered down version to the examples directory



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2209 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-02 03:39:24 +00:00
parent 7c73496e72
commit b979bd2ced
3 changed files with 127 additions and 203 deletions

View File

@@ -0,0 +1,68 @@
package org.broadinstitute.sting.gatk.examples;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.utils.pileup.*;
import java.util.*;
/**
 * Example walker that prints, at every covered locus, the read depth observed
 * for each sample named in the input BAM's read groups.
 */
public class CoverageBySample extends LocusWalker<Integer, Integer> {

    // Distinct sample names harvested from the SAM header's read groups.
    private HashSet<String> sampleNames = new HashSet<String>();

    public boolean requiresReads() { return true; }

    /**
     * Collects the non-null sample names from every read group in the header.
     */
    public void initialize() {
        for ( SAMReadGroupRecord rg : this.getToolkit().getSAMFileHeader().getReadGroups() ) {
            String sampleName = rg.getSample();
            if ( sampleName != null ) {
                sampleNames.add(sampleName);
            }
        }
    }

    /**
     * Tallies pileup depth per sample at this locus and prints one line per
     * sample.  Pileup elements whose read has no read group, or whose read
     * group carries no sample name, are skipped.
     *
     * @return 1, so the reduce step counts processed loci
     */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        HashMap<String, Integer> depthBySample = new HashMap<String, Integer>();
        for ( String sampleName : sampleNames ) {
            depthBySample.put(sampleName, 0);
        }

        for ( PileupElement element : context.getPileup() ) {
            SAMReadGroupRecord rg = element.getRead().getReadGroup();
            if ( rg == null ) {
                continue;
            }
            String sampleName = rg.getSample();
            if ( sampleName != null ) {
                depthBySample.put(sampleName, depthBySample.get(sampleName) + 1);
            }
        }

        for ( Map.Entry<String, Integer> entry : depthBySample.entrySet() ) {
            out.printf(" %s %8d%n", entry.getKey(), entry.getValue());
        }
        return 1;
    }

    public void onTraversalDone(Integer result) {
        out.println("Processed " + result + " loci.");
    }

    public Integer reduceInit() {
        return 0;
    }

    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }
}

View File

@@ -32,9 +32,7 @@ import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.pileup.*;
import net.sf.samtools.SAMReadGroupRecord;
import java.util.HashMap;
import java.util.Collections;
import java.util.ArrayList;
import java.util.*;
/**
* Display the depth of coverage at a given locus.
@@ -48,16 +46,25 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
}
@Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false)
public printType printStyle = printType.COMPACT;
protected printType printStyle = printType.COMPACT;
@Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false)
public boolean excludeDeletionsInCoverage = false;
protected boolean excludeDeletionsInCoverage = false;
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false)
public int excludeMAPQBelowThis = -1;
protected int excludeMAPQBelowThis = -1;
@Argument(fullName="byReadGroup", shortName="byRG", doc="List read depths for each read group")
public boolean byReadGroup = false;
protected boolean byReadGroup = false;
@Argument(fullName="bySample", shortName="bySample", doc="List read depths for each sample")
protected boolean bySample = false;
private TreeSet<String> readGroupNames = new TreeSet<String>();
private TreeSet<String> sampleNames = new TreeSet<String>();
public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; }
@@ -70,15 +77,32 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n");
break;
}
if ( byReadGroup ) {
List<SAMReadGroupRecord> readGroups = this.getToolkit().getSAMFileHeader().getReadGroups();
for ( SAMReadGroupRecord record : readGroups )
readGroupNames.add(record.getReadGroupId());
}
if ( bySample ) {
List<SAMReadGroupRecord> readGroups = this.getToolkit().getSAMFileHeader().getReadGroups();
for ( SAMReadGroupRecord record : readGroups ) {
String sample = record.getSample();
if ( sample != null )
sampleNames.add(sample);
}
}
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0;
HashMap<String, Integer> depthByReadGroup = new HashMap<String, Integer>();
for (SAMReadGroupRecord rg : this.getToolkit().getSAMFileHeader().getReadGroups()) {
depthByReadGroup.put(rg.getReadGroupId(), 0);
}
for ( String readGroupName : readGroupNames )
depthByReadGroup.put(readGroupName, 0);
HashMap<String, Integer> depthBySample = new HashMap<String, Integer>();
for ( String sample : sampleNames )
depthBySample.put(sample, 0);
ReadBackedPileup pileup = context.getPileup();
for ( PileupElement p : pileup ) {
@@ -87,9 +111,23 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
else if ( p.isDeletion() ) nDeletionReads++;
else nCleanReads++;
String readGroupName = p.getRead().getReadGroup().getReadGroupId();
int oldDepth = depthByReadGroup.get(readGroupName);
depthByReadGroup.put(readGroupName, oldDepth + 1);
SAMReadGroupRecord readGroup = p.getRead().getReadGroup();
if ( readGroup == null )
continue;
if ( byReadGroup ) {
String readGroupName = readGroup.getReadGroupId();
int oldDepth = depthByReadGroup.get(readGroupName);
depthByReadGroup.put(readGroupName, oldDepth + 1);
}
if ( bySample ) {
String sample = readGroup.getSample();
if ( sample != null ) {
int oldDepth = depthBySample.get(sample);
depthBySample.put(sample, oldDepth + 1);
}
}
}
int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads);
@@ -103,15 +141,18 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
break;
}
if (byReadGroup) {
ArrayList<String> rgs = new ArrayList<String>(depthByReadGroup.keySet());
Collections.sort(rgs);
for (String rg : rgs) {
if ( byReadGroup ) {
for ( String rg : readGroupNames ) {
out.printf(" %s %8d%n", rg, depthByReadGroup.get(rg));
}
}
if ( bySample ) {
for ( String sample : sampleNames ) {
out.printf(" %s %8d%n", sample, depthBySample.get(sample));
}
}
return nTotalReads;
}

View File

@@ -1,185 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.*;
/**
 * Walker that, at each locus, partitions the pileup by sample (as declared in
 * the SAM header's read groups) and prints per-sample forward/reverse base
 * counts (A/C/G/T uppercase for the forward strand, lowercase for reverse).
 */
public class CoverageBySample extends LocusWalker<String, String>
{
// Sample names in first-seen header order, de-duplicated (built in initialize()).
List<String> sample_names = null;
public boolean requiresReads() { return true; }
// SAM header cached from the toolkit; read in initialize(), asserted non-null later.
private SAMFileHeader header;
/**
 * Caches the SAM file header and builds the ordered, de-duplicated list of
 * sample names from the header's read groups.
 */
public void initialize()
{
GenomeAnalysisEngine toolkit = this.getToolkit();
this.header = toolkit.getSAMFileHeader();
List<SAMReadGroupRecord> read_groups = header.getReadGroups();
sample_names = new ArrayList<String>();
HashSet<String> unique_sample_names = new HashSet<String>();
for (int i = 0; i < read_groups.size(); i++)
{
String sample_name = read_groups.get(i).getSample();
// Keep first occurrence only, preserving header order.
if (unique_sample_names.contains(sample_name)) { continue; }
unique_sample_names.add(sample_name);
sample_names.add(sample_name);
}
}
/**
 * Splits the locus context into one sub-context per sample, then prints one
 * line per sample with strand-specific base counts at this position.
 *
 * @return an empty string (the reduce chain carries no data)
 */
public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
{
// NOTE(review): 'line' is built but never used — dead local.
String line = context.getLocation().getContig() + " " + context.getLocation().getStart() + " " ;
// downsample == 0 means "no downsampling" (see filterAlignmentContext).
AlignmentContext[] contexts = filterAlignmentContext(context, sample_names, 0);
// NOTE(review): 'counts' is computed but never used — dead call.
HashMap<String,Integer> counts = countReadsBySample(context);
// contexts[i] corresponds to sample_names.get(i) by construction.
for (int i = 0; i < contexts.length; i++)
{
List<SAMRecord> reads = contexts[i].getReads();
List<Integer> offsets = contexts[i].getOffsets();
out.printf("%s %s ", context.getLocation(), sample_names.get(i));
// Per-strand tallies indexed by BaseUtils.simpleBaseToBaseIndex (A,C,G,T).
int[] forward_counts = new int[4];
int[] backward_counts = new int[4];
for (int j = 0; j < reads.size(); j++)
{
SAMRecord read = reads.get(j);
int offset = offsets.get(j);
boolean backward = read.getReadNegativeStrandFlag();
char base = Character.toUpperCase((char)(read.getReadBases()[offset]));
// Skip non-ACGT bases (e.g. N), which map to index -1.
if (BaseUtils.simpleBaseToBaseIndex(base) == -1) { continue; }
// Lowercasing marks reverse-strand bases; index lookup is case-insensitive
// here since simpleBaseToBaseIndex already accepted the uppercase form.
if (backward) { base = Character.toLowerCase(base); }
if (! backward) { forward_counts[BaseUtils.simpleBaseToBaseIndex(base)]++; }
else { backward_counts[BaseUtils.simpleBaseToBaseIndex(base)]++; }
//out.printf("%c", base);
}
out.printf("A[%d] C[%d] G[%d] T[%d] a[%d] c[%d] g[%d] t[%d]",
forward_counts[0],
forward_counts[1],
forward_counts[2],
forward_counts[3],
backward_counts[0],
backward_counts[1],
backward_counts[2],
backward_counts[3]);
out.printf("\n");
}
return "";
}
/**
 * Counts reads in the context per sample name.
 * NOTE(review): assumes every read has a non-null read group whose sample is
 * present in sample_names — otherwise this NPEs; confirm against input BAMs.
 */
private HashMap<String,Integer> countReadsBySample(AlignmentContext context)
{
HashMap<String,Integer> counts = new HashMap<String,Integer>();
for (int i = 0; i < sample_names.size(); i++)
{
counts.put(sample_names.get(i), 0);
}
for (int i = 0; i < context.getReads().size(); i++)
{
SAMRecord read = context.getReads().get(i);
String sample = read.getReadGroup().getSample();
counts.put(sample, counts.get(sample)+1);
}
return counts;
}
/**
 * Partitions a locus context into one AlignmentContext per sample, in the
 * order of sample_names.  If downsample is non-zero, each per-sample read
 * list is reduced to a random subset of at most that size (via
 * Utils.RandomSubset); downsample == 0 keeps all reads.
 */
private AlignmentContext[] filterAlignmentContext(AlignmentContext context, List<String> sample_names, int downsample)
{
// Map sample name -> slot index so reads can be bucketed in one pass.
HashMap<String,Integer> index = new HashMap<String,Integer>();
for (int i = 0; i < sample_names.size(); i++)
{
index.put(sample_names.get(i), i);
}
AlignmentContext[] contexts = new AlignmentContext[sample_names.size()];
// Generic array creation is unchecked here; contents are homogeneous by construction.
ArrayList<SAMRecord>[] reads = new ArrayList[sample_names.size()];
ArrayList<Integer>[] offsets = new ArrayList[sample_names.size()];
for (int i = 0; i < sample_names.size(); i++)
{
reads[i] = new ArrayList<SAMRecord>();
offsets[i] = new ArrayList<Integer>();
}
// Bucket each read (and its pileup offset) into its sample's slot.
for (int i = 0; i < context.getReads().size(); i++)
{
SAMRecord read = context.getReads().get(i);
Integer offset = context.getOffsets().get(i);
assert(header != null);
assert(read.getReadGroup() != null);
String sample = read.getReadGroup().getSample();
//if (SAMPLE_NAME_REGEX != null) { sample = sample.replaceAll(SAMPLE_NAME_REGEX, "$1"); }
reads[index.get(sample)].add(read);
offsets[index.get(sample)].add(offset);
}
if (downsample != 0)
{
for (int j = 0; j < reads.length; j++)
{
// Pick a random subset of positions, then keep reads/offsets in lockstep.
List<Integer> perm = new ArrayList<Integer>();
for (int i = 0; i < reads[j].size(); i++) { perm.add(i); }
perm = Utils.RandomSubset(perm, downsample);
ArrayList<SAMRecord> downsampled_reads = new ArrayList<SAMRecord>();
ArrayList<Integer> downsampled_offsets = new ArrayList<Integer>();
for (int i = 0; i < perm.size(); i++)
{
downsampled_reads.add(reads[j].get(perm.get(i)));
downsampled_offsets.add(offsets[j].get(perm.get(i)));
}
reads[j] = downsampled_reads;
offsets[j] = downsampled_offsets;
contexts[j] = new AlignmentContext(context.getLocation(), reads[j], offsets[j]);
}
}
else
{
for (int j = 0; j < reads.length; j++)
{
contexts[j] = new AlignmentContext(context.getLocation(), reads[j], offsets[j]);
}
}
return contexts;
}
public void onTraversalDone(String result)
{
return;
}
public String reduceInit()
{
return "";
}
// Reduce discards all per-locus results; the traversal is print-driven.
public String reduce(String line, String sum)
{
return "";
}
}