- Optimized implementation of -byReadGroup in DoCWalker
- Added implementation of -bySample in DoCWalker - Removed CoverageBySample and added a watered down version to the examples directory git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2209 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7c73496e72
commit
b979bd2ced
|
|
@ -0,0 +1,68 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.examples;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class CoverageBySample extends LocusWalker<Integer, Integer> {
|
||||
|
||||
private HashSet<String> sampleNames = new HashSet<String>();
|
||||
|
||||
public boolean requiresReads() { return true; }
|
||||
|
||||
public void initialize() {
|
||||
|
||||
List<SAMReadGroupRecord> read_groups = this.getToolkit().getSAMFileHeader().getReadGroups();
|
||||
|
||||
for ( SAMReadGroupRecord record : read_groups ) {
|
||||
String sample = record.getSample();
|
||||
if ( sample != null )
|
||||
sampleNames.add(sample);
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
HashMap<String, Integer> depthBySample = new HashMap<String, Integer>();
|
||||
for ( String sample : sampleNames )
|
||||
depthBySample.put(sample, 0);
|
||||
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
|
||||
SAMReadGroupRecord readGroup = p.getRead().getReadGroup();
|
||||
if ( readGroup == null )
|
||||
continue;
|
||||
|
||||
String sample = readGroup.getSample();
|
||||
if ( sample != null ) {
|
||||
int oldDepth = depthBySample.get(sample);
|
||||
depthBySample.put(sample, oldDepth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
for ( Map.Entry<String, Integer> sample : depthBySample.entrySet() ) {
|
||||
out.printf(" %s %8d%n", sample.getKey(), sample.getValue());
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
out.println("Processed " + result + " loci.");
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum + value;
|
||||
}
|
||||
}
|
||||
|
|
@ -32,9 +32,7 @@ import org.broadinstitute.sting.utils.Pair;
|
|||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Collections;
|
||||
import java.util.ArrayList;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Display the depth of coverage at a given locus.
|
||||
|
|
@ -48,16 +46,25 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
}
|
||||
|
||||
@Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false)
|
||||
public printType printStyle = printType.COMPACT;
|
||||
protected printType printStyle = printType.COMPACT;
|
||||
|
||||
@Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false)
|
||||
public boolean excludeDeletionsInCoverage = false;
|
||||
protected boolean excludeDeletionsInCoverage = false;
|
||||
|
||||
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false)
|
||||
public int excludeMAPQBelowThis = -1;
|
||||
protected int excludeMAPQBelowThis = -1;
|
||||
|
||||
@Argument(fullName="byReadGroup", shortName="byRG", doc="List read depths for each read group")
|
||||
public boolean byReadGroup = false;
|
||||
protected boolean byReadGroup = false;
|
||||
|
||||
@Argument(fullName="bySample", shortName="bySample", doc="List read depths for each sample")
|
||||
protected boolean bySample = false;
|
||||
|
||||
|
||||
private TreeSet<String> readGroupNames = new TreeSet<String>();
|
||||
private TreeSet<String> sampleNames = new TreeSet<String>();
|
||||
|
||||
|
||||
|
||||
public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; }
|
||||
|
||||
|
|
@ -70,15 +77,32 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n");
|
||||
break;
|
||||
}
|
||||
|
||||
if ( byReadGroup ) {
|
||||
List<SAMReadGroupRecord> readGroups = this.getToolkit().getSAMFileHeader().getReadGroups();
|
||||
for ( SAMReadGroupRecord record : readGroups )
|
||||
readGroupNames.add(record.getReadGroupId());
|
||||
}
|
||||
|
||||
if ( bySample ) {
|
||||
List<SAMReadGroupRecord> readGroups = this.getToolkit().getSAMFileHeader().getReadGroups();
|
||||
for ( SAMReadGroupRecord record : readGroups ) {
|
||||
String sample = record.getSample();
|
||||
if ( sample != null )
|
||||
sampleNames.add(sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0;
|
||||
|
||||
HashMap<String, Integer> depthByReadGroup = new HashMap<String, Integer>();
|
||||
for (SAMReadGroupRecord rg : this.getToolkit().getSAMFileHeader().getReadGroups()) {
|
||||
depthByReadGroup.put(rg.getReadGroupId(), 0);
|
||||
}
|
||||
for ( String readGroupName : readGroupNames )
|
||||
depthByReadGroup.put(readGroupName, 0);
|
||||
HashMap<String, Integer> depthBySample = new HashMap<String, Integer>();
|
||||
for ( String sample : sampleNames )
|
||||
depthBySample.put(sample, 0);
|
||||
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
|
|
@ -87,9 +111,23 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
else if ( p.isDeletion() ) nDeletionReads++;
|
||||
else nCleanReads++;
|
||||
|
||||
String readGroupName = p.getRead().getReadGroup().getReadGroupId();
|
||||
int oldDepth = depthByReadGroup.get(readGroupName);
|
||||
depthByReadGroup.put(readGroupName, oldDepth + 1);
|
||||
SAMReadGroupRecord readGroup = p.getRead().getReadGroup();
|
||||
if ( readGroup == null )
|
||||
continue;
|
||||
|
||||
if ( byReadGroup ) {
|
||||
String readGroupName = readGroup.getReadGroupId();
|
||||
int oldDepth = depthByReadGroup.get(readGroupName);
|
||||
depthByReadGroup.put(readGroupName, oldDepth + 1);
|
||||
}
|
||||
|
||||
if ( bySample ) {
|
||||
String sample = readGroup.getSample();
|
||||
if ( sample != null ) {
|
||||
int oldDepth = depthBySample.get(sample);
|
||||
depthBySample.put(sample, oldDepth + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads);
|
||||
|
|
@ -103,15 +141,18 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
break;
|
||||
}
|
||||
|
||||
if (byReadGroup) {
|
||||
ArrayList<String> rgs = new ArrayList<String>(depthByReadGroup.keySet());
|
||||
Collections.sort(rgs);
|
||||
|
||||
for (String rg : rgs) {
|
||||
if ( byReadGroup ) {
|
||||
for ( String rg : readGroupNames ) {
|
||||
out.printf(" %s %8d%n", rg, depthByReadGroup.get(rg));
|
||||
}
|
||||
}
|
||||
|
||||
if ( bySample ) {
|
||||
for ( String sample : sampleNames ) {
|
||||
out.printf(" %s %8d%n", sample, depthBySample.get(sample));
|
||||
}
|
||||
}
|
||||
|
||||
return nTotalReads;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,185 +0,0 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class CoverageBySample extends LocusWalker<String, String>
|
||||
{
|
||||
List<String> sample_names = null;
|
||||
|
||||
public boolean requiresReads() { return true; }
|
||||
|
||||
private SAMFileHeader header;
|
||||
|
||||
public void initialize()
|
||||
{
|
||||
GenomeAnalysisEngine toolkit = this.getToolkit();
|
||||
this.header = toolkit.getSAMFileHeader();
|
||||
List<SAMReadGroupRecord> read_groups = header.getReadGroups();
|
||||
|
||||
sample_names = new ArrayList<String>();
|
||||
HashSet<String> unique_sample_names = new HashSet<String>();
|
||||
|
||||
for (int i = 0; i < read_groups.size(); i++)
|
||||
{
|
||||
String sample_name = read_groups.get(i).getSample();
|
||||
if (unique_sample_names.contains(sample_name)) { continue; }
|
||||
unique_sample_names.add(sample_name);
|
||||
sample_names.add(sample_name);
|
||||
}
|
||||
}
|
||||
|
||||
public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
|
||||
{
|
||||
String line = context.getLocation().getContig() + " " + context.getLocation().getStart() + " " ;
|
||||
|
||||
AlignmentContext[] contexts = filterAlignmentContext(context, sample_names, 0);
|
||||
|
||||
HashMap<String,Integer> counts = countReadsBySample(context);
|
||||
for (int i = 0; i < contexts.length; i++)
|
||||
{
|
||||
List<SAMRecord> reads = contexts[i].getReads();
|
||||
List<Integer> offsets = contexts[i].getOffsets();
|
||||
|
||||
out.printf("%s %s ", context.getLocation(), sample_names.get(i));
|
||||
|
||||
int[] forward_counts = new int[4];
|
||||
int[] backward_counts = new int[4];
|
||||
|
||||
for (int j = 0; j < reads.size(); j++)
|
||||
{
|
||||
SAMRecord read = reads.get(j);
|
||||
int offset = offsets.get(j);
|
||||
boolean backward = read.getReadNegativeStrandFlag();
|
||||
char base = Character.toUpperCase((char)(read.getReadBases()[offset]));
|
||||
|
||||
if (BaseUtils.simpleBaseToBaseIndex(base) == -1) { continue; }
|
||||
|
||||
if (backward) { base = Character.toLowerCase(base); }
|
||||
|
||||
if (! backward) { forward_counts[BaseUtils.simpleBaseToBaseIndex(base)]++; }
|
||||
else { backward_counts[BaseUtils.simpleBaseToBaseIndex(base)]++; }
|
||||
|
||||
//out.printf("%c", base);
|
||||
}
|
||||
out.printf("A[%d] C[%d] G[%d] T[%d] a[%d] c[%d] g[%d] t[%d]",
|
||||
forward_counts[0],
|
||||
forward_counts[1],
|
||||
forward_counts[2],
|
||||
forward_counts[3],
|
||||
backward_counts[0],
|
||||
backward_counts[1],
|
||||
backward_counts[2],
|
||||
backward_counts[3]);
|
||||
out.printf("\n");
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private HashMap<String,Integer> countReadsBySample(AlignmentContext context)
|
||||
{
|
||||
HashMap<String,Integer> counts = new HashMap<String,Integer>();
|
||||
for (int i = 0; i < sample_names.size(); i++)
|
||||
{
|
||||
counts.put(sample_names.get(i), 0);
|
||||
}
|
||||
for (int i = 0; i < context.getReads().size(); i++)
|
||||
{
|
||||
SAMRecord read = context.getReads().get(i);
|
||||
String sample = read.getReadGroup().getSample();
|
||||
counts.put(sample, counts.get(sample)+1);
|
||||
}
|
||||
return counts;
|
||||
}
|
||||
|
||||
private AlignmentContext[] filterAlignmentContext(AlignmentContext context, List<String> sample_names, int downsample)
|
||||
{
|
||||
HashMap<String,Integer> index = new HashMap<String,Integer>();
|
||||
for (int i = 0; i < sample_names.size(); i++)
|
||||
{
|
||||
index.put(sample_names.get(i), i);
|
||||
}
|
||||
|
||||
AlignmentContext[] contexts = new AlignmentContext[sample_names.size()];
|
||||
ArrayList<SAMRecord>[] reads = new ArrayList[sample_names.size()];
|
||||
ArrayList<Integer>[] offsets = new ArrayList[sample_names.size()];
|
||||
|
||||
for (int i = 0; i < sample_names.size(); i++)
|
||||
{
|
||||
reads[i] = new ArrayList<SAMRecord>();
|
||||
offsets[i] = new ArrayList<Integer>();
|
||||
}
|
||||
|
||||
for (int i = 0; i < context.getReads().size(); i++)
|
||||
{
|
||||
SAMRecord read = context.getReads().get(i);
|
||||
Integer offset = context.getOffsets().get(i);
|
||||
|
||||
assert(header != null);
|
||||
assert(read.getReadGroup() != null);
|
||||
|
||||
String sample = read.getReadGroup().getSample();
|
||||
//if (SAMPLE_NAME_REGEX != null) { sample = sample.replaceAll(SAMPLE_NAME_REGEX, "$1"); }
|
||||
reads[index.get(sample)].add(read);
|
||||
offsets[index.get(sample)].add(offset);
|
||||
}
|
||||
|
||||
if (downsample != 0)
|
||||
{
|
||||
for (int j = 0; j < reads.length; j++)
|
||||
{
|
||||
List<Integer> perm = new ArrayList<Integer>();
|
||||
for (int i = 0; i < reads[j].size(); i++) { perm.add(i); }
|
||||
perm = Utils.RandomSubset(perm, downsample);
|
||||
|
||||
ArrayList<SAMRecord> downsampled_reads = new ArrayList<SAMRecord>();
|
||||
ArrayList<Integer> downsampled_offsets = new ArrayList<Integer>();
|
||||
|
||||
for (int i = 0; i < perm.size(); i++)
|
||||
{
|
||||
downsampled_reads.add(reads[j].get(perm.get(i)));
|
||||
downsampled_offsets.add(offsets[j].get(perm.get(i)));
|
||||
}
|
||||
|
||||
reads[j] = downsampled_reads;
|
||||
offsets[j] = downsampled_offsets;
|
||||
contexts[j] = new AlignmentContext(context.getLocation(), reads[j], offsets[j]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j = 0; j < reads.length; j++)
|
||||
{
|
||||
contexts[j] = new AlignmentContext(context.getLocation(), reads[j], offsets[j]);
|
||||
}
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
public void onTraversalDone(String result)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
public String reduceInit()
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
public String reduce(String line, String sum)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue