- Optimized implementation of -byReadGroup in DoCWalker
- Added implementation of -bySample in DoCWalker - Removed CoverageBySample and added a watered down version to the examples directory git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2209 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7c73496e72
commit
b979bd2ced
|
|
@ -0,0 +1,68 @@
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.examples;
|
||||||
|
|
||||||
|
import net.sf.samtools.*;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.*;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class CoverageBySample extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
private HashSet<String> sampleNames = new HashSet<String>();
|
||||||
|
|
||||||
|
public boolean requiresReads() { return true; }
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
|
||||||
|
List<SAMReadGroupRecord> read_groups = this.getToolkit().getSAMFileHeader().getReadGroups();
|
||||||
|
|
||||||
|
for ( SAMReadGroupRecord record : read_groups ) {
|
||||||
|
String sample = record.getSample();
|
||||||
|
if ( sample != null )
|
||||||
|
sampleNames.add(sample);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
||||||
|
HashMap<String, Integer> depthBySample = new HashMap<String, Integer>();
|
||||||
|
for ( String sample : sampleNames )
|
||||||
|
depthBySample.put(sample, 0);
|
||||||
|
|
||||||
|
ReadBackedPileup pileup = context.getPileup();
|
||||||
|
for ( PileupElement p : pileup ) {
|
||||||
|
|
||||||
|
SAMReadGroupRecord readGroup = p.getRead().getReadGroup();
|
||||||
|
if ( readGroup == null )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
String sample = readGroup.getSample();
|
||||||
|
if ( sample != null ) {
|
||||||
|
int oldDepth = depthBySample.get(sample);
|
||||||
|
depthBySample.put(sample, oldDepth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( Map.Entry<String, Integer> sample : depthBySample.entrySet() ) {
|
||||||
|
out.printf(" %s %8d%n", sample.getKey(), sample.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
out.println("Processed " + result + " loci.");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return sum + value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -32,9 +32,7 @@ import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Display the depth of coverage at a given locus.
|
* Display the depth of coverage at a given locus.
|
||||||
|
|
@ -48,16 +46,25 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
||||||
}
|
}
|
||||||
|
|
||||||
@Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false)
|
@Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false)
|
||||||
public printType printStyle = printType.COMPACT;
|
protected printType printStyle = printType.COMPACT;
|
||||||
|
|
||||||
@Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false)
|
@Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false)
|
||||||
public boolean excludeDeletionsInCoverage = false;
|
protected boolean excludeDeletionsInCoverage = false;
|
||||||
|
|
||||||
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false)
|
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false)
|
||||||
public int excludeMAPQBelowThis = -1;
|
protected int excludeMAPQBelowThis = -1;
|
||||||
|
|
||||||
@Argument(fullName="byReadGroup", shortName="byRG", doc="List read depths for each read group")
|
@Argument(fullName="byReadGroup", shortName="byRG", doc="List read depths for each read group")
|
||||||
public boolean byReadGroup = false;
|
protected boolean byReadGroup = false;
|
||||||
|
|
||||||
|
@Argument(fullName="bySample", shortName="bySample", doc="List read depths for each sample")
|
||||||
|
protected boolean bySample = false;
|
||||||
|
|
||||||
|
|
||||||
|
private TreeSet<String> readGroupNames = new TreeSet<String>();
|
||||||
|
private TreeSet<String> sampleNames = new TreeSet<String>();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; }
|
public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; }
|
||||||
|
|
||||||
|
|
@ -70,15 +77,32 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
||||||
out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n");
|
out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( byReadGroup ) {
|
||||||
|
List<SAMReadGroupRecord> readGroups = this.getToolkit().getSAMFileHeader().getReadGroups();
|
||||||
|
for ( SAMReadGroupRecord record : readGroups )
|
||||||
|
readGroupNames.add(record.getReadGroupId());
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( bySample ) {
|
||||||
|
List<SAMReadGroupRecord> readGroups = this.getToolkit().getSAMFileHeader().getReadGroups();
|
||||||
|
for ( SAMReadGroupRecord record : readGroups ) {
|
||||||
|
String sample = record.getSample();
|
||||||
|
if ( sample != null )
|
||||||
|
sampleNames.add(sample);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0;
|
int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0;
|
||||||
|
|
||||||
HashMap<String, Integer> depthByReadGroup = new HashMap<String, Integer>();
|
HashMap<String, Integer> depthByReadGroup = new HashMap<String, Integer>();
|
||||||
for (SAMReadGroupRecord rg : this.getToolkit().getSAMFileHeader().getReadGroups()) {
|
for ( String readGroupName : readGroupNames )
|
||||||
depthByReadGroup.put(rg.getReadGroupId(), 0);
|
depthByReadGroup.put(readGroupName, 0);
|
||||||
}
|
HashMap<String, Integer> depthBySample = new HashMap<String, Integer>();
|
||||||
|
for ( String sample : sampleNames )
|
||||||
|
depthBySample.put(sample, 0);
|
||||||
|
|
||||||
ReadBackedPileup pileup = context.getPileup();
|
ReadBackedPileup pileup = context.getPileup();
|
||||||
for ( PileupElement p : pileup ) {
|
for ( PileupElement p : pileup ) {
|
||||||
|
|
@ -87,9 +111,23 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
||||||
else if ( p.isDeletion() ) nDeletionReads++;
|
else if ( p.isDeletion() ) nDeletionReads++;
|
||||||
else nCleanReads++;
|
else nCleanReads++;
|
||||||
|
|
||||||
String readGroupName = p.getRead().getReadGroup().getReadGroupId();
|
SAMReadGroupRecord readGroup = p.getRead().getReadGroup();
|
||||||
int oldDepth = depthByReadGroup.get(readGroupName);
|
if ( readGroup == null )
|
||||||
depthByReadGroup.put(readGroupName, oldDepth + 1);
|
continue;
|
||||||
|
|
||||||
|
if ( byReadGroup ) {
|
||||||
|
String readGroupName = readGroup.getReadGroupId();
|
||||||
|
int oldDepth = depthByReadGroup.get(readGroupName);
|
||||||
|
depthByReadGroup.put(readGroupName, oldDepth + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( bySample ) {
|
||||||
|
String sample = readGroup.getSample();
|
||||||
|
if ( sample != null ) {
|
||||||
|
int oldDepth = depthBySample.get(sample);
|
||||||
|
depthBySample.put(sample, oldDepth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads);
|
int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads);
|
||||||
|
|
@ -103,15 +141,18 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (byReadGroup) {
|
if ( byReadGroup ) {
|
||||||
ArrayList<String> rgs = new ArrayList<String>(depthByReadGroup.keySet());
|
for ( String rg : readGroupNames ) {
|
||||||
Collections.sort(rgs);
|
|
||||||
|
|
||||||
for (String rg : rgs) {
|
|
||||||
out.printf(" %s %8d%n", rg, depthByReadGroup.get(rg));
|
out.printf(" %s %8d%n", rg, depthByReadGroup.get(rg));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( bySample ) {
|
||||||
|
for ( String sample : sampleNames ) {
|
||||||
|
out.printf(" %s %8d%n", sample, depthBySample.get(sample));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nTotalReads;
|
return nTotalReads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,185 +0,0 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
|
||||||
import org.broadinstitute.sting.gatk.*;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
public class CoverageBySample extends LocusWalker<String, String>
|
|
||||||
{
|
|
||||||
List<String> sample_names = null;
|
|
||||||
|
|
||||||
public boolean requiresReads() { return true; }
|
|
||||||
|
|
||||||
private SAMFileHeader header;
|
|
||||||
|
|
||||||
public void initialize()
|
|
||||||
{
|
|
||||||
GenomeAnalysisEngine toolkit = this.getToolkit();
|
|
||||||
this.header = toolkit.getSAMFileHeader();
|
|
||||||
List<SAMReadGroupRecord> read_groups = header.getReadGroups();
|
|
||||||
|
|
||||||
sample_names = new ArrayList<String>();
|
|
||||||
HashSet<String> unique_sample_names = new HashSet<String>();
|
|
||||||
|
|
||||||
for (int i = 0; i < read_groups.size(); i++)
|
|
||||||
{
|
|
||||||
String sample_name = read_groups.get(i).getSample();
|
|
||||||
if (unique_sample_names.contains(sample_name)) { continue; }
|
|
||||||
unique_sample_names.add(sample_name);
|
|
||||||
sample_names.add(sample_name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
|
|
||||||
{
|
|
||||||
String line = context.getLocation().getContig() + " " + context.getLocation().getStart() + " " ;
|
|
||||||
|
|
||||||
AlignmentContext[] contexts = filterAlignmentContext(context, sample_names, 0);
|
|
||||||
|
|
||||||
HashMap<String,Integer> counts = countReadsBySample(context);
|
|
||||||
for (int i = 0; i < contexts.length; i++)
|
|
||||||
{
|
|
||||||
List<SAMRecord> reads = contexts[i].getReads();
|
|
||||||
List<Integer> offsets = contexts[i].getOffsets();
|
|
||||||
|
|
||||||
out.printf("%s %s ", context.getLocation(), sample_names.get(i));
|
|
||||||
|
|
||||||
int[] forward_counts = new int[4];
|
|
||||||
int[] backward_counts = new int[4];
|
|
||||||
|
|
||||||
for (int j = 0; j < reads.size(); j++)
|
|
||||||
{
|
|
||||||
SAMRecord read = reads.get(j);
|
|
||||||
int offset = offsets.get(j);
|
|
||||||
boolean backward = read.getReadNegativeStrandFlag();
|
|
||||||
char base = Character.toUpperCase((char)(read.getReadBases()[offset]));
|
|
||||||
|
|
||||||
if (BaseUtils.simpleBaseToBaseIndex(base) == -1) { continue; }
|
|
||||||
|
|
||||||
if (backward) { base = Character.toLowerCase(base); }
|
|
||||||
|
|
||||||
if (! backward) { forward_counts[BaseUtils.simpleBaseToBaseIndex(base)]++; }
|
|
||||||
else { backward_counts[BaseUtils.simpleBaseToBaseIndex(base)]++; }
|
|
||||||
|
|
||||||
//out.printf("%c", base);
|
|
||||||
}
|
|
||||||
out.printf("A[%d] C[%d] G[%d] T[%d] a[%d] c[%d] g[%d] t[%d]",
|
|
||||||
forward_counts[0],
|
|
||||||
forward_counts[1],
|
|
||||||
forward_counts[2],
|
|
||||||
forward_counts[3],
|
|
||||||
backward_counts[0],
|
|
||||||
backward_counts[1],
|
|
||||||
backward_counts[2],
|
|
||||||
backward_counts[3]);
|
|
||||||
out.printf("\n");
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
private HashMap<String,Integer> countReadsBySample(AlignmentContext context)
|
|
||||||
{
|
|
||||||
HashMap<String,Integer> counts = new HashMap<String,Integer>();
|
|
||||||
for (int i = 0; i < sample_names.size(); i++)
|
|
||||||
{
|
|
||||||
counts.put(sample_names.get(i), 0);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < context.getReads().size(); i++)
|
|
||||||
{
|
|
||||||
SAMRecord read = context.getReads().get(i);
|
|
||||||
String sample = read.getReadGroup().getSample();
|
|
||||||
counts.put(sample, counts.get(sample)+1);
|
|
||||||
}
|
|
||||||
return counts;
|
|
||||||
}
|
|
||||||
|
|
||||||
private AlignmentContext[] filterAlignmentContext(AlignmentContext context, List<String> sample_names, int downsample)
|
|
||||||
{
|
|
||||||
HashMap<String,Integer> index = new HashMap<String,Integer>();
|
|
||||||
for (int i = 0; i < sample_names.size(); i++)
|
|
||||||
{
|
|
||||||
index.put(sample_names.get(i), i);
|
|
||||||
}
|
|
||||||
|
|
||||||
AlignmentContext[] contexts = new AlignmentContext[sample_names.size()];
|
|
||||||
ArrayList<SAMRecord>[] reads = new ArrayList[sample_names.size()];
|
|
||||||
ArrayList<Integer>[] offsets = new ArrayList[sample_names.size()];
|
|
||||||
|
|
||||||
for (int i = 0; i < sample_names.size(); i++)
|
|
||||||
{
|
|
||||||
reads[i] = new ArrayList<SAMRecord>();
|
|
||||||
offsets[i] = new ArrayList<Integer>();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < context.getReads().size(); i++)
|
|
||||||
{
|
|
||||||
SAMRecord read = context.getReads().get(i);
|
|
||||||
Integer offset = context.getOffsets().get(i);
|
|
||||||
|
|
||||||
assert(header != null);
|
|
||||||
assert(read.getReadGroup() != null);
|
|
||||||
|
|
||||||
String sample = read.getReadGroup().getSample();
|
|
||||||
//if (SAMPLE_NAME_REGEX != null) { sample = sample.replaceAll(SAMPLE_NAME_REGEX, "$1"); }
|
|
||||||
reads[index.get(sample)].add(read);
|
|
||||||
offsets[index.get(sample)].add(offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (downsample != 0)
|
|
||||||
{
|
|
||||||
for (int j = 0; j < reads.length; j++)
|
|
||||||
{
|
|
||||||
List<Integer> perm = new ArrayList<Integer>();
|
|
||||||
for (int i = 0; i < reads[j].size(); i++) { perm.add(i); }
|
|
||||||
perm = Utils.RandomSubset(perm, downsample);
|
|
||||||
|
|
||||||
ArrayList<SAMRecord> downsampled_reads = new ArrayList<SAMRecord>();
|
|
||||||
ArrayList<Integer> downsampled_offsets = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
for (int i = 0; i < perm.size(); i++)
|
|
||||||
{
|
|
||||||
downsampled_reads.add(reads[j].get(perm.get(i)));
|
|
||||||
downsampled_offsets.add(offsets[j].get(perm.get(i)));
|
|
||||||
}
|
|
||||||
|
|
||||||
reads[j] = downsampled_reads;
|
|
||||||
offsets[j] = downsampled_offsets;
|
|
||||||
contexts[j] = new AlignmentContext(context.getLocation(), reads[j], offsets[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int j = 0; j < reads.length; j++)
|
|
||||||
{
|
|
||||||
contexts[j] = new AlignmentContext(context.getLocation(), reads[j], offsets[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return contexts;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraversalDone(String result)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String reduceInit()
|
|
||||||
{
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String reduce(String line, String sum)
|
|
||||||
{
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue