removing; should of gone to the CGA repo
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5633 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
da6f2d3c9d
commit
2089c3bdef
|
|
@ -1,135 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.capseg;
|
|
||||||
|
|
||||||
import org.broad.tribble.util.variantcontext.Allele;
|
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.By;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.PrintWriter;
|
|
||||||
import java.util.Random;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get the allele counts at loci that overlap both the bait list and DbSNP
|
|
||||||
*/
|
|
||||||
//@By(DataSource.REFERENCE_ORDERED_DATA)
|
|
||||||
public class AlleleCountWalker extends LocusWalker<Integer, Integer> {
|
|
||||||
@Argument(doc = "our output file name", shortName = "fl")
|
|
||||||
File output;
|
|
||||||
|
|
||||||
String dbTrack = "DbSNP";
|
|
||||||
String callTrack = "calls";
|
|
||||||
Random generator = new Random();
|
|
||||||
private PrintWriter out;
|
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
try {
|
|
||||||
out = new PrintWriter(new FileWriter(output));
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new IllegalArgumentException("Uncaught exception!");
|
|
||||||
}
|
|
||||||
out.println("contig,position,A,B,aBase,bBase,dbSNPOrientation");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
// check that the tracker and the context are not null, and we a variant, and some amount of bases
|
|
||||||
if (tracker == null || context == null || tracker.getAllVariantContexts(ref).size() < 1 || context.getBasePileup().size() == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
// get the het
|
|
||||||
VariantContext het = tracker.getVariantContext(ref, callTrack, context.getLocation());
|
|
||||||
VariantContext dbSNP = tracker.getVariantContext(ref, dbTrack, context.getLocation());
|
|
||||||
if (het == null || het.getHetCount() == 0) return 0;
|
|
||||||
|
|
||||||
|
|
||||||
byte[] bases = context.getBasePileup().getBases();
|
|
||||||
int[] counts = new int[5];
|
|
||||||
for (byte b : bases) {
|
|
||||||
if (b == 'a' || b == 'A') counts[0]++;
|
|
||||||
else if (b == 'c' || b == 'C') counts[1]++;
|
|
||||||
else if (b == 'g' || b == 'G') counts[2]++;
|
|
||||||
else if (b == 't' || b == 'T') counts[3]++;
|
|
||||||
else counts[4]++;
|
|
||||||
}
|
|
||||||
int aIndex = 0;
|
|
||||||
byte a = 'a';
|
|
||||||
|
|
||||||
// find the maximumvalue
|
|
||||||
for (int index = 0; index < counts.length; index++)
|
|
||||||
if (counts[index] > counts[aIndex]) {
|
|
||||||
aIndex = index;
|
|
||||||
a = (byte) ((index < 1) ? 'a' : (index < 2) ? 'c' : (index < 3) ? 'g' : 't');
|
|
||||||
}
|
|
||||||
|
|
||||||
int bIndex = (aIndex != 0) ? 0 : 1;
|
|
||||||
byte b = 'a';
|
|
||||||
for (int index = 0; index < counts.length; index++)
|
|
||||||
if (counts[index] > counts[bIndex] && index != aIndex) {
|
|
||||||
bIndex = index;
|
|
||||||
b = (byte) ((index < 1) ? 'a' : (index < 2) ? 'c' : (index < 3) ? 'g' : 't');
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean usedDbSNP = false;
|
|
||||||
// check if there is an ordering that we'd like to subscribe to
|
|
||||||
if (dbSNP != null) {
|
|
||||||
// get the alt from the DbSNP file
|
|
||||||
Set<Allele> alts = dbSNP.getAlternateAlleles();
|
|
||||||
|
|
||||||
// if true, swap the two
|
|
||||||
if (alts.size() == 1 && alts.iterator().next().basesMatch(String.valueOf((char) a))) {
|
|
||||||
byte temp = a;
|
|
||||||
a = b;
|
|
||||||
b = temp;
|
|
||||||
|
|
||||||
int tmp = aIndex;
|
|
||||||
aIndex = bIndex;
|
|
||||||
bIndex = tmp;
|
|
||||||
usedDbSNP = true;
|
|
||||||
} else if (alts.size() == 1 && alts.iterator().next().basesMatch(String.valueOf((char) b))) {
|
|
||||||
usedDbSNP = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!usedDbSNP && generator.nextDouble() > 0.5) {
|
|
||||||
|
|
||||||
byte temp = a;
|
|
||||||
a = b;
|
|
||||||
b = temp;
|
|
||||||
|
|
||||||
int tmp = aIndex;
|
|
||||||
aIndex = bIndex;
|
|
||||||
bIndex = tmp;
|
|
||||||
}
|
|
||||||
if (counts[aIndex] == 0 && counts[bIndex] == 0)
|
|
||||||
return 0;
|
|
||||||
out.println(context.getLocation().getContig() + "," +
|
|
||||||
context.getLocation().getStart() + "," + counts[aIndex] + "," + counts[bIndex] + "," + (char) a + "," + (char) b + "," + usedDbSNP);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Integer reduceInit() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
|
||||||
return value + sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraversalDone(Integer result) {
|
|
||||||
out.close();
|
|
||||||
logger.info("[REDUCE RESULT] Traversal result is: " + result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,224 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.capseg;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broad.tribble.bed.FullBEDFeature;
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
|
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This tool captures baits depth (as reads per bait) across all baits and
|
|
||||||
* lanes. The output is one line per bait, with columns for each of the
|
|
||||||
* lanes. Reads which overlap two or more baits are added to all baits that
|
|
||||||
* the read overlaps.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class BaitDepthWalker extends ReadWalker<ReadStats, ReadStats> {
|
|
||||||
|
|
||||||
// what sample we should assign to our unknown reads
|
|
||||||
private static final String UNKNOWN_SAMPLE_NAME = "unknown";
|
|
||||||
|
|
||||||
// what separator to use
|
|
||||||
private static final String SEPERATOR = ",";
|
|
||||||
|
|
||||||
// the current output location
|
|
||||||
@Output(doc="File to which baits and coverage information for lanes should be written",required=true)
|
|
||||||
PrintStream out;
|
|
||||||
|
|
||||||
String currentBait = null;
|
|
||||||
|
|
||||||
// our current bait name
|
|
||||||
String baitName = null;
|
|
||||||
|
|
||||||
// create a coverage mapping by readGroup
|
|
||||||
HashMap<String,Integer> currentCoverage = new HashMap<String,Integer>();
|
|
||||||
|
|
||||||
// set of all of then read groups
|
|
||||||
Set<String> readGroups = new LinkedHashSet<String>();
|
|
||||||
|
|
||||||
// set containing the baits and their positions, used for metrics during and after the run
|
|
||||||
HashMap<String,GenomeLoc> baitMappings = new HashMap<String,GenomeLoc>();
|
|
||||||
|
|
||||||
// a structure to hold reads that align to other baits (other than the one we're currently processing)
|
|
||||||
// baits should be contiguous, so reads should not be left in this queue once we've passed the appropriate bait
|
|
||||||
HashMap<String,ArrayList<String>> recordsForOtherBaits = new HashMap<String,ArrayList<String>>();
|
|
||||||
|
|
||||||
// a mapping of the read group to the bam
|
|
||||||
HashMap<String,String> readGroupToBam = new HashMap<String,String>();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get all of the read groups from the BAM headers, initializing our maps and arrays with this information
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void initialize() {
|
|
||||||
int bamNumber = 0;
|
|
||||||
for (Set<String> set : getToolkit().getMergedReadGroupsByReaders())
|
|
||||||
for (String smp : set) {
|
|
||||||
readGroups.add(smp);
|
|
||||||
readGroupToBam.put(smp,String.valueOf(bamNumber));
|
|
||||||
bamNumber++;
|
|
||||||
}
|
|
||||||
out.print("#bait,location");
|
|
||||||
for (String st : readGroups) {
|
|
||||||
out.print(SEPERATOR);
|
|
||||||
out.print(st);
|
|
||||||
currentCoverage.put(st,0);
|
|
||||||
}
|
|
||||||
out.println();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get the read, determine which baits it falls into, and update our matrix
|
|
||||||
* @param ref the reference bases
|
|
||||||
* @param read the sequencer read
|
|
||||||
* @param metaDataTracker our metadata, used to pull in the BED file
|
|
||||||
* @return a read stat object, which tracks how many reads fell into baits, outside of baits, etc
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ReadStats map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
|
||||||
// setup the return value
|
|
||||||
ReadStats stats = new ReadStats();
|
|
||||||
|
|
||||||
// get a list of the features
|
|
||||||
List<GATKFeature> features = metaDataTracker.getAllCoveringRods();
|
|
||||||
if (features.size() == 0) {
|
|
||||||
stats.addRead(0);
|
|
||||||
} else {
|
|
||||||
LinkedHashSet<String> coveringBaits = new LinkedHashSet<String>();
|
|
||||||
for (GATKFeature feature : features) {
|
|
||||||
if (feature.getUnderlyingObject() instanceof FullBEDFeature &&
|
|
||||||
!((FullBEDFeature)feature.getUnderlyingObject()).getName().contains("ooster")) {
|
|
||||||
String name = ((FullBEDFeature)feature.getUnderlyingObject()).getName();
|
|
||||||
coveringBaits.add(name);
|
|
||||||
if (!baitMappings.containsKey(name)) {
|
|
||||||
baitMappings.put(name,feature.getLocation());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (coveringBaits.size() == 1)
|
|
||||||
addToBaits(coveringBaits.iterator().next(),read.getReadGroup().getReadGroupId());
|
|
||||||
else {
|
|
||||||
List<String> baits = new ArrayList<String>(coveringBaits);
|
|
||||||
for (String bait : baits)
|
|
||||||
if (bait.equals(currentBait))
|
|
||||||
addToBaits(bait,read.getReadGroup().getReadGroupId());
|
|
||||||
else {
|
|
||||||
if (!recordsForOtherBaits.containsKey(bait))
|
|
||||||
recordsForOtherBaits.put(bait,new ArrayList<String>());
|
|
||||||
recordsForOtherBaits.get(bait).add(read.getReadGroup().getReadGroupId());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
stats.addRead(coveringBaits.size());
|
|
||||||
}
|
|
||||||
return stats;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* add a read to a particular bait name
|
|
||||||
* @param baitName the bait name
|
|
||||||
* @param readGroup the read group (lane) the read came from
|
|
||||||
*/
|
|
||||||
public void addToBaits(String baitName, String readGroup) {
|
|
||||||
// if we haven't seen the bait name yet, create a hashmap for it and dump the currentContents to disk
|
|
||||||
if (currentBait == null || !currentBait.equals(baitName)) {
|
|
||||||
if (currentBait != null) {
|
|
||||||
out.print(currentBait + SEPERATOR);
|
|
||||||
currentBait = baitName;
|
|
||||||
out.print(baitMappings.get(baitName));
|
|
||||||
for (String rg : readGroups) {
|
|
||||||
out.print(SEPERATOR + currentCoverage.get(rg));
|
|
||||||
currentCoverage.put(rg,0);
|
|
||||||
}
|
|
||||||
|
|
||||||
out.println();
|
|
||||||
if (recordsForOtherBaits.containsKey(baitName)) {
|
|
||||||
ArrayList<String> records = recordsForOtherBaits.get(baitName);
|
|
||||||
recordsForOtherBaits.remove(baitName);
|
|
||||||
for (String rec : records)
|
|
||||||
addToBaits(baitName,rec);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
currentBait = baitName;
|
|
||||||
|
|
||||||
}
|
|
||||||
if (!readGroups.contains(readGroup))
|
|
||||||
throw new IllegalArgumentException("Novel (unseen) sample name " + readGroup + " is not in the header of any of the BAM files");
|
|
||||||
else
|
|
||||||
currentCoverage.put(readGroup, currentCoverage.get(readGroup) + 1);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ReadStats reduceInit() {
|
|
||||||
return new ReadStats();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ReadStats reduce(ReadStats value, ReadStats sum) {
|
|
||||||
return sum.add(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* emit the last bait, any remaining baits, and the read stats
|
|
||||||
* @param result the reads stats object
|
|
||||||
*/
|
|
||||||
public void onTraversalDone(ReadStats result) {
|
|
||||||
out.print(baitName + SEPERATOR);
|
|
||||||
out.print(baitMappings.get(baitName));
|
|
||||||
for (String rg : this.readGroups)
|
|
||||||
out.print(SEPERATOR + currentCoverage.get(rg));
|
|
||||||
out.println();
|
|
||||||
/*for (Map.Entry<String,ArrayList<String>> entry : recordsForOtherBaits.entrySet()) {
|
|
||||||
out.print(entry.getKey() + SEPERATOR);
|
|
||||||
out.print(baitMappings.get(baitName));
|
|
||||||
for (String rg : this.readGroups) {
|
|
||||||
int index = entry.getValue().indexOf(rg);
|
|
||||||
///out.print(SEPERATOR + (index < 0 ? 0 : entry.getValue().));
|
|
||||||
}
|
|
||||||
out.println();
|
|
||||||
}*/
|
|
||||||
|
|
||||||
currentBait = baitName;
|
|
||||||
currentCoverage.clear();
|
|
||||||
out.flush();
|
|
||||||
out.close();
|
|
||||||
logger.info("[REDUCE RESULT] Traversal result is: " + result);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// a small helper class for storing read statistics
|
|
||||||
class ReadStats {
|
|
||||||
int numberInBait = 0;
|
|
||||||
int numberOutsideOfBait = 0;
|
|
||||||
int baitToReadCount = 0; // the number of baits we've seen, used to determine the bait/read ratio
|
|
||||||
int readCount = 0;
|
|
||||||
|
|
||||||
public void addRead(int baitCount) {
|
|
||||||
baitToReadCount += baitCount;
|
|
||||||
numberInBait += (baitCount > 0) ? 1 : 0;
|
|
||||||
numberOutsideOfBait += (baitCount > 0) ? 0 : 1;
|
|
||||||
readCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return "Number in baits = " + numberInBait + ", number outside of baits = " + numberOutsideOfBait + ", baits per read" + (baitToReadCount/readCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
public org.broadinstitute.sting.gatk.walkers.capseg.ReadStats add(org.broadinstitute.sting.gatk.walkers.capseg.ReadStats stat) {
|
|
||||||
this.numberInBait += stat.numberInBait;
|
|
||||||
this.numberOutsideOfBait += stat.numberOutsideOfBait;
|
|
||||||
this.baitToReadCount += stat.baitToReadCount;
|
|
||||||
this.readCount += stat.readCount;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,83 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.capseg;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|
||||||
|
|
||||||
import java.io.BufferedOutputStream;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.PrintWriter;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: aaron
|
|
||||||
* Date: 3/7/11
|
|
||||||
* Time: 3:40 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class SampleToLaneWalker extends ReadWalker<Integer,Integer> {
|
|
||||||
|
|
||||||
@Argument(doc="Sample to read group file", shortName="strf", required=true)
|
|
||||||
BufferedOutputStream sampleToReadGroup;
|
|
||||||
|
|
||||||
@Output(doc="BAM file to sample file", shortName="bs")
|
|
||||||
BufferedOutputStream bamToSample;
|
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
PrintWriter sr_writer = new PrintWriter(sampleToReadGroup);
|
|
||||||
PrintWriter bs_writer = new PrintWriter(bamToSample);
|
|
||||||
|
|
||||||
// write out the sample to read-group file
|
|
||||||
sr_writer.println("Sample\tReadGroup");
|
|
||||||
SAMFileHeader header = getToolkit().getSAMFileHeader();
|
|
||||||
for (SAMReadGroupRecord rec : header.getReadGroups()) {
|
|
||||||
Sample sample = getToolkit().getSampleByReadGroup(rec);
|
|
||||||
sr_writer.println(sample.getId() + "\t" + rec.getReadGroupId());
|
|
||||||
}
|
|
||||||
sr_writer.flush();
|
|
||||||
|
|
||||||
// write out the bam file to the sample information
|
|
||||||
bs_writer.println("BAM\tSample");
|
|
||||||
for (SAMReaderID rec : getToolkit().getReadsDataSource().getReaderIDs()) {
|
|
||||||
File fl = getToolkit().getSourceFileForReaderID(rec);
|
|
||||||
Iterator<SAMReadGroupRecord> iter = getToolkit().getSAMFileHeader(rec).getReadGroups().iterator();
|
|
||||||
Set<String> names = new HashSet <String>();
|
|
||||||
while (iter.hasNext())
|
|
||||||
names.add(iter.next().getSample());
|
|
||||||
Iterator<String> strs = names.iterator();
|
|
||||||
String rg = "";
|
|
||||||
if (strs.hasNext())
|
|
||||||
rg = strs.next();
|
|
||||||
while (strs.hasNext()) {
|
|
||||||
rg = rg + ";" + strs.next();
|
|
||||||
}
|
|
||||||
bs_writer.println(fl.getName() + "\t" + rg);
|
|
||||||
}
|
|
||||||
bs_writer.flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
|
||||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Integer reduceInit() {
|
|
||||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
|
||||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue