removing; should have gone to the CGA repo
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5633 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
da6f2d3c9d
commit
2089c3bdef
|
|
@ -1,135 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.capseg;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* get the allele counts at loci that overlap both the bait list and DbSNP
|
||||
*/
|
||||
//@By(DataSource.REFERENCE_ORDERED_DATA)
|
||||
public class AlleleCountWalker extends LocusWalker<Integer, Integer> {
|
||||
@Argument(doc = "our output file name", shortName = "fl")
|
||||
File output;
|
||||
|
||||
String dbTrack = "DbSNP";
|
||||
String callTrack = "calls";
|
||||
Random generator = new Random();
|
||||
private PrintWriter out;
|
||||
|
||||
public void initialize() {
|
||||
try {
|
||||
out = new PrintWriter(new FileWriter(output));
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("Uncaught exception!");
|
||||
}
|
||||
out.println("contig,position,A,B,aBase,bBase,dbSNPOrientation");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
// check that the tracker and the context are not null, and we a variant, and some amount of bases
|
||||
if (tracker == null || context == null || tracker.getAllVariantContexts(ref).size() < 1 || context.getBasePileup().size() == 0)
|
||||
return 0;
|
||||
|
||||
// get the het
|
||||
VariantContext het = tracker.getVariantContext(ref, callTrack, context.getLocation());
|
||||
VariantContext dbSNP = tracker.getVariantContext(ref, dbTrack, context.getLocation());
|
||||
if (het == null || het.getHetCount() == 0) return 0;
|
||||
|
||||
|
||||
byte[] bases = context.getBasePileup().getBases();
|
||||
int[] counts = new int[5];
|
||||
for (byte b : bases) {
|
||||
if (b == 'a' || b == 'A') counts[0]++;
|
||||
else if (b == 'c' || b == 'C') counts[1]++;
|
||||
else if (b == 'g' || b == 'G') counts[2]++;
|
||||
else if (b == 't' || b == 'T') counts[3]++;
|
||||
else counts[4]++;
|
||||
}
|
||||
int aIndex = 0;
|
||||
byte a = 'a';
|
||||
|
||||
// find the maximumvalue
|
||||
for (int index = 0; index < counts.length; index++)
|
||||
if (counts[index] > counts[aIndex]) {
|
||||
aIndex = index;
|
||||
a = (byte) ((index < 1) ? 'a' : (index < 2) ? 'c' : (index < 3) ? 'g' : 't');
|
||||
}
|
||||
|
||||
int bIndex = (aIndex != 0) ? 0 : 1;
|
||||
byte b = 'a';
|
||||
for (int index = 0; index < counts.length; index++)
|
||||
if (counts[index] > counts[bIndex] && index != aIndex) {
|
||||
bIndex = index;
|
||||
b = (byte) ((index < 1) ? 'a' : (index < 2) ? 'c' : (index < 3) ? 'g' : 't');
|
||||
}
|
||||
|
||||
boolean usedDbSNP = false;
|
||||
// check if there is an ordering that we'd like to subscribe to
|
||||
if (dbSNP != null) {
|
||||
// get the alt from the DbSNP file
|
||||
Set<Allele> alts = dbSNP.getAlternateAlleles();
|
||||
|
||||
// if true, swap the two
|
||||
if (alts.size() == 1 && alts.iterator().next().basesMatch(String.valueOf((char) a))) {
|
||||
byte temp = a;
|
||||
a = b;
|
||||
b = temp;
|
||||
|
||||
int tmp = aIndex;
|
||||
aIndex = bIndex;
|
||||
bIndex = tmp;
|
||||
usedDbSNP = true;
|
||||
} else if (alts.size() == 1 && alts.iterator().next().basesMatch(String.valueOf((char) b))) {
|
||||
usedDbSNP = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!usedDbSNP && generator.nextDouble() > 0.5) {
|
||||
|
||||
byte temp = a;
|
||||
a = b;
|
||||
b = temp;
|
||||
|
||||
int tmp = aIndex;
|
||||
aIndex = bIndex;
|
||||
bIndex = tmp;
|
||||
}
|
||||
if (counts[aIndex] == 0 && counts[bIndex] == 0)
|
||||
return 0;
|
||||
out.println(context.getLocation().getContig() + "," +
|
||||
context.getLocation().getStart() + "," + counts[aIndex] + "," + counts[bIndex] + "," + (char) a + "," + (char) b + "," + usedDbSNP);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
out.close();
|
||||
logger.info("[REDUCE RESULT] Traversal result is: " + result);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,224 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.capseg;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broad.tribble.bed.FullBEDFeature;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This tool captures baits depth (as reads per bait) across all baits and
|
||||
* lanes. The output is one line per bait, with columns for each of the
|
||||
* lanes. Reads which overlap two or more baits are added to all baits that
|
||||
* the read overlaps.
|
||||
*
|
||||
*/
|
||||
public class BaitDepthWalker extends ReadWalker<ReadStats, ReadStats> {
|
||||
|
||||
// what sample we should assign to our unknown reads
|
||||
private static final String UNKNOWN_SAMPLE_NAME = "unknown";
|
||||
|
||||
// what separator to use
|
||||
private static final String SEPERATOR = ",";
|
||||
|
||||
// the current output location
|
||||
@Output(doc="File to which baits and coverage information for lanes should be written",required=true)
|
||||
PrintStream out;
|
||||
|
||||
String currentBait = null;
|
||||
|
||||
// our current bait name
|
||||
String baitName = null;
|
||||
|
||||
// create a coverage mapping by readGroup
|
||||
HashMap<String,Integer> currentCoverage = new HashMap<String,Integer>();
|
||||
|
||||
// set of all of then read groups
|
||||
Set<String> readGroups = new LinkedHashSet<String>();
|
||||
|
||||
// set containing the baits and their positions, used for metrics during and after the run
|
||||
HashMap<String,GenomeLoc> baitMappings = new HashMap<String,GenomeLoc>();
|
||||
|
||||
// a structure to hold reads that align to other baits (other than the one we're currently processing)
|
||||
// baits should be contiguous, so reads should not be left in this queue once we've passed the appropriate bait
|
||||
HashMap<String,ArrayList<String>> recordsForOtherBaits = new HashMap<String,ArrayList<String>>();
|
||||
|
||||
// a mapping of the read group to the bam
|
||||
HashMap<String,String> readGroupToBam = new HashMap<String,String>();
|
||||
|
||||
/**
|
||||
* get all of the read groups from the BAM headers, initializing our maps and arrays with this information
|
||||
*/
|
||||
@Override
|
||||
public void initialize() {
|
||||
int bamNumber = 0;
|
||||
for (Set<String> set : getToolkit().getMergedReadGroupsByReaders())
|
||||
for (String smp : set) {
|
||||
readGroups.add(smp);
|
||||
readGroupToBam.put(smp,String.valueOf(bamNumber));
|
||||
bamNumber++;
|
||||
}
|
||||
out.print("#bait,location");
|
||||
for (String st : readGroups) {
|
||||
out.print(SEPERATOR);
|
||||
out.print(st);
|
||||
currentCoverage.put(st,0);
|
||||
}
|
||||
out.println();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the read, determine which baits it falls into, and update our matrix
|
||||
* @param ref the reference bases
|
||||
* @param read the sequencer read
|
||||
* @param metaDataTracker our metadata, used to pull in the BED file
|
||||
* @return a read stat object, which tracks how many reads fell into baits, outside of baits, etc
|
||||
*/
|
||||
@Override
|
||||
public ReadStats map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
// setup the return value
|
||||
ReadStats stats = new ReadStats();
|
||||
|
||||
// get a list of the features
|
||||
List<GATKFeature> features = metaDataTracker.getAllCoveringRods();
|
||||
if (features.size() == 0) {
|
||||
stats.addRead(0);
|
||||
} else {
|
||||
LinkedHashSet<String> coveringBaits = new LinkedHashSet<String>();
|
||||
for (GATKFeature feature : features) {
|
||||
if (feature.getUnderlyingObject() instanceof FullBEDFeature &&
|
||||
!((FullBEDFeature)feature.getUnderlyingObject()).getName().contains("ooster")) {
|
||||
String name = ((FullBEDFeature)feature.getUnderlyingObject()).getName();
|
||||
coveringBaits.add(name);
|
||||
if (!baitMappings.containsKey(name)) {
|
||||
baitMappings.put(name,feature.getLocation());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
if (coveringBaits.size() == 1)
|
||||
addToBaits(coveringBaits.iterator().next(),read.getReadGroup().getReadGroupId());
|
||||
else {
|
||||
List<String> baits = new ArrayList<String>(coveringBaits);
|
||||
for (String bait : baits)
|
||||
if (bait.equals(currentBait))
|
||||
addToBaits(bait,read.getReadGroup().getReadGroupId());
|
||||
else {
|
||||
if (!recordsForOtherBaits.containsKey(bait))
|
||||
recordsForOtherBaits.put(bait,new ArrayList<String>());
|
||||
recordsForOtherBaits.get(bait).add(read.getReadGroup().getReadGroupId());
|
||||
}
|
||||
|
||||
}
|
||||
stats.addRead(coveringBaits.size());
|
||||
}
|
||||
return stats;
|
||||
}
|
||||
|
||||
/**
|
||||
* add a read to a particular bait name
|
||||
* @param baitName the bait name
|
||||
* @param readGroup the read group (lane) the read came from
|
||||
*/
|
||||
public void addToBaits(String baitName, String readGroup) {
|
||||
// if we haven't seen the bait name yet, create a hashmap for it and dump the currentContents to disk
|
||||
if (currentBait == null || !currentBait.equals(baitName)) {
|
||||
if (currentBait != null) {
|
||||
out.print(currentBait + SEPERATOR);
|
||||
currentBait = baitName;
|
||||
out.print(baitMappings.get(baitName));
|
||||
for (String rg : readGroups) {
|
||||
out.print(SEPERATOR + currentCoverage.get(rg));
|
||||
currentCoverage.put(rg,0);
|
||||
}
|
||||
|
||||
out.println();
|
||||
if (recordsForOtherBaits.containsKey(baitName)) {
|
||||
ArrayList<String> records = recordsForOtherBaits.get(baitName);
|
||||
recordsForOtherBaits.remove(baitName);
|
||||
for (String rec : records)
|
||||
addToBaits(baitName,rec);
|
||||
}
|
||||
|
||||
}
|
||||
currentBait = baitName;
|
||||
|
||||
}
|
||||
if (!readGroups.contains(readGroup))
|
||||
throw new IllegalArgumentException("Novel (unseen) sample name " + readGroup + " is not in the header of any of the BAM files");
|
||||
else
|
||||
currentCoverage.put(readGroup, currentCoverage.get(readGroup) + 1);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReadStats reduceInit() {
|
||||
return new ReadStats();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReadStats reduce(ReadStats value, ReadStats sum) {
|
||||
return sum.add(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* emit the last bait, any remaining baits, and the read stats
|
||||
* @param result the reads stats object
|
||||
*/
|
||||
public void onTraversalDone(ReadStats result) {
|
||||
out.print(baitName + SEPERATOR);
|
||||
out.print(baitMappings.get(baitName));
|
||||
for (String rg : this.readGroups)
|
||||
out.print(SEPERATOR + currentCoverage.get(rg));
|
||||
out.println();
|
||||
/*for (Map.Entry<String,ArrayList<String>> entry : recordsForOtherBaits.entrySet()) {
|
||||
out.print(entry.getKey() + SEPERATOR);
|
||||
out.print(baitMappings.get(baitName));
|
||||
for (String rg : this.readGroups) {
|
||||
int index = entry.getValue().indexOf(rg);
|
||||
///out.print(SEPERATOR + (index < 0 ? 0 : entry.getValue().));
|
||||
}
|
||||
out.println();
|
||||
}*/
|
||||
|
||||
currentBait = baitName;
|
||||
currentCoverage.clear();
|
||||
out.flush();
|
||||
out.close();
|
||||
logger.info("[REDUCE RESULT] Traversal result is: " + result);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// a small helper class for storing read statistics
|
||||
class ReadStats {
|
||||
int numberInBait = 0;
|
||||
int numberOutsideOfBait = 0;
|
||||
int baitToReadCount = 0; // the number of baits we've seen, used to determine the bait/read ratio
|
||||
int readCount = 0;
|
||||
|
||||
public void addRead(int baitCount) {
|
||||
baitToReadCount += baitCount;
|
||||
numberInBait += (baitCount > 0) ? 1 : 0;
|
||||
numberOutsideOfBait += (baitCount > 0) ? 0 : 1;
|
||||
readCount++;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Number in baits = " + numberInBait + ", number outside of baits = " + numberOutsideOfBait + ", baits per read" + (baitToReadCount/readCount);
|
||||
}
|
||||
|
||||
public org.broadinstitute.sting.gatk.walkers.capseg.ReadStats add(org.broadinstitute.sting.gatk.walkers.capseg.ReadStats stat) {
|
||||
this.numberInBait += stat.numberInBait;
|
||||
this.numberOutsideOfBait += stat.numberOutsideOfBait;
|
||||
this.baitToReadCount += stat.baitToReadCount;
|
||||
this.readCount += stat.readCount;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.capseg;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaron
|
||||
* Date: 3/7/11
|
||||
* Time: 3:40 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SampleToLaneWalker extends ReadWalker<Integer,Integer> {
|
||||
|
||||
@Argument(doc="Sample to read group file", shortName="strf", required=true)
|
||||
BufferedOutputStream sampleToReadGroup;
|
||||
|
||||
@Output(doc="BAM file to sample file", shortName="bs")
|
||||
BufferedOutputStream bamToSample;
|
||||
|
||||
public void initialize() {
|
||||
PrintWriter sr_writer = new PrintWriter(sampleToReadGroup);
|
||||
PrintWriter bs_writer = new PrintWriter(bamToSample);
|
||||
|
||||
// write out the sample to read-group file
|
||||
sr_writer.println("Sample\tReadGroup");
|
||||
SAMFileHeader header = getToolkit().getSAMFileHeader();
|
||||
for (SAMReadGroupRecord rec : header.getReadGroups()) {
|
||||
Sample sample = getToolkit().getSampleByReadGroup(rec);
|
||||
sr_writer.println(sample.getId() + "\t" + rec.getReadGroupId());
|
||||
}
|
||||
sr_writer.flush();
|
||||
|
||||
// write out the bam file to the sample information
|
||||
bs_writer.println("BAM\tSample");
|
||||
for (SAMReaderID rec : getToolkit().getReadsDataSource().getReaderIDs()) {
|
||||
File fl = getToolkit().getSourceFileForReaderID(rec);
|
||||
Iterator<SAMReadGroupRecord> iter = getToolkit().getSAMFileHeader(rec).getReadGroups().iterator();
|
||||
Set<String> names = new HashSet <String>();
|
||||
while (iter.hasNext())
|
||||
names.add(iter.next().getSample());
|
||||
Iterator<String> strs = names.iterator();
|
||||
String rg = "";
|
||||
if (strs.hasNext())
|
||||
rg = strs.next();
|
||||
while (strs.hasNext()) {
|
||||
rg = rg + ";" + strs.next();
|
||||
}
|
||||
bs_writer.println(fl.getName() + "\t" + rg);
|
||||
}
|
||||
bs_writer.flush();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue