diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/capseg/AlleleCountWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/capseg/AlleleCountWalker.java deleted file mode 100755 index d5618b5a3..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/capseg/AlleleCountWalker.java +++ /dev/null @@ -1,135 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.capseg; - -import org.broad.tribble.util.variantcontext.Allele; -import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.By; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.Random; -import java.util.Set; - -/** - * get the allele counts at loci that overlap both the bait list and DbSNP - */ -//@By(DataSource.REFERENCE_ORDERED_DATA) -public class AlleleCountWalker extends LocusWalker { - @Argument(doc = "our output file name", shortName = "fl") - File output; - - String dbTrack = "DbSNP"; - String callTrack = "calls"; - Random generator = new Random(); - private PrintWriter out; - - public void initialize() { - try { - out = new PrintWriter(new FileWriter(output)); - } catch (IOException e) { - throw new IllegalArgumentException("Uncaught exception!"); - } - out.println("contig,position,A,B,aBase,bBase,dbSNPOrientation"); - } - - @Override - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - // check that the tracker and the context are not null, and we a variant, and some amount of bases - if (tracker == null || context == null || tracker.getAllVariantContexts(ref).size() < 1 || context.getBasePileup().size() == 0) - return 0; - - // get the het - VariantContext het = tracker.getVariantContext(ref, callTrack, context.getLocation()); - VariantContext dbSNP = tracker.getVariantContext(ref, dbTrack, context.getLocation()); - if (het == null || het.getHetCount() == 0) return 0; - - - byte[] bases = context.getBasePileup().getBases(); - int[] counts = new int[5]; - for (byte b : bases) { - if (b == 'a' || b == 'A') counts[0]++; - else if (b == 'c' || b == 'C') counts[1]++; - else if (b == 'g' || b == 'G') counts[2]++; - else if (b == 't' || b == 'T') counts[3]++; - else counts[4]++; - } - int aIndex = 0; - byte a = 'a'; - - // find the maximumvalue - for (int index = 0; index < counts.length; index++) - if (counts[index] > counts[aIndex]) { - aIndex = index; - a = (byte) ((index < 1) ? 'a' : (index < 2) ? 'c' : (index < 3) ? 'g' : 't'); - } - - int bIndex = (aIndex != 0) ? 0 : 1; - byte b = 'a'; - for (int index = 0; index < counts.length; index++) - if (counts[index] > counts[bIndex] && index != aIndex) { - bIndex = index; - b = (byte) ((index < 1) ? 'a' : (index < 2) ? 'c' : (index < 3) ? 'g' : 't'); - } - - boolean usedDbSNP = false; - // check if there is an ordering that we'd like to subscribe to - if (dbSNP != null) { - // get the alt from the DbSNP file - Set alts = dbSNP.getAlternateAlleles(); - - // if true, swap the two - if (alts.size() == 1 && alts.iterator().next().basesMatch(String.valueOf((char) a))) { - byte temp = a; - a = b; - b = temp; - - int tmp = aIndex; - aIndex = bIndex; - bIndex = tmp; - usedDbSNP = true; - } else if (alts.size() == 1 && alts.iterator().next().basesMatch(String.valueOf((char) b))) { - usedDbSNP = true; - } - } - - if (!usedDbSNP && generator.nextDouble() > 0.5) { - - byte temp = a; - a = b; - b = temp; - - int tmp = aIndex; - aIndex = bIndex; - bIndex = tmp; - } - if (counts[aIndex] == 0 && counts[bIndex] == 0) - return 0; - out.println(context.getLocation().getContig() + "," + - context.getLocation().getStart() + "," + counts[aIndex] + "," + counts[bIndex] + "," + (char) a + "," + (char) b + "," + usedDbSNP); - return 1; - } - - @Override - public Integer reduceInit() { - return 0; - } - - @Override - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - public void onTraversalDone(Integer result) { - out.close(); - logger.info("[REDUCE RESULT] Traversal result is: " + result); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/capseg/BaitDepthWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/capseg/BaitDepthWalker.java deleted file mode 100755 index 4554d6a30..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/capseg/BaitDepthWalker.java +++ /dev/null @@ -1,224 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.capseg; - -import net.sf.samtools.SAMRecord; -import org.broad.tribble.bed.FullBEDFeature; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.io.PrintStream; -import java.util.*; - -/** - * This tool captures baits depth (as reads per bait) across all baits and - * lanes. The output is one line per bait, with columns for each of the - * lanes. Reads which overlap two or more baits are added to all baits that - * the read overlaps. - * - */ -public class BaitDepthWalker extends ReadWalker { - - // what sample we should assign to our unknown reads - private static final String UNKNOWN_SAMPLE_NAME = "unknown"; - - // what separator to use - private static final String SEPERATOR = ","; - - // the current output location - @Output(doc="File to which baits and coverage information for lanes should be written",required=true) - PrintStream out; - - String currentBait = null; - - // our current bait name - String baitName = null; - - // create a coverage mapping by readGroup - HashMap currentCoverage = new HashMap(); - - // set of all of then read groups - Set readGroups = new LinkedHashSet(); - - // set containing the baits and their positions, used for metrics during and after the run - HashMap baitMappings = new HashMap(); - - // a structure to hold reads that align to other baits (other than the one we're currently processing) - // baits should be contiguous, so reads should not be left in this queue once we've passed the appropriate bait - HashMap> recordsForOtherBaits = new HashMap>(); - - // a mapping of the read group to the bam - HashMap readGroupToBam = new HashMap(); - - /** - * get all of the read groups from the BAM headers, initializing our maps and arrays with this information - */ - @Override - public void initialize() { - int bamNumber = 0; - for (Set set : getToolkit().getMergedReadGroupsByReaders()) - for (String smp : set) { - readGroups.add(smp); - readGroupToBam.put(smp,String.valueOf(bamNumber)); - bamNumber++; - } - out.print("#bait,location"); - for (String st : readGroups) { - out.print(SEPERATOR); - out.print(st); - currentCoverage.put(st,0); - } - out.println(); - } - - /** - * get the read, determine which baits it falls into, and update our matrix - * @param ref the reference bases - * @param read the sequencer read - * @param metaDataTracker our metadata, used to pull in the BED file - * @return a read stat object, which tracks how many reads fell into baits, outside of baits, etc - */ - @Override - public ReadStats map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { - // setup the return value - ReadStats stats = new ReadStats(); - - // get a list of the features - List features = metaDataTracker.getAllCoveringRods(); - if (features.size() == 0) { - stats.addRead(0); - } else { - LinkedHashSet coveringBaits = new LinkedHashSet(); - for (GATKFeature feature : features) { - if (feature.getUnderlyingObject() instanceof FullBEDFeature && - !((FullBEDFeature)feature.getUnderlyingObject()).getName().contains("ooster")) { - String name = ((FullBEDFeature)feature.getUnderlyingObject()).getName(); - coveringBaits.add(name); - if (!baitMappings.containsKey(name)) { - baitMappings.put(name,feature.getLocation()); - } - - } - } - if (coveringBaits.size() == 1) - addToBaits(coveringBaits.iterator().next(),read.getReadGroup().getReadGroupId()); - else { - List baits = new ArrayList(coveringBaits); - for (String bait : baits) - if (bait.equals(currentBait)) - addToBaits(bait,read.getReadGroup().getReadGroupId()); - else { - if (!recordsForOtherBaits.containsKey(bait)) - recordsForOtherBaits.put(bait,new ArrayList()); - recordsForOtherBaits.get(bait).add(read.getReadGroup().getReadGroupId()); - } - - } - stats.addRead(coveringBaits.size()); - } - return stats; - } - - /** - * add a read to a particular bait name - * @param baitName the bait name - * @param readGroup the read group (lane) the read came from - */ - public void addToBaits(String baitName, String readGroup) { - // if we haven't seen the bait name yet, create a hashmap for it and dump the currentContents to disk - if (currentBait == null || !currentBait.equals(baitName)) { - if (currentBait != null) { - out.print(currentBait + SEPERATOR); - currentBait = baitName; - out.print(baitMappings.get(baitName)); - for (String rg : readGroups) { - out.print(SEPERATOR + currentCoverage.get(rg)); - currentCoverage.put(rg,0); - } - - out.println(); - if (recordsForOtherBaits.containsKey(baitName)) { - ArrayList records = recordsForOtherBaits.get(baitName); - recordsForOtherBaits.remove(baitName); - for (String rec : records) - addToBaits(baitName,rec); - } - - } - currentBait = baitName; - - } - if (!readGroups.contains(readGroup)) - throw new IllegalArgumentException("Novel (unseen) sample name " + readGroup + " is not in the header of any of the BAM files"); - else - currentCoverage.put(readGroup, currentCoverage.get(readGroup) + 1); - - } - - @Override - public ReadStats reduceInit() { - return new ReadStats(); - } - - @Override - public ReadStats reduce(ReadStats value, ReadStats sum) { - return sum.add(value); - } - - /** - * emit the last bait, any remaining baits, and the read stats - * @param result the reads stats object - */ - public void onTraversalDone(ReadStats result) { - out.print(baitName + SEPERATOR); - out.print(baitMappings.get(baitName)); - for (String rg : this.readGroups) - out.print(SEPERATOR + currentCoverage.get(rg)); - out.println(); - /*for (Map.Entry> entry : recordsForOtherBaits.entrySet()) { - out.print(entry.getKey() + SEPERATOR); - out.print(baitMappings.get(baitName)); - for (String rg : this.readGroups) { - int index = entry.getValue().indexOf(rg); - ///out.print(SEPERATOR + (index < 0 ? 0 : entry.getValue().)); - } - out.println(); - }*/ - - currentBait = baitName; - currentCoverage.clear(); - out.flush(); - out.close(); - logger.info("[REDUCE RESULT] Traversal result is: " + result); - } - -} - -// a small helper class for storing read statistics -class ReadStats { - int numberInBait = 0; - int numberOutsideOfBait = 0; - int baitToReadCount = 0; // the number of baits we've seen, used to determine the bait/read ratio - int readCount = 0; - - public void addRead(int baitCount) { - baitToReadCount += baitCount; - numberInBait += (baitCount > 0) ? 1 : 0; - numberOutsideOfBait += (baitCount > 0) ? 0 : 1; - readCount++; - } - - public String toString() { - return "Number in baits = " + numberInBait + ", number outside of baits = " + numberOutsideOfBait + ", baits per read" + (baitToReadCount/readCount); - } - - public org.broadinstitute.sting.gatk.walkers.capseg.ReadStats add(org.broadinstitute.sting.gatk.walkers.capseg.ReadStats stat) { - this.numberInBait += stat.numberInBait; - this.numberOutsideOfBait += stat.numberOutsideOfBait; - this.baitToReadCount += stat.baitToReadCount; - this.readCount += stat.readCount; - return this; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/capseg/SampleToLaneWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/capseg/SampleToLaneWalker.java deleted file mode 100755 index bd9cbaa27..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/capseg/SampleToLaneWalker.java +++ /dev/null @@ -1,83 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.capseg; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; - -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.PrintWriter; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; - -/** - * Created by IntelliJ IDEA. - * User: aaron - * Date: 3/7/11 - * Time: 3:40 PM - * To change this template use File | Settings | File Templates. - */ -public class SampleToLaneWalker extends ReadWalker { - - @Argument(doc="Sample to read group file", shortName="strf", required=true) - BufferedOutputStream sampleToReadGroup; - - @Output(doc="BAM file to sample file", shortName="bs") - BufferedOutputStream bamToSample; - - public void initialize() { - PrintWriter sr_writer = new PrintWriter(sampleToReadGroup); - PrintWriter bs_writer = new PrintWriter(bamToSample); - - // write out the sample to read-group file - sr_writer.println("Sample\tReadGroup"); - SAMFileHeader header = getToolkit().getSAMFileHeader(); - for (SAMReadGroupRecord rec : header.getReadGroups()) { - Sample sample = getToolkit().getSampleByReadGroup(rec); - sr_writer.println(sample.getId() + "\t" + rec.getReadGroupId()); - } - sr_writer.flush(); - - // write out the bam file to the sample information - bs_writer.println("BAM\tSample"); - for (SAMReaderID rec : getToolkit().getReadsDataSource().getReaderIDs()) { - File fl = getToolkit().getSourceFileForReaderID(rec); - Iterator iter = getToolkit().getSAMFileHeader(rec).getReadGroups().iterator(); - Set names = new HashSet (); - while (iter.hasNext()) - names.add(iter.next().getSample()); - Iterator strs = names.iterator(); - String rg = ""; - if (strs.hasNext()) - rg = strs.next(); - while (strs.hasNext()) { - rg = rg + ";" + strs.next(); - } - bs_writer.println(fl.getName() + "\t" + rg); - } - bs_writer.flush(); - } - - @Override - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { - return 0; //To change body of implemented methods use File | Settings | File Templates. - } - - @Override - public Integer reduceInit() { - return 0; //To change body of implemented methods use File | Settings | File Templates. - } - - @Override - public Integer reduce(Integer value, Integer sum) { - return 0; //To change body of implemented methods use File | Settings | File Templates. - } -}