Skeleton of Somatic Coverage tool

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@342 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kcibul 2009-04-09 02:34:03 +00:00
parent 12752cf893
commit c556a97f17
1 changed files with 115 additions and 0 deletions

View File

@ -0,0 +1,115 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.Formatter;
import static java.lang.Math.log10;
import edu.mit.broad.picard.genotype.DiploidGenotype;
import edu.mit.broad.picard.PicardException;
public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName = "tumor_sample_name", shortName = "1", required = true)
String tumorSampleName = "TCGA-06-0188-01A-01W"; // FIXME: cmdline parsing not working!
@Argument(fullName = "normal_sample_name", shortName = "2", required = true)
String normalSampleName = "TCGA-06-0188-10B-01W"; // FIXME: cmdline parsing not working!
public void initialize() {
}
public String walkerType() { return "ByLocus"; }
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
return true; // We are keeping all the reads
}
// Map over the org.broadinstitute.sting.gatk.LocusContext
int MAPPING_QUALITY_THRESHOLD = 1;
int totalSites;
int tumorCovered;
int normalCovered;
int somaticCovered;
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
List<SAMRecord> reads = context.getReads();
int tumorDepth = 0;
int normalDepth = 0;
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
// TODO: could this be done better elsewhere?
// only process primary, non duplicate alignments
// that come from fully mapped pairs with a mappign quality threshold >= x
if (read.getNotPrimaryAlignmentFlag() ||
read.getDuplicateReadFlag() ||
read.getReadUnmappedFlag()
||
read.getMateUnmappedFlag() ||
read.getMappingQuality() < MAPPING_QUALITY_THRESHOLD
) {
continue;
}
String rg = (String) read.getAttribute("RG");
String sample = read.getHeader().getReadGroup(rg).getSample();
if (normalSampleName.equals(sample)) {
normalDepth++;
} else if (tumorSampleName.equals(sample)) {
tumorDepth++;
} else {
throw new RuntimeException("Unknown Sample Name: " + sample);
}
}
boolean isTumorCovered = tumorDepth >= 14;
boolean isNormalCovered = normalDepth >= 8;
if (isTumorCovered) { tumorCovered++; }
if (isNormalCovered) { normalCovered++; }
if (isTumorCovered && isNormalCovered) {somaticCovered++; }
totalSites++;
if (totalSites % 20000 == 0) {
out.println(String.format("%s:%d %d %d %d %d", context.getContig(), context.getPosition(), totalSites, tumorCovered, normalCovered, somaticCovered));
}
return 1;
}
// Given result of map function
public Integer reduceInit() {
return 0;
}
public Integer reduce(Integer value, Integer sum) {
return value + sum;
}
@Override
public void onTraversalDone(Integer result) {
out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered));
}
}