CoverageStatistics has now replaced DepthOfCoverage -- old DoC is in the archive.

Also, I can't be bothered to fix the spelling of "oldepthofcoverage" to contain the necessary number of D's. Be content that it does, however, contain the requisite number of O's.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3109 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-04-01 16:27:23 +00:00
parent 585cc880a2
commit f7d1b8f5de
6 changed files with 122 additions and 139 deletions

View File

@ -49,7 +49,7 @@ import java.util.*;
// todo -- allow for user to set linear binning (default is logarithmic)
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
@By(DataSource.REFERENCE)
public class CoverageStatistics extends LocusWalker<Map<String,int[]>, CoverageAggregator> implements TreeReducible<CoverageAggregator> {
public class DepthOfCoverage extends LocusWalker<Map<String,int[]>, CoverageAggregator> implements TreeReducible<CoverageAggregator> {
@Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false)
int start = 1;
@Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false)

View File

@ -0,0 +1,40 @@
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.HashMap;
import java.util.Map;
/**
 * Experimental INFO-field annotation that counts pileup reads with an unusually
 * large inferred insert size.
 *
 * For every entry of the supplied context map, the COMPLETE stratified context's
 * base pileup is scanned, and each element whose read reports an absolute inferred
 * insert size strictly greater than {@link #INSERT_SIZE_LOWER_BOUND} is counted.
 * The total over all entries is reported under the {@code INSIZE} key.
 *
 * @author chartl
 * @since Mar 29, 2010
 */
public class InsertSizeDistribution implements InfoFieldAnnotation {
    /** Reads whose absolute inferred insert size is strictly above this value are counted. */
    private static final long INSERT_SIZE_LOWER_BOUND = 500;

    /** @return the INFO key under which the count is reported */
    public String getKeyName() { return "INSIZE"; }

    /** @return the header line declaring the key as a single Integer-typed value */
    public VCFInfoHeaderLine getDescription() {
        return new VCFInfoHeaderLine(getKeyName(),1,VCFInfoHeaderLine.INFO_TYPE.Integer,"Do not use this if your name is not Chris");
    }

    /**
     * Counts the reads exceeding the insert-size threshold across all entries of {@code context}.
     *
     * @param tracker not used by this annotation
     * @param ref     not used by this annotation
     * @param context alignment contexts to scan (keys are presumably sample names -- confirm against caller)
     * @param variant not used by this annotation
     * @return a single-entry map from {@link #getKeyName()} to the count, rendered as a decimal string
     */
    public Map<String,Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, VariantContext variant) {
        int weirdInsertSizeReads = 0;
        // iterate the values directly; the key itself is never needed
        for ( StratifiedAlignmentContext sampleContext : context.values() ) {
            ReadBackedPileup pileup = sampleContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
            for ( PileupElement e : pileup ) {
                if ( Math.abs(e.getRead().getInferredInsertSize()) > INSERT_SIZE_LOWER_BOUND ) {
                    weirdInsertSizeReads++;
                }
            }
        }

        Map<String,Object> toReturn = new HashMap<String,Object>();
        // Integer.toString is locale-independent; String.format("%d", ...) may localize digits
        toReturn.put(getKeyName(), Integer.toString(weirdInsertSizeReads));
        return toReturn;
    }
}

View File

@ -24,6 +24,6 @@ public class AlignerIntegrationTest extends WalkerTest {
" -ob %s",
1, // just one output file
Arrays.asList(md5));
executeTest("testBasicAlignment", spec);
//executeTest("testBasicAlignment", spec);
}
}

View File

@ -1,99 +0,0 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Integration tests for the CoverageStatistics walker.
 *
 * Each test builds a command line, registers an expected checksum string for every
 * output file through {@code addAuxFile} (presumably an MD5 -- confirm against
 * WalkerTest), and hands the spec to {@code executeTest}.
 *
 * @author chartl
 * @since Feb 25, 2010
 */
public class CoverageStatisticsIntegrationTest extends WalkerTest {
    /** Master switch: when false, {@link #execute} silently skips every test. */
    private static final boolean RUN_TESTS = true;
    /** Walker selection prefix shared by all command lines. */
    private static final String root = "-T CoverageStatistics ";
    /** hg18 reference path; currently not referenced by any test in this class. */
    private static final String hg18 = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
    /** b36 reference path used by the tests below. */
    private static final String b36 = "/broad/1KG/reference/human_b36_both.fasta";

    /**
     * Assembles the common part of the command line: walker, reference, one
     * {@code -I} per BAM and one {@code -L} per interval, in list order.
     *
     * The lists are only read, never modified, and an empty list simply
     * contributes no arguments.
     *
     * @param ref       path of the reference passed to {@code -R}
     * @param bams      BAM paths, each emitted as {@code " -I <path>"}
     * @param intervals intervals, each emitted as {@code " -L <interval>"}
     * @return the assembled command-line prefix
     */
    private String buildRootCmd(String ref, List<String> bams, List<String> intervals) {
        StringBuilder bamBuilder = new StringBuilder();
        for ( String bam : bams ) {
            bamBuilder.append(" -I ");
            bamBuilder.append(bam);
        }

        StringBuilder intervalBuilder = new StringBuilder();
        for ( String interval : intervals ) {
            intervalBuilder.append(" -L ");
            intervalBuilder.append(interval);
        }

        return root + "-R "+ref+bamBuilder.toString()+intervalBuilder.toString();
    }

    /**
     * Runs the given spec through {@code executeTest} unless {@link #RUN_TESTS} is off.
     *
     * @param name test name forwarded to {@code executeTest}
     * @param spec test specification forwarded to {@code executeTest}
     */
    private void execute(String name, WalkerTestSpec spec) {
        if ( RUN_TESTS ) {
            executeTest(name,spec);
        }
    }

    @Test
    public void testBaseOutputNoFiltering() {
        // our base file
        File baseOutputFile = this.createTempFile("depthofcoveragenofiltering",".tmp");
        this.setOutputFileLocation(baseOutputFile);

        String[] intervals = {"1:10,000,000-10,000,800","1:10,250,001-10,250,500","1:10,500,001-10,500,300","1:10,750,001-10,750,400"};
        String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam","/broad/1KG/DCC_merged/freeze5/NA19240.pilot2.454.bam"};

        String cmd = buildRootCmd(b36,Arrays.asList(bams),Arrays.asList(intervals)) + " -mmq 0 -mbq 0 -dels -baseCounts -both";
        WalkerTestSpec spec = new WalkerTestSpec(cmd,0, new ArrayList<String>());

        // now add the expected files that get generated
        spec.addAuxFile("959937a9b0ace520b4b7d9915d708003", baseOutputFile);
        spec.addAuxFile("aff2349d6dc221c08f6c469379aeaedf", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
        spec.addAuxFile("6476ed0c54a4307a618aa6d3268b050f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
        spec.addAuxFile("50870dad272f03f77befb0075baed1cd", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_locus_statistics"));
        spec.addAuxFile("65318c1e73d98a59cc6f817cde12d3d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary_statistics"));
        spec.addAuxFile("ef8c3e2ba3fc0da829e10e2d487c00d2", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
        spec.addAuxFile("223377e07b35e81a394b75b38d8e72ee", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
        spec.addAuxFile("096f4ed94020327288ea76245ebd6942", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
        spec.addAuxFile("06ed004c86f8b2ad8e64a3b42a0d85c5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_locus_statistics"));
        spec.addAuxFile("43c160ff9d754744728c142709011993", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
        spec.addAuxFile("a374410efe20609c5c4b87a6da7f4d51", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));

        execute("testBaseOutputNoFiltering",spec);
    }

    @Test
    public void testMedianOverRightHandBin() {
        File base = this.createTempFile("depthofcoveragelowbins",".tmp");
        this.setOutputFileLocation(base);

        String[] intervals = {"1:10,000,000-10,000,800","1:10,250,001-10,250,500","1:10,500,001-10,500,300","1:10,750,001-10,750,400"};
        String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam","/broad/1KG/DCC_merged/freeze5/NA19240.pilot2.454.bam"};

        String cmd = buildRootCmd(b36,Arrays.asList(bams),Arrays.asList(intervals)) +
                " -mmq 0 -mbq 0 -dels -baseCounts -both --start 1 --stop 14 --nBins 13";
        WalkerTestSpec spec = new WalkerTestSpec(cmd,0, new ArrayList<String>());

        spec.addAuxFile("959937a9b0ace520b4b7d9915d708003", base);
        spec.addAuxFile("219d643627eedd696bc476aac96376c2", createTempFileFromBase(base.getAbsolutePath()+".read_group_interval_statistics"));
        spec.addAuxFile("dd0225cf1e0b0bd4289b82fd4939f9fd", createTempFileFromBase(base.getAbsolutePath()+".sample_interval_statistics"));
        spec.addAuxFile("63575a8a2110507e08d421d44d06b327", createTempFileFromBase(base.getAbsolutePath()+".sample_interval_summary"));

        execute("testMedianOverRHBin",spec);
    }

    /**
     * Wraps {@code name} in a {@link File} and schedules it for deletion at JVM exit.
     * Nothing is created on disk by this method.
     *
     * @param name path of the file
     * @return the File for {@code name}
     */
    public File createTempFileFromBase(String name) {
        File fl = new File(name);
        fl.deleteOnExit();
        return fl;
    }
}

View File

@ -3,55 +3,97 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.io.File;
/**
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
*
* @Author chartl
* @Date Feb 25, 2010
*/
public class DepthOfCoverageIntegrationTest extends WalkerTest {
private static String root = "-L 1:10,164,500-10,164,520 -R " + oneKGLocation + "reference/human_b36_both.fasta -T DepthOfCoverage -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam";
static HashMap<String, String> expectations = new HashMap<String, String>();
static {
expectations.put("-minMAPQ 1", "8b73fad5cce4620907d5da2a985219d5");
expectations.put("-minMAPQ 100", "1a959892d8ad0523dac2fb097eacb3c2");
expectations.put("-minDepth 8", "6d549e5a5c4c55420d68e0221a955a0e");
expectations.put("-minDepth 10", "a367c894e6a48ebb107d2fe004cdfee7");
expectations.put("-bySample", "93358437153b4d65bdff747e33de1d63");
expectations.put("-byRG", "777e8427eb4bdad300b23800cb7b0592");
expectations.put("-histogram", "96f15e1d9d598d48191e20ee84715d46");
expectations.put("-bases", "baafcb2b90098cad1c5950da9e9932a6");
expectations.put("-minMAPQ 1 -bySample -byRG -minDepth 8 -histogram -bases", "bf2094b33e0e10fc11a7216bc1097a8b");
private boolean RUN_TESTS = true;
private String root = "-T DepthOfCoverage ";
private String hg18 = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
private String b36 = "/broad/1KG/reference/human_b36_both.fasta";
/**
 * Assembles the common part of the command line: the {@code root} prefix, the
 * reference, one {@code -I} per BAM and one {@code -L} per interval, in list order.
 *
 * The lists are only read, never modified, and an empty list simply
 * contributes no arguments (the previous do/while form threw on an empty list
 * and drained the caller's lists).
 *
 * @param ref       path of the reference passed to {@code -R}
 * @param bams      BAM paths, each emitted as {@code " -I <path>"}
 * @param intervals intervals, each emitted as {@code " -L <interval>"}
 * @return the assembled command-line prefix
 */
private String buildRootCmd(String ref, List<String> bams, List<String> intervals) {
    StringBuilder bamBuilder = new StringBuilder();
    for ( String bam : bams ) {
        bamBuilder.append(" -I ");
        bamBuilder.append(bam);
    }

    StringBuilder intervalBuilder = new StringBuilder();
    for ( String interval : intervals ) {
        intervalBuilder.append(" -L ");
        intervalBuilder.append(interval);
    }

    return root + "-R "+ref+bamBuilder.toString()+intervalBuilder.toString();
}
@Test
public void testDepthOfCoverage1() {
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs = entry.getKey();
String md5 = entry.getValue();
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s",
1, // just one output file
Arrays.asList(md5));
executeTest("testDepthOfCoverage1", spec);
/**
 * Forwards the spec to executeTest(name, spec), but only while RUN_TESTS is true;
 * otherwise the call is a no-op.
 *
 * @param name test name passed through to executeTest
 * @param spec test specification passed through to executeTest
 */
private void execute(String name, WalkerTestSpec spec) {
if ( RUN_TESTS ) {
executeTest(name,spec);
}
}
@Test
public void testDepthOfCoverage454() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T DepthOfCoverage -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -o %s",
1, // just one output file
Arrays.asList("a332d1539b29dff615b198818a3d4dd1"));
executeTest("testDepthOfCoverage454", spec);
public void testBaseOutputNoFiltering() {
    // primary output file; every auxiliary output path is this file's absolute path plus a suffix
    File primaryOutput = this.createTempFile("depthofcoveragenofiltering",".tmp");
    this.setOutputFileLocation(primaryOutput);

    List<String> intervalList = new ArrayList<String>(Arrays.asList(
            "1:10,000,000-10,000,800","1:10,250,001-10,250,500","1:10,500,001-10,500,300","1:10,750,001-10,750,400"));
    List<String> bamList = new ArrayList<String>(Arrays.asList(
            "/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam",
            "/broad/1KG/DCC_merged/freeze5/NA19240.pilot2.454.bam"));

    String commandLine = buildRootCmd(b36, bamList, intervalList) + " -mmq 0 -mbq 0 -dels -baseCounts -both";
    WalkerTestSpec spec = new WalkerTestSpec(commandLine, 0, new ArrayList<String>());

    // register the expected checksum for the primary output and for each auxiliary output
    String prefix = primaryOutput.getAbsolutePath();
    spec.addAuxFile("959937a9b0ace520b4b7d9915d708003", primaryOutput);
    spec.addAuxFile("aff2349d6dc221c08f6c469379aeaedf", createTempFileFromBase(prefix + ".sample_interval_statistics"));
    spec.addAuxFile("6476ed0c54a4307a618aa6d3268b050f", createTempFileFromBase(prefix + ".sample_interval_summary"));
    spec.addAuxFile("50870dad272f03f77befb0075baed1cd", createTempFileFromBase(prefix + ".sample_locus_statistics"));
    spec.addAuxFile("65318c1e73d98a59cc6f817cde12d3d4", createTempFileFromBase(prefix + ".sample_summary_statistics"));
    spec.addAuxFile("ef8c3e2ba3fc0da829e10e2d487c00d2", createTempFileFromBase(prefix + ".sample_statistics"));
    spec.addAuxFile("223377e07b35e81a394b75b38d8e72ee", createTempFileFromBase(prefix + ".read_group_interval_statistics"));
    spec.addAuxFile("096f4ed94020327288ea76245ebd6942", createTempFileFromBase(prefix + ".read_group_interval_summary"));
    spec.addAuxFile("06ed004c86f8b2ad8e64a3b42a0d85c5", createTempFileFromBase(prefix + ".read_group_locus_statistics"));
    spec.addAuxFile("43c160ff9d754744728c142709011993", createTempFileFromBase(prefix + ".read_group_statistics"));
    spec.addAuxFile("a374410efe20609c5c4b87a6da7f4d51", createTempFileFromBase(prefix + ".read_group_summary"));

    execute("testBaseOutputNoFiltering", spec);
}
@Test
public void testDepthOfCoverageNoIntervals() {
WalkerTestSpec spec = new WalkerTestSpec( "-T DepthOfCoverage -R " + validationDataLocation + "human_b36_both.partial.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -o %s",
1, // just one output file
Arrays.asList("7b8fc75dd1995d270831b31c268392a5"));
executeTest("testDepthOfCoverageNoIntervals", spec);
public void testMedianOverRightHandBin() {
    // primary output file; auxiliary output paths are its absolute path plus a suffix
    File primaryOutput = this.createTempFile("depthofcoveragelowbins",".tmp");
    this.setOutputFileLocation(primaryOutput);

    List<String> intervalList = new ArrayList<String>(Arrays.asList(
            "1:10,000,000-10,000,800","1:10,250,001-10,250,500","1:10,500,001-10,500,300","1:10,750,001-10,750,400"));
    List<String> bamList = new ArrayList<String>(Arrays.asList(
            "/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam",
            "/broad/1KG/DCC_merged/freeze5/NA19240.pilot2.454.bam"));

    // same filters as the no-filtering test, plus explicit binning arguments
    String binningArgs = " -mmq 0 -mbq 0 -dels -baseCounts -both --start 1 --stop 14 --nBins 13";
    String commandLine = buildRootCmd(b36, bamList, intervalList) + binningArgs;
    WalkerTestSpec spec = new WalkerTestSpec(commandLine, 0, new ArrayList<String>());

    String prefix = primaryOutput.getAbsolutePath();
    spec.addAuxFile("959937a9b0ace520b4b7d9915d708003", primaryOutput);
    spec.addAuxFile("219d643627eedd696bc476aac96376c2", createTempFileFromBase(prefix + ".read_group_interval_statistics"));
    spec.addAuxFile("dd0225cf1e0b0bd4289b82fd4939f9fd", createTempFileFromBase(prefix + ".sample_interval_statistics"));
    spec.addAuxFile("63575a8a2110507e08d421d44d06b327", createTempFileFromBase(prefix + ".sample_interval_summary"));

    execute("testMedianOverRHBin", spec);
}
}
/**
 * Wraps {@code name} in a {@link File} and schedules that path for deletion at
 * JVM exit. Nothing is created on disk by this method.
 *
 * @param name path of the file
 * @return the File for {@code name}
 */
public File createTempFileFromBase(String name) {
    final File scheduledForCleanup = new File(name);
    scheduledForCleanup.deleteOnExit();
    return scheduledForCleanup;
}
}