diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index 255945619..337c2664c 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.contexts; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -42,7 +43,7 @@ import java.util.*; * Time: 3:01:34 PM * To change this template use File | Settings | File Templates. */ -public class AlignmentContext { +public class AlignmentContext implements HasGenomeLocation { protected GenomeLoc loc = null; protected ReadBackedPileup basePileup = null; protected boolean hasPileupBeenDownsampled; diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java index 046d874b7..9f1bb7984 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.contexts; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; +import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -39,7 +40,7 @@ import java.util.*; * User: ebanks * Modified: chartl (split by read group) */ -public class StratifiedAlignmentContext { +public class StratifiedAlignmentContext implements HasGenomeLocation { // Definitions: // COMPLETE = full alignment context diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java index 22f00ac67..08bd8869f 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -189,7 +190,7 @@ class EntireStream implements DataStreamSegment { /** * Models a mapped position within a stream of GATK input data. */ -class MappedStreamSegment implements DataStreamSegment { +class MappedStreamSegment implements DataStreamSegment, HasGenomeLocation { public final GenomeLoc locus; /** diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java index 3a7b26753..cff97e4ee 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; @@ -14,7 +15,7 @@ import java.util.*; * Time: 6:10:48 PM * To change this template use File | Settings | File Templates. */ -public class RODRecordListImpl extends AbstractList implements Comparable, Cloneable, RODRecordList { +public class RODRecordListImpl extends AbstractList implements Comparable, Cloneable, RODRecordList, HasGenomeLocation { private List records; private GenomeLoc location = null; private String name = null; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDatum.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDatum.java index c2fa95090..00eaf92e0 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDatum.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDatum.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.HasGenomeLocation; import java.io.File; import java.io.FileNotFoundException; @@ -13,7 +14,7 @@ import java.io.IOException; * Time: 10:49:47 AM * To change this template use File | Settings | File Templates. */ -public interface ReferenceOrderedDatum extends Comparable { +public interface ReferenceOrderedDatum extends Comparable, HasGenomeLocation { public String getName(); public boolean parseLine(final Object header, final String[] parts) throws IOException; public String toString(); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java b/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java index 688ffafee..b8a0868dd 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.HasGenomeLocation; import java.util.List; @@ -11,7 +12,7 @@ import java.util.List; * Time: 5:22:30 PM * To change this template use File | Settings | File Templates. */ -public interface Transcript { +public interface Transcript extends HasGenomeLocation { /** Returns id of the transcript (RefSeq NM_* id) */ public String getTranscriptId(); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java index dc6da7132..2b48f61a6 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.refdata.utils; import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.HasGenomeLocation; import java.util.Comparator; import java.util.LinkedList; @@ -190,7 +191,7 @@ public class FlashBackIterator implements LocationAwareSeekableRODIterator { /** * a list that buffers the location for this rod */ -class ComparableList implements Comparator { +class ComparableList implements Comparator, HasGenomeLocation { private RODRecordList list; private GenomeLoc location = null; public ComparableList(RODRecordList list) { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java index 9453ec484..1553402a5 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java @@ -27,6 +27,7 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -40,7 +41,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; * This wraps a Tribble feature or a RODatum so that both present the same interface: a genome loc for position and a * way of retrieving the track name. */ -public abstract class GATKFeature implements Feature { +public abstract class GATKFeature implements Feature, HasGenomeLocation { public GATKFeature(String name) { this.name = name; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java index 290e5cd5c..00c64edbd 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.refdata.utils; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.HasGenomeLocation; import java.util.List; @@ -36,7 +37,7 @@ import java.util.List; * make the RODRecord list an interface, so we can stub in other implementations * during testing. */ -public interface RODRecordList extends List, Comparable { +public interface RODRecordList extends List, Comparable, HasGenomeLocation { public GenomeLoc getLocation(); public String getName(); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java index 2cf640317..2c67265d6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java @@ -99,7 +99,7 @@ public class CallableLociWalker extends LocusWalker { } } - private class ReadBin { + private class ReadBin implements HasGenomeLocation { private final ArrayList reads = new ArrayList(); private byte[] reference = null; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 917871d37..7e2476f8d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -1086,7 +1086,7 @@ public class ReadBackedPhasingWalker extends RodWalker, Cloneable, Serializable { +public class GenomeLoc implements Comparable, Cloneable, Serializable, HasGenomeLocation { /** * the basic components of a genome loc, its contig index, * start and stop position, and (optionally) the contig name @@ -38,7 +38,8 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable public static final boolean isUnmapped(GenomeLoc loc) { return loc == UNMAPPED; } - + public static final GenomeLoc WHOLE_GENOME = new GenomeLoc("all",-1,0,0); + // -------------------------------------------------------------------------------------------------------------- // // constructors @@ -68,6 +69,8 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable // // Accessors and setters // + public final GenomeLoc getLocation() { return this; } + public final String getContig() { return this.contigName; } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index 85156abf1..c2d660b97 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils.pileup; import org.broadinstitute.sting.gatk.datasources.sample.Sample; +import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.BaseUtils; diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index f4232e7fd..c53476747 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -28,6 +28,7 @@ import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.iterators.IterableIterator; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.HasGenomeLocation; import java.util.List; import java.util.Collection; @@ -38,7 +39,7 @@ import java.util.Collection; * @author mhanna * @version 0.1 */ -public interface ReadBackedPileup extends Iterable { +public interface ReadBackedPileup extends Iterable, HasGenomeLocation { /** * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index aa8ac5377..d3c92c254 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -52,7 +52,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(); GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5); - Shard shard = new LocusShard(new SAMDataSource(Collections.emptyList(),genomeLocParser),Collections.singletonList(shardBounds),Collections.emptyMap()); + Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.emptyList(),genomeLocParser),Collections.singletonList(shardBounds),Collections.emptyMap()); WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource()); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java index 8b65f0900..db2aaf7b5 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java @@ -40,6 +40,9 @@ import java.util.Collections; */ public class MockLocusShard extends LocusShard { public MockLocusShard(final GenomeLocParser genomeLocParser,final List intervals) { - super(new SAMDataSource(Collections.emptyList(),genomeLocParser),intervals,null); + super( genomeLocParser, + new SAMDataSource(Collections.emptyList(),genomeLocParser), + intervals, + null); } } diff --git a/scala/qscript/oneoffs/depristo/distributedGATKPerformance.scala b/scala/qscript/oneoffs/depristo/distributedGATKPerformance.scala new file mode 100755 index 000000000..177aa375c --- /dev/null +++ b/scala/qscript/oneoffs/depristo/distributedGATKPerformance.scala @@ -0,0 +1,153 @@ +import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction +import org.broadinstitute.sting.queue.extensions.gatk._ +import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction +import org.broadinstitute.sting.queue.QScript +import org.apache.commons.io.FilenameUtils; + +class DistributedGATKPerformance extends QScript { + qscript => + + @Argument(shortName="gatk", doc="gatk jar file", required=true) + var gatkJarFile: File = _ + + @Argument(shortName="outputDir", doc="output directory", required=false) + var outputDir: String = "" + + @Argument(shortName="dataset", doc="selects the datasets to run. If not provided, all datasets will be used", required=false) + var datasets: List[String] = Nil + + @Argument(shortName="long", doc="runs long calculations", required=false) + var long: Boolean = false + + + //@Argument(shortName="noBAQ", doc="turns off BAQ calculation", required=false) + var noBAQ: Boolean = false + + trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; jarFile = gatkJarFile; memoryLimit = Some(3); } + + class Target( + val baseName: String, + val reference: File, + val dbsnpFile: String, + val hapmapFile: String, + val maskFile: String, + val bamList: File, + val goldStandard_VCF: File, + val intervals: String, + val titvTarget: Double, + val isLowpass: Boolean) { + val name = qscript.outputDir + baseName + val clusterFile = new File(name + ".clusters") + def rawVCF(part: String) = new File(name + "." + part + ".raw.vcf") + val filteredVCF = new File(name + ".filtered.vcf") + val titvRecalibratedVCF = new File(name + ".titv.recalibrated.vcf") + val tsRecalibratedVCF = new File(name + ".ts.recalibrated.vcf") + val goldStandardName = qscript.outputDir + "goldStandard/" + baseName + val goldStandardClusterFile = new File(goldStandardName + ".clusters") + } + + val hg18 = new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") + val b36 = new File("/humgen/1kg/reference/human_b36_both.fasta") + val b37 = new File("/humgen/1kg/reference/human_g1k_v37.fasta") + val dbSNP_hg18 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_130_hg18.rod" + val dbSNP_b36 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_130_b36.rod" + val dbSNP_b37 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_132_b37.leftAligned.vcf" + val hapmap_hg18 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.hg18_fwd.vcf" + val hapmap_b36 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.b36_fwd.vcf" + val hapmap_b37 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" + val indelMask_b36 = "/humgen/1kg/processing/pipeline_test_bams/pilot1.dindel.mask.b36.bed" + val indelMask_b37 = "/humgen/1kg/processing/pipeline_test_bams/pilot1.dindel.mask.b37.bed" + + // ToDos: + // reduce the scope of the datasets so the script is more nimble + // figure out how to give names to all the Queue-LSF logs (other than Q-1931@node1434-24.out) so that it is easier to find logs for certain steps + // create gold standard BAQ'd bam files, no reason to always do it on the fly + + // Analysis to add at the end of the script: + // auto generation of the cluster plots + // spike in NA12878 to the exomes and to the lowpass, analysis of how much of her variants are being recovered compared to single sample exome or HiSeq calls + // produce Kiran's Venn plots based on comparison between new VCF and gold standard produced VCF + + val lowPass: Boolean = true + + val targetDataSets: Map[String, Target] = Map( + "HiSeq" -> new Target("NA12878.HiSeq", hg18, dbSNP_hg18, hapmap_hg18, + "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask", + new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam"), + new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf"), + "chr1", 2.07, !lowPass), + "FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"), + new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, lowPass), + "WEx" -> new Target("NA12878.WEx", hg18, dbSNP_hg18, hapmap_hg18, + "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.indels.10.mask", + new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.WEx.cleaned.recal.bam"), + new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.vcf"), + "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 2.6, !lowPass), + "TGPWExGdA" -> new Target("1000G.WEx.GdA", b37, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/1kg/processing/pipeline_test_bams/Barcoded_1000G_WEx_Reduced_Plate_1.cleaned.list"), // BUGBUG: reduce from 60 to 20 people + new File("/humgen/gsa-scr1/delangel/NewUG/calls/AugustRelease.filtered_Q50_QD5.0_SB0.0.allSamples.SNPs_hg19.WEx_UG_newUG_MQC.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 2.6, !lowPass), + "LowPassN60" -> new Target("lowpass.N60", b36, dbSNP_b36, hapmap_b36, indelMask_b36, + new File("/humgen/1kg/analysis/bamsForDataProcessingPapers/lowpass_b36/lowpass.chr20.cleaned.matefixed.bam"), // the bam list to call from + new File("/home/radon01/depristo/work/oneOffProjects/VQSRCutByNRS/lowpass.N60.chr20.filtered.vcf"), // the gold standard VCF file to run through the VQSR + "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.b36.intervals", 2.3, lowPass), // chunked interval list to use with Queue's scatter/gather functionality + "LowPassAugust" -> new Target("ALL.august.v4", b37, dbSNP_b37, hapmap_b37, indelMask_b37, // BUGBUG: kill this, it is too large + new File("/humgen/1kg/processing/allPopulations_chr20_august_release.cleaned.merged.bams/ALL.cleaned.merged.list"), + new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), + "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, lowPass), + "LowPassEUR363Nov" -> new Target("EUR.nov2010", b37, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/1kg/processing/pipeline_test_bams/EUR.363sample.Nov2010.chr20.bam"), + new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, lowPass), + "WExTrio" -> new Target("NA12878Trio.WEx", b37, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/gsa-scr1/carneiro/prj/trio/NA12878Trio.WEx.hg19.bam"), + new File("/humgen/gsa-scr1/delangel/NewUG/calls/AugustRelease.filtered_Q50_QD5.0_SB0.0.allSamples.SNPs_hg19.WEx_UG_newUG_MQC.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 2.6, !lowPass) + ) + + def script = { + + // Selects the datasets in the -dataset argument and adds them to targets. + var targets: List[Target] = List() + if (!datasets.isEmpty) + for (ds <- datasets) + targets ::= targetDataSets(ds) // Could check if ds was mispelled, but this way an exception will be thrown, maybe it's better this way? + else // If -dataset is not specified, all datasets are used. + for (targetDS <- targetDataSets.valuesIterator) // for Scala 2.7 or older, use targetDataSets.values + targets ::= targetDS + + val nWays = if (long) List(1, 2, 5, 10) else List(25, 50, 100) + + for (target <- targets) { + for (nWaysParallel <- nWays) { +// for (nWaysParallel <- List(2, 5)) { + val aname = "distN" + nWaysParallel; + val coordinationFile = new File(target.name + "." + aname + ".distributed.txt") + for ( part <- 1 to nWaysParallel) { + add(new UnifiedGenotyper(target, coordinationFile, part, aname + ".part" + part)) + } + } + } + } + + // 1.) Call SNPs with UG + class UnifiedGenotyper(t: Target, coordinationFile: File, i: Int, part: String) extends org.broadinstitute.sting.queue.extensions.gatk.UnifiedGenotyper with UNIVERSAL_GATK_ARGS { + this.reference_sequence = t.reference + this.processingTracker = coordinationFile + this.intervalsString ++= List(t.intervals) + this.dcov = Some( if ( t.isLowpass ) { 50 } else { 250 } ) + this.stand_call_conf = Some( if ( t.isLowpass ) { 4.0 } else { 30.0 } ) + this.stand_emit_conf = Some( if ( t.isLowpass ) { 4.0 } else { 30.0 } ) + this.input_file :+= t.bamList + this.out = t.rawVCF(part) + this.baq = Some( if (noBAQ) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.RECALCULATE}) + this.analysisName = t.name + "_UG." + part + if ( i == 1 ) this.performanceLog = new File(coordinationFile.getAbsolutePath + "." + part + ".pf.log") + if (t.dbsnpFile.endsWith(".rod")) + this.DBSNP = new File(t.dbsnpFile) + else if (t.dbsnpFile.endsWith(".vcf")) + this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile) + } +}