Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/stable

This commit is contained in:
Matt Hanna 2011-08-17 16:26:45 -04:00
commit 2b2a4e0795
11 changed files with 63 additions and 48 deletions

View File

@ -61,6 +61,8 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
// Check to see if we have a called snp
for ( VariantContext vc : vcs ) {
if ( vc.isFiltered() )
continue;
if ( !vc.getSource().startsWith("snpmask") ) {
if ( vc.isDeletion()) {
deletionBasesRemaining = vc.getReference().length();

View File

@ -239,8 +239,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// now with banding
public int linearExactBanded(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
throw new NotImplementedException();
// final int numSamples = GLs.size();
// final int numChr = 2*numSamples;
@ -419,7 +419,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
/**
* Can be overridden by concrete subclasses
* @param vc variant context with genotype likelihoods
@ -517,10 +517,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
double qual = Double.NEGATIVE_INFINITY;
double[] likelihoods = g.getLikelihoods().getAsVector();
/* System.out.format("Sample: %s GL:",sample);
for (int i=0; i < likelihoods.length; i++)
System.out.format("%1.4f ",likelihoods[i]);
*/
/* System.out.format("Sample: %s GL:",sample);
for (int i=0; i < likelihoods.length; i++)
System.out.format("%1.4f ",likelihoods[i]);
*/
for (int i=0; i < likelihoods.length; i++) {
if (i==bestGTguess)
@ -531,14 +531,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// qual contains now max(likelihoods[k]) for all k != bestGTguess
qual = likelihoods[bestGTguess] - qual;
// likelihoods are stored row-wise in upper triangular matrix. IE
// likelihoods are stored row-wise in lower triangular matrix. IE
// for 2 alleles they have ordering AA,AB,BB
// for 3 alleles they are ordered AA,AB,AC,BB,BC,CC
// for 3 alleles they are ordered AA,AB,BB,AC,BC,CC
// Get now alleles corresponding to best index
int kk=0;
boolean done = false;
for (int i=0; i < vc.getNAlleles(); i++) {
for (int j=i; j < vc.getNAlleles(); j++){
for (int j=0; j < vc.getNAlleles(); j++) {
for (int i=0; i <= j; i++){
if (kk++ == bestGTguess) {
if (i==0)
myAlleles.add(vc.getReference());
@ -565,7 +565,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
double chosenGenotype = normalized[bestGTguess];
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
}
//System.out.println(myAlleles.toString());
//System.out.println(myAlleles.toString());
calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
}
@ -592,7 +592,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
for (int j=0; j <=numChr; j++)
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
//YMatrix[0][0] = 1.0;
//YMatrix[0][0] = 1.0;
logYMatrix[0][0] = 0.0;
int j=0;

View File

@ -68,7 +68,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
if ( vc == null ) {
vc = vc_input;
} else {
logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record only");
logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record");
}
}
}

View File

@ -485,6 +485,9 @@ public class UnifiedGenotyperEngine {
Map<String, AlignmentContext> stratifiedContexts = null;
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
return null;
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) {
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
@ -499,6 +502,7 @@ public class UnifiedGenotyperEngine {
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
} else {
// todo - tmp will get rid of extended events so this wont be needed
if (!rawContext.hasExtendedEventPileup())
return null;
@ -516,9 +520,6 @@ public class UnifiedGenotyperEngine {
}
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
return null;
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE);

View File

@ -50,7 +50,7 @@ public class VariantRecalibratorArgumentCollection {
@Argument(fullName="numKMeans", shortName="nKM", doc="The number of k-means iterations to perform in order to initialize the means of the Gaussians in the Gaussian mixture model.", required=false)
public int NUM_KMEANS_ITERATIONS = 30;
@Argument(fullName="stdThreshold", shortName="std", doc="If a variant has annotations more than -std standard deviations away from mean then don't use it for building the Gaussian mixture model.", required=false)
public double STD_THRESHOLD = 8.0;
public double STD_THRESHOLD = 14.0;
@Argument(fullName="qualThreshold", shortName="qual", doc="If a known variant has raw QUAL value less than -qual then don't use it for building the Gaussian mixture model.", required=false)
public double QUAL_THRESHOLD = 80.0;
@Argument(fullName="shrinkage", shortName="shrinkage", doc="The shrinkage parameter in variational Bayes algorithm.", required=false)

View File

@ -135,6 +135,11 @@ public class NWaySAMFileWriter implements SAMFileWriter {
public void addAlignment(SAMRecord samRecord) {
final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);
String rg = samRecord.getStringAttribute("RG");
if ( rg != null ) {
String rg_orig = toolkit.getReadsDataSource().getOriginalReadGroupId(rg);
samRecord.setAttribute("RG",rg_orig);
}
writerMap.get(id).addAlignment(samRecord);
}

View File

@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
}
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"d33212a84368e821cbedecd4f59756d6", // tranches
"4652dca41222bebdf9d9fda343b2a835", // recal file
"243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
"58780f63182e139fdbe17f6c18b5b774", // recal file
"f67d844b6252a55452cf4167b77530b1"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.queue.qscripts
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.function.ListWriterFunction
import org.broadinstitute.sting.queue.extensions.picard._
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode
@ -12,6 +11,7 @@ import net.sf.samtools.SAMFileReader
import net.sf.samtools.SAMFileHeader.SortOrder
import org.broadinstitute.sting.queue.util.QScriptUtils
import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction}
class DataProcessingPipeline extends QScript {
qscript =>
@ -283,12 +283,6 @@ class DataProcessingPipeline extends QScript {
****************************************************************************/
// General arguments to GATK walkers
trait CommandLineGATKArgs extends CommandLineGATK {
this.reference_sequence = qscript.reference
this.memoryLimit = 4
this.isIntermediate = true
}
// General arguments to non-GATK tools
trait ExternalCommonArgs extends CommandLineFunction {
@ -296,6 +290,14 @@ class DataProcessingPipeline extends QScript {
this.isIntermediate = true
}
// General arguments to GATK walkers
trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs {
this.reference_sequence = qscript.reference
}
trait SAMargs extends PicardBamFunction with ExternalCommonArgs {
this.maxRecordsInRam = 100000
}
case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
@ -303,7 +305,7 @@ class DataProcessingPipeline extends QScript {
this.out = outIntervals
this.mismatchFraction = 0.0
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
if (!indels.isEmpty)
if (indels != null)
this.rodBind :+= RodBind("indels", "VCF", indels)
this.scatterCount = nContigs
this.analysisName = queueLogDir + outIntervals + ".target"
@ -311,11 +313,12 @@ class DataProcessingPipeline extends QScript {
}
case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input_file :+= inBams
this.targetIntervals = tIntervals
this.out = outBam
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
if (!qscript.indels.isEmpty)
if (qscript.indels != null)
this.rodBind :+= RodBind("indels", "VCF", qscript.indels)
this.consensusDeterminationModel = consensusDeterminationModel
this.compress = 0
@ -365,6 +368,7 @@ class DataProcessingPipeline extends QScript {
}
case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = List(inBam)
this.output = outBam
this.metrics = metricsFile
@ -373,6 +377,7 @@ class DataProcessingPipeline extends QScript {
}
case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = inBams
this.output = outBam
this.analysisName = queueLogDir + outBam + ".joinBams"
@ -380,6 +385,7 @@ class DataProcessingPipeline extends QScript {
}
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = List(inSam)
this.output = outBam
this.sortOrder = sortOrderP
@ -390,7 +396,6 @@ class DataProcessingPipeline extends QScript {
case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs {
this.input = List(inBam)
this.output = outLog
this.maxRecordsInRam = 100000
this.REFERENCE_SEQUENCE = qscript.reference
this.isIntermediate = false
this.analysisName = queueLogDir + outLog + ".validate"
@ -399,6 +404,7 @@ class DataProcessingPipeline extends QScript {
case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = List(inBam)
this.output = outBam
this.RGID = readGroup.id
@ -408,8 +414,6 @@ class DataProcessingPipeline extends QScript {
this.RGPL = readGroup.pl
this.RGPU = readGroup.pu
this.RGSM = readGroup.sm
this.memoryLimit = 4
this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".rg"
this.jobName = queueLogDir + outBam + ".rg"
}
@ -435,6 +439,7 @@ class DataProcessingPipeline extends QScript {
@Input(doc="bwa alignment index file") var sai = inSai
@Output(doc="output aligned bam file") var alignedBam = outBam
def commandLine = bwaPath + " samse " + reference + " " + sai + " " + bam + " > " + alignedBam
this.memoryLimit = 6
this.analysisName = queueLogDir + outBam + ".bwa_sam_se"
this.jobName = queueLogDir + outBam + ".bwa_sam_se"
}
@ -445,6 +450,7 @@ class DataProcessingPipeline extends QScript {
@Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2
@Output(doc="output aligned bam file") var alignedBam = outBam
def commandLine = bwaPath + " sampe " + reference + " " + sai1 + " " + sai2 + " " + bam + " " + bam + " > " + alignedBam
this.memoryLimit = 6
this.analysisName = queueLogDir + outBam + ".bwa_sam_pe"
this.jobName = queueLogDir + outBam + ".bwa_sam_pe"
}
@ -455,6 +461,4 @@ class DataProcessingPipeline extends QScript {
this.analysisName = queueLogDir + outBamList + ".bamList"
this.jobName = queueLogDir + outBamList + ".bamList"
}
}

View File

@ -13,7 +13,7 @@ class GATKResourcesBundle extends QScript {
var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar")
@Argument(doc="liftOverPerl", required=false)
var liftOverPerl: File = new File("./perl/liftOverVCF.pl")
var liftOverPerl: File = new File("./public/perl/liftOverVCF.pl")
@Argument(shortName = "ver", doc="The SVN version of this release", required=true)
var VERSION: String = _
@ -57,11 +57,11 @@ class GATKResourcesBundle extends QScript {
//Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name)
(inRef.name, outRef.name) match {
case ("b37", "hg19") =>
return new LiftOverPerl(in, out, new File("chainFiles/b37tohg19.chain"), inRef, outRef)
return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg19.chain"), inRef, outRef)
case ("b37", "hg18") =>
return new LiftOverPerl(in, out, new File("chainFiles/b37tohg18.chain"), inRef, outRef)
return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg18.chain"), inRef, outRef)
case ("b37", "b36") =>
return new LiftOverPerl(in, out, new File("chainFiles/b37tob36.chain"), inRef, outRef)
return new LiftOverPerl(in, out, new File("public/chainFiles/b37tob36.chain"), inRef, outRef)
case _ => return null
}
}
@ -85,7 +85,7 @@ class GATKResourcesBundle extends QScript {
//
b37 = new Reference("b37", new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta"))
hg18 = new Reference("hg18", new File("/Users/depristo/Desktop/broadLocal/localData/Homo_sapiens_assembly18.fasta"))
exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta"))
exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta"))
refs = List(b37, hg18, exampleFASTA)
val DATAROOT = "/Users/depristo/Desktop/broadLocal/localData/"
@ -94,7 +94,7 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false))
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
}
def initializeStandardDataFiles() = {
@ -105,7 +105,7 @@ class GATKResourcesBundle extends QScript {
b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta"))
hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"))
b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta"))
exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta"))
exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta"))
refs = List(hg19, b37, hg18, b36, exampleFASTA)
addResource(new Resource(b37.file, "", b37, false))
@ -134,6 +134,9 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf",
"1000G_indels_for_realignment", b37, true, false))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf",
"indels_mills_devine", b37, true, true))
//
// example call set for wiki tutorial
//
@ -152,8 +155,8 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt",
"refGene", b37, true, false))
addResource(new Resource("chainFiles/hg18tob37.chain", "", hg18, false, false))
addResource(new Resource("chainFiles/b36tob37.chain", "", b36, false, false))
addResource(new Resource("public/chainFiles/hg18tob37.chain", "", hg18, false, false))
addResource(new Resource("public/chainFiles/b36tob37.chain", "", b36, false, false))
// todo -- chain files?
// todo 1000G SNP and indel call sets?
@ -162,7 +165,7 @@ class GATKResourcesBundle extends QScript {
// exampleFASTA file
//
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
}
def createBundleDirectories(dir: File) = {

View File

@ -59,10 +59,10 @@ class ExampleUnifiedGenotyper extends QScript {
evalUnfiltered.rodBind :+= RodBind("eval", "VCF", genotyper.out)
evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval")
variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.out)
variantFilter.rodBind :+= RodBind("variant", "VCF", genotyper.out)
variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf")
variantFilter.filterName = filterNames
variantFilter.filterExpression = filterExpressions
variantFilter.filterExpression = filterExpressions.map("\"" + _ + "\"")
evalFiltered.rodBind :+= RodBind("eval", "VCF", variantFilter.out)
evalFiltered.out = swapExt(variantFilter.out, "vcf", "eval")

View File

@ -52,7 +52,7 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu
updateStatus(RunnerStatus.RUNNING)
job.run()
updateStatus(RunnerStatus.FAILED)
updateStatus(RunnerStatus.DONE)
}
override def checkUnknownStatus() {}