Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
7524b00cbc
60
build.xml
60
build.xml
|
|
@ -780,6 +780,50 @@
|
||||||
</sequential>
|
</sequential>
|
||||||
</macrodef>
|
</macrodef>
|
||||||
|
|
||||||
|
<!-- FAILED-TEST -->
|
||||||
|
<macrodef name="run-failed-test">
|
||||||
|
<attribute name="xmlfailedtestfile" />
|
||||||
|
<sequential>
|
||||||
|
<!-- Get the pipeline run type. Default to dry. -->
|
||||||
|
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
||||||
|
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
||||||
|
</condition>
|
||||||
|
|
||||||
|
<condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
|
||||||
|
<isset property="include.contracts" />
|
||||||
|
</condition>
|
||||||
|
|
||||||
|
<mkdir dir="${report}/failed_rerun" />
|
||||||
|
<echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
|
||||||
|
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
|
||||||
|
<testng outputDir="${report}/failed_rerun"
|
||||||
|
haltOnFailure="false" failureProperty="test.failure"
|
||||||
|
verbose="2"
|
||||||
|
workingDir="${basedir}"
|
||||||
|
useDefaultListeners="false"
|
||||||
|
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
|
||||||
|
<jvmarg value="-Xmx${test.maxmemory}" />
|
||||||
|
<jvmarg value="-Djava.awt.headless=true" />
|
||||||
|
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
|
||||||
|
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
|
||||||
|
<jvmarg line="${cofoja.jvm.args}"/>
|
||||||
|
<!-- <jvmarg value="-Xdebug"/> -->
|
||||||
|
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
|
||||||
|
<classpath>
|
||||||
|
<path refid="external.dependencies" />
|
||||||
|
<pathelement location="${java.classes}" />
|
||||||
|
<pathelement location="${scala.classes}" />
|
||||||
|
<pathelement location="${java.contracts}" />
|
||||||
|
<pathelement location="${java.test.classes}" />
|
||||||
|
<pathelement location="${scala.test.classes}" />
|
||||||
|
</classpath>
|
||||||
|
|
||||||
|
<xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
|
||||||
|
</testng>
|
||||||
|
|
||||||
|
<fail message="test failed" if="test.failure" />
|
||||||
|
</sequential>
|
||||||
|
</macrodef>
|
||||||
|
|
||||||
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
|
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
|
||||||
<target name="test" depends="test.compile,tribble.test" description="Run unit tests">
|
<target name="test" depends="test.compile,tribble.test" description="Run unit tests">
|
||||||
|
|
@ -814,6 +858,22 @@
|
||||||
<run-test testtype="${pipetype}"/>
|
<run-test testtype="${pipetype}"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<target name="failed-test" depends="test.compile">
|
||||||
|
<run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" />
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="failed-integration" depends="test.compile">
|
||||||
|
<run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" />
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="failed-performance" depends="test.compile">
|
||||||
|
<run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" />
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="failed-pipeline" depends="test.compile">
|
||||||
|
<run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" />
|
||||||
|
</target>
|
||||||
|
|
||||||
<!-- ***************************************************************************** -->
|
<!-- ***************************************************************************** -->
|
||||||
<!-- *********** Tribble ********* -->
|
<!-- *********** Tribble ********* -->
|
||||||
<!-- ***************************************************************************** -->
|
<!-- ***************************************************************************** -->
|
||||||
|
|
|
||||||
|
|
@ -26,16 +26,12 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
import org.broad.tribble.readers.AsciiLineReader;
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.zip.GZIPInputStream;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -58,7 +54,13 @@ public class VCFDiffableReader implements DiffableReader {
|
||||||
VCFCodec vcfCodec = new VCFCodec();
|
VCFCodec vcfCodec = new VCFCodec();
|
||||||
|
|
||||||
// must be read as state is stored in reader itself
|
// must be read as state is stored in reader itself
|
||||||
vcfCodec.readHeader(lineReader);
|
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
|
||||||
|
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
|
||||||
|
String key = headerLine.getKey();
|
||||||
|
if ( headerLine instanceof VCFNamedHeaderLine )
|
||||||
|
key += "_" + ((VCFNamedHeaderLine) headerLine).getName();
|
||||||
|
root.add(key, headerLine.toString());
|
||||||
|
}
|
||||||
|
|
||||||
String line = lineReader.readLine();
|
String line = lineReader.readLine();
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
|
||||||
|
|
@ -242,7 +242,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
return new PhasingStatsAndOutput(phaseStats, completedList);
|
return new PhasingStatsAndOutput(phaseStats, completedList);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList("PQ"));
|
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList(PQ_KEY));
|
||||||
|
|
||||||
private VariantContext reduceVCToSamples(VariantContext vc, List<String> samplesToPhase) {
|
private VariantContext reduceVCToSamples(VariantContext vc, List<String> samplesToPhase) {
|
||||||
// for ( String sample : samplesToPhase )
|
// for ( String sample : samplesToPhase )
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,8 @@ import org.broad.tribble.NameAwareCodec;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
@ -96,6 +98,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
for ( String str : headerStrings ) {
|
for ( String str : headerStrings ) {
|
||||||
if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
|
if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
|
||||||
String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR);
|
String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR);
|
||||||
|
if ( strings.length < VCFHeader.HEADER_FIELDS.values().length )
|
||||||
|
throw new TribbleException.InvalidHeader("there are not enough columns present in the header line: " + str);
|
||||||
|
|
||||||
int arrayIndex = 0;
|
int arrayIndex = 0;
|
||||||
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||||
try {
|
try {
|
||||||
|
|
@ -159,12 +164,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
}
|
}
|
||||||
|
|
||||||
private Feature reallyDecode(String line) {
|
private Feature reallyDecode(String line) {
|
||||||
try {
|
|
||||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
||||||
|
|
||||||
// our header cannot be null, we need the genotype sample names and counts
|
// our header cannot be null, we need the genotype sample names and counts
|
||||||
if (header == null) throw new IllegalStateException("VCF Header cannot be null when decoding a record");
|
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
|
||||||
|
|
||||||
if (parts == null)
|
if (parts == null)
|
||||||
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
|
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
|
||||||
|
|
@ -174,17 +178,18 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
|
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
|
||||||
if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
|
if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
|
||||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||||
throw new IllegalArgumentException("There aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||||
" tokens, and saw " + nParts + " )");
|
" tokens, and saw " + nParts + " )", lineNo);
|
||||||
|
|
||||||
return parseVCFLine(parts);
|
return parseVCFLine(parts);
|
||||||
} catch (TribbleException e) {
|
|
||||||
throw new TribbleException.InvalidDecodeLine(e.getMessage(), line);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void generateException(String message) {
|
protected void generateException(String message) {
|
||||||
throw new TribbleException.InvalidDecodeLine(message, lineNo);
|
throw new UserException.MalformedVCF(message, lineNo);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void generateException(String message, int lineNo) {
|
||||||
|
throw new UserException.MalformedVCF(message, lineNo);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -472,10 +477,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void generateException(String message, int lineNo) {
|
|
||||||
throw new TribbleException.InvalidDecodeLine(message, lineNo);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
private static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
||||||
boolean clipping = true;
|
boolean clipping = true;
|
||||||
// Note that the computation of forward clipping here is meant only to see whether there is a common
|
// Note that the computation of forward clipping here is meant only to see whether there is a common
|
||||||
|
|
|
||||||
|
|
@ -154,6 +154,16 @@ public class UserException extends ReviewedStingException {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class MalformedVCF extends UserException {
|
||||||
|
public MalformedVCF(String message, String line) {
|
||||||
|
super(String.format("The provided VCF file is malformed at line %s: %s", line, message));
|
||||||
|
}
|
||||||
|
|
||||||
|
public MalformedVCF(String message, int lineNo) {
|
||||||
|
super(String.format("The provided VCF file is malformed at line nmber %d: %s", lineNo, message));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static class ReadMissingReadGroup extends MalformedBAM {
|
public static class ReadMissingReadGroup extends MalformedBAM {
|
||||||
public ReadMissingReadGroup(SAMRecord read) {
|
public ReadMissingReadGroup(SAMRecord read) {
|
||||||
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,9 @@
|
||||||
package org.broadinstitute.sting;
|
package org.broadinstitute.sting;
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.Tribble;
|
import org.broad.tribble.Tribble;
|
||||||
|
import org.broad.tribble.index.Index;
|
||||||
import org.broad.tribble.index.IndexFactory;
|
import org.broad.tribble.index.IndexFactory;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||||
|
|
@ -64,10 +66,19 @@ public class WalkerTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile);
|
System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile);
|
||||||
Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(resultFile, indexFile, new VCFCodec()), "Index on disk from indexing on the fly not equal to the index created after the run completed");
|
Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec());
|
||||||
|
Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath());
|
||||||
|
|
||||||
|
if ( ! indexFromOutputFile.equals(dynamicIndex) ) {
|
||||||
|
Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n",
|
||||||
|
indexFromOutputFile.getProperties(),
|
||||||
|
dynamicIndex.getProperties()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s) {
|
public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s) {
|
||||||
List<String> md5s = new ArrayList<String>();
|
List<String> md5s = new ArrayList<String>();
|
||||||
for (int i = 0; i < resultFiles.size(); i++) {
|
for (int i = 0; i < resultFiles.size(); i++) {
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,7 @@ public class DiffableReaderUnitTest extends BaseTest {
|
||||||
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
|
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
|
||||||
|
|
||||||
DiffNode node = diff.getValueAsNode();
|
DiffNode node = diff.getValueAsNode();
|
||||||
Assert.assertEquals(node.getElements().size(), 9);
|
Assert.assertEquals(node.getElements().size(), 10);
|
||||||
|
|
||||||
// chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03
|
// chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03
|
||||||
DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode();
|
DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode();
|
||||||
|
|
|
||||||
|
|
@ -3,14 +3,15 @@ package org.broadinstitute.sting.queue.qscripts
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||||
|
|
||||||
import scala.io.Source._
|
|
||||||
import collection.JavaConversions._
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
|
||||||
import org.broadinstitute.sting.queue.extensions.picard._
|
import org.broadinstitute.sting.queue.extensions.picard._
|
||||||
import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord}
|
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
||||||
|
import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode
|
||||||
|
|
||||||
|
import collection.JavaConversions._
|
||||||
|
import net.sf.samtools.SAMFileReader
|
||||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.queue.util.QScriptUtils
|
||||||
|
|
||||||
class DataProcessingPipeline extends QScript {
|
class DataProcessingPipeline extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
@ -29,7 +30,8 @@ class DataProcessingPipeline extends QScript {
|
||||||
@Input(doc="Reference fasta file", fullName="reference", shortName="R", required=true)
|
@Input(doc="Reference fasta file", fullName="reference", shortName="R", required=true)
|
||||||
var reference: File = _
|
var reference: File = _
|
||||||
|
|
||||||
|
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
|
||||||
|
var dbSNP: File = _
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Optional Parameters
|
* Optional Parameters
|
||||||
|
|
@ -39,14 +41,12 @@ class DataProcessingPipeline extends QScript {
|
||||||
// @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
|
// @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
|
||||||
// var sortSamJar: File = _
|
// var sortSamJar: File = _
|
||||||
//
|
//
|
||||||
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
|
|
||||||
var bwaPath: File = _
|
|
||||||
|
|
||||||
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=false)
|
|
||||||
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
|
||||||
|
|
||||||
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
|
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
|
||||||
var indels: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf")
|
var indels: File = _
|
||||||
|
|
||||||
|
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
|
||||||
|
var bwaPath: File = _
|
||||||
|
|
||||||
@Input(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false)
|
@Input(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false)
|
||||||
var projectName: String = "project"
|
var projectName: String = "project"
|
||||||
|
|
@ -103,18 +103,6 @@ class DataProcessingPipeline extends QScript {
|
||||||
val ds: String)
|
val ds: String)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
// Utility function to check if there are multiple samples in a BAM file (currently we can't deal with that)
|
|
||||||
def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
|
|
||||||
var sample: String = ""
|
|
||||||
for (r <- readGroups) {
|
|
||||||
if (sample.isEmpty)
|
|
||||||
sample = r.getSample
|
|
||||||
else if (sample != r.getSample)
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
|
// Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
|
||||||
// It uses the sample information on the header of the input BAM files.
|
// It uses the sample information on the header of the input BAM files.
|
||||||
//
|
//
|
||||||
|
|
@ -135,7 +123,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
|
|
||||||
// only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
|
// only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
|
||||||
// with PrintReads to separate the samples. Tell user to do it himself!
|
// with PrintReads to separate the samples. Tell user to do it himself!
|
||||||
assert(!hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
assert(!QScriptUtils.hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
||||||
|
|
||||||
// Fill out the sample table with the readgroups in this file
|
// Fill out the sample table with the readgroups in this file
|
||||||
for (rg <- readGroups) {
|
for (rg <- readGroups) {
|
||||||
|
|
@ -166,12 +154,6 @@ class DataProcessingPipeline extends QScript {
|
||||||
return sampleBamFiles.toMap
|
return sampleBamFiles.toMap
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checks how many contigs are in the dataset. Uses the BAM file header information.
|
|
||||||
def getNumberOfContigs(bamFile: File): Int = {
|
|
||||||
val samReader = new SAMFileReader(new File(bamFile))
|
|
||||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Rebuilds the Read Group string to give BWA
|
// Rebuilds the Read Group string to give BWA
|
||||||
def addReadGroups(inBam: File, outBam: File, samReader: SAMFileReader) {
|
def addReadGroups(inBam: File, outBam: File, samReader: SAMFileReader) {
|
||||||
val readGroups = samReader.getFileHeader.getReadGroups
|
val readGroups = samReader.getFileHeader.getReadGroups
|
||||||
|
|
@ -215,19 +197,6 @@ class DataProcessingPipeline extends QScript {
|
||||||
return realignedBams
|
return realignedBams
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads a BAM LIST file and creates a scala list with all the files
|
|
||||||
def createListFromFile(in: File):List[File] = {
|
|
||||||
if (in.toString.endsWith("bam"))
|
|
||||||
return List(in)
|
|
||||||
var l: List[File] = List()
|
|
||||||
for (bam <- fromFile(in).getLines) {
|
|
||||||
if (!bam.startsWith("#") && !bam.isEmpty)
|
|
||||||
l :+= new File(bam.trim)
|
|
||||||
}
|
|
||||||
return l
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Main script
|
* Main script
|
||||||
|
|
@ -237,8 +206,8 @@ class DataProcessingPipeline extends QScript {
|
||||||
def script = {
|
def script = {
|
||||||
|
|
||||||
// keep a record of the number of contigs in the first bam file in the list
|
// keep a record of the number of contigs in the first bam file in the list
|
||||||
val bams = createListFromFile(input)
|
val bams = QScriptUtils.createListFromFile(input)
|
||||||
nContigs = getNumberOfContigs(bams(0))
|
nContigs = QScriptUtils.getNumberOfContigs(bams(0))
|
||||||
|
|
||||||
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}
|
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}
|
||||||
|
|
||||||
|
|
@ -319,7 +288,8 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.out = outIntervals
|
this.out = outIntervals
|
||||||
this.mismatchFraction = 0.0
|
this.mismatchFraction = 0.0
|
||||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||||
this.rodBind :+= RodBind("indels", "VCF", indels)
|
if (!indels.isEmpty)
|
||||||
|
this.rodBind :+= RodBind("indels", "VCF", indels)
|
||||||
this.scatterCount = nContigs
|
this.scatterCount = nContigs
|
||||||
this.analysisName = queueLogDir + outIntervals + ".target"
|
this.analysisName = queueLogDir + outIntervals + ".target"
|
||||||
this.jobName = queueLogDir + outIntervals + ".target"
|
this.jobName = queueLogDir + outIntervals + ".target"
|
||||||
|
|
@ -330,7 +300,8 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.targetIntervals = tIntervals
|
this.targetIntervals = tIntervals
|
||||||
this.out = outBam
|
this.out = outBam
|
||||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||||
this.rodBind :+= RodBind("indels", "VCF", qscript.indels)
|
if (!indels.isEmpty)
|
||||||
|
this.rodBind :+= RodBind("indels", "VCF", indels)
|
||||||
this.consensusDeterminationModel = consensusDeterminationModel
|
this.consensusDeterminationModel = consensusDeterminationModel
|
||||||
this.compress = 0
|
this.compress = 0
|
||||||
this.scatterCount = nContigs
|
this.scatterCount = nContigs
|
||||||
|
|
@ -353,7 +324,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
|
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
|
||||||
this.input_file :+= inBam
|
this.input_file :+= inBam
|
||||||
this.recal_file = inRecalFile
|
this.recal_file = inRecalFile
|
||||||
this.baq = org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY
|
this.baq = CalculationMode.CALCULATE_AS_NECESSARY
|
||||||
this.out = outBam
|
this.out = outBam
|
||||||
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
|
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
|
||||||
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
|
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.queue.qscripts
|
||||||
|
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
import net.sf.samtools.SAMFileReader
|
import org.broadinstitute.sting.queue.util.QScriptUtils
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -32,26 +32,25 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
val queueLogDir: String = ".qlog/"
|
val queueLogDir: String = ".qlog/"
|
||||||
var nContigs: Int = 0
|
var nContigs: Int = 0
|
||||||
|
|
||||||
def getNumberOfContigs(bamFile: File): Int = {
|
|
||||||
val samReader = new SAMFileReader(new File(bamFile))
|
|
||||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
|
||||||
}
|
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
|
|
||||||
nContigs = getNumberOfContigs(input)
|
val bamList = QScriptUtils.createListFromFile(input)
|
||||||
|
nContigs = QScriptUtils.getNumberOfContigs(bamList(0))
|
||||||
|
|
||||||
val recalFile1: File = swapExt(input, ".bam", ".recal1.csv")
|
for (bam <- bamList) {
|
||||||
val recalFile2: File = swapExt(input, ".bam", ".recal2.csv")
|
|
||||||
val recalBam: File = swapExt(input, ".bam", ".recal.bam")
|
val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv")
|
||||||
val path1: String = input + "before"
|
val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv")
|
||||||
val path2: String = input + "after"
|
val recalBam: File = swapExt(bam, ".bam", ".recal.bam")
|
||||||
|
val path1: String = bam + ".before"
|
||||||
add(cov(input, recalFile1),
|
val path2: String = bam + ".after"
|
||||||
recal(input, recalFile1, recalBam),
|
|
||||||
cov(recalBam, recalFile2),
|
add(cov(bam, recalFile1),
|
||||||
analyzeCovariates(recalFile1, path1),
|
recal(bam, recalFile1, recalBam),
|
||||||
analyzeCovariates(recalFile2, path2))
|
cov(recalBam, recalFile2),
|
||||||
|
analyzeCovariates(recalFile1, path1),
|
||||||
|
analyzeCovariates(recalFile2, path2))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||||
|
|
@ -84,7 +83,7 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
||||||
this.resources = R
|
this.resources = R
|
||||||
this.recal_file = inRecalFile
|
this.recal_file = inRecalFile
|
||||||
this.output_dir = outPath.toString
|
this.output_dir = outPath
|
||||||
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||||
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.broadinstitute.sting.queue.util
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
import io.Source._
|
||||||
|
import net.sf.samtools.{SAMReadGroupRecord, SAMFileReader}
|
||||||
|
|
||||||
|
import collection.JavaConversions._
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: carneiro
|
||||||
|
* Date: 7/14/11
|
||||||
|
* Time: 4:57 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
|
||||||
|
object QScriptUtils {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes a bam list file and produces a scala list with each file allowing the bam list
|
||||||
|
* to have empty lines and comment lines (lines starting with #).
|
||||||
|
*/
|
||||||
|
def createListFromFile(in: File):List[File] = {
|
||||||
|
// If the file provided ends with .bam, it is not a bam list, we treat it as a single file.
|
||||||
|
// and return a list with only this file.
|
||||||
|
if (in.toString.endsWith(".bam"))
|
||||||
|
return List(in)
|
||||||
|
|
||||||
|
var list: List[File] = List()
|
||||||
|
for (bam <- fromFile(in).getLines)
|
||||||
|
if (!bam.startsWith("#") && !bam.isEmpty )
|
||||||
|
list :+= new File(bam.trim())
|
||||||
|
list
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of contigs in the BAM file header.
|
||||||
|
*/
|
||||||
|
def getNumberOfContigs(bamFile: File): Int = {
|
||||||
|
val samReader = new SAMFileReader(bamFile)
|
||||||
|
samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if there are multiple samples in a BAM file
|
||||||
|
*/
|
||||||
|
def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
|
||||||
|
var sample: String = ""
|
||||||
|
for (r <- readGroups) {
|
||||||
|
if (sample.isEmpty)
|
||||||
|
sample = r.getSample
|
||||||
|
else if (sample != r.getSample)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
Binary file not shown.
|
|
@ -1,4 +1,4 @@
|
||||||
<ivy-module version="1.0">
|
<ivy-module version="1.0">
|
||||||
<info organisation="org.broad" module="tribble" revision="3"
|
<info organisation="org.broad" module="tribble" revision="4"
|
||||||
status="integration" publication="" />
|
status="integration" publication="" />
|
||||||
</ivy-module>
|
</ivy-module>
|
||||||
Loading…
Reference in New Issue