Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
928f646afd
63
build.xml
63
build.xml
|
|
@ -669,21 +669,13 @@
|
|||
</jar>
|
||||
</target>
|
||||
|
||||
<target name="variant.jar" depends="gatk.compile,init.jar">
|
||||
<jar jarfile="${dist.dir}/variant.jar">
|
||||
<fileset dir="${java.classes}">
|
||||
<include name="org/broadinstitute/variant/**/*.class"/>
|
||||
</fileset>
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
<target name="na12878kb.jar" depends="gatk.compile,init.jar">
|
||||
<jar jarfile="${dist.dir}/na12878kb.jar">
|
||||
<fileset dir="${java.classes}">
|
||||
<include name="org/broadinstitute/sting/utils/GenomeLocParser*.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/GenomeLoc.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/HasGenomeLocation.class"/>
|
||||
<include name="org/broadinstitute/variant/utils/BaseUtils.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/BaseUtils.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/Utils.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/exceptions/**/*.class"/>
|
||||
<include name="org/broadinstitute/sting/gatk/walkers/na12878kb/core/**/*.class"/>
|
||||
|
|
@ -753,7 +745,7 @@
|
|||
</jar>
|
||||
</target>
|
||||
|
||||
<target name="sting.jar" depends="sting-utils.jar, variant.jar, gatk.jar, queue.jar" />
|
||||
<target name="sting.jar" depends="sting-utils.jar, gatk.jar, queue.jar" />
|
||||
|
||||
<target name="init.manifests" depends="sting.jar">
|
||||
<pathconvert property="jar.classpath" pathsep=" ">
|
||||
|
|
@ -873,14 +865,18 @@
|
|||
<property name="executable" value="GenomeAnalysisTK" />
|
||||
</target>
|
||||
|
||||
<target name="init.executable.queueall" depends="init.build.all, init.javaandscala">
|
||||
<property name="executable" value="Queue" />
|
||||
<target name="init.executable.gatkall" depends="init.build.all, init.javaonly">
|
||||
<property name="executable" value="GenomeAnalysisTK" />
|
||||
</target>
|
||||
|
||||
<target name="init.executable.queuefull" depends="init.build.publicprotectedonly, init.javaandscala">
|
||||
<property name="executable" value="Queue" />
|
||||
</target>
|
||||
|
||||
<target name="init.executable.queueall" depends="init.build.all, init.javaandscala">
|
||||
<property name="executable" value="Queue" />
|
||||
</target>
|
||||
|
||||
<target name="require.executable">
|
||||
<condition property="no.executable.defined">
|
||||
<or>
|
||||
|
|
@ -929,12 +925,17 @@
|
|||
</target>
|
||||
|
||||
<!-- Package specific versions of the GATK/Queue. ALWAYS do an ant clean before invoking these! -->
|
||||
|
||||
<!-- GATK "full" == public + protected, ie., the standard binary release of the GATK -->
|
||||
<target name="package.gatk.full" depends="init.executable.gatkfull,package" />
|
||||
|
||||
<target name="package.queue.all" depends="init.executable.queueall,package" />
|
||||
<!-- GATK "all" == public + protected + private. Should never be publicly released -->
|
||||
<target name="package.gatk.all" depends="init.executable.gatkall,package" />
|
||||
|
||||
<target name="package.queue.full" depends="init.executable.queuefull,package" />
|
||||
|
||||
<target name="package.queue.all" depends="init.executable.queueall,package" />
|
||||
|
||||
<!-- Release a build. Don't call this target directly. Call one of the specific release targets below -->
|
||||
<target name="release" depends="require.executable" description="release a build, putting each file in a location specified by the package">
|
||||
<ant antfile="${package.output.dir}/${executable}.xml" target="release" />
|
||||
|
|
@ -1081,7 +1082,7 @@
|
|||
<property name="report" value="${build.dir}/report"/>
|
||||
<property name="iwww.report.dir" value="${user.home}/private_html/report"/>
|
||||
<property name="test.output" value="${dist.dir}/test"/>
|
||||
<property name="testng.jar" value="${lib.dir}/testng-5.14.1.jar"/>
|
||||
<property name="testng.jar" value="${lib.dir}/testng-6.8.jar"/>
|
||||
|
||||
<path id="java.test.source.path">
|
||||
<dirset dir="${basedir}">
|
||||
|
|
@ -1112,7 +1113,7 @@
|
|||
|
||||
<path id="testng.default.classpath">
|
||||
<path refid="build.results" />
|
||||
<pathelement path="${clover.jar}"/>
|
||||
<pathelement path="${clover.jar}"/>
|
||||
<pathelement location="${java.contracts.dir}" />
|
||||
<pathelement location="${java.test.classes}" />
|
||||
<pathelement location="${scala.test.classes}" />
|
||||
|
|
@ -1122,7 +1123,7 @@
|
|||
|
||||
<target name="clover.report">
|
||||
<clover-report coverageCacheSize="nocache">
|
||||
<current outfile="clover_html" title="GATK clover report" showUniqueCoverage="false" numThreads="4">
|
||||
<current outfile="clover_html" title="GATK clover report" showUniqueCoverage="false" numThreads="4">
|
||||
<format type="html" filter="catch,static,property"/>
|
||||
<fileset dir="public">
|
||||
<patternset id="clover.excludes">
|
||||
|
|
@ -1252,7 +1253,7 @@
|
|||
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.TestNGTestTransformer,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
|
||||
<jvmarg value="-Xmx${test.maxmemory}" />
|
||||
<jvmarg value="-ea" />
|
||||
<jvmarg value="-Dclover.pertest.coverage=diff" />
|
||||
<jvmarg value="-Dclover.pertest.coverage=diff" />
|
||||
<jvmarg value="-Djava.awt.headless=true" />
|
||||
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
|
||||
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
|
||||
|
|
@ -1295,7 +1296,7 @@
|
|||
|
||||
<target name="test.init">
|
||||
<property name="testng.classpath" value="testng.default.classpath" />
|
||||
<property name="test.maxmemory" value="${test.default.maxmemory}"/>
|
||||
<property name="test.maxmemory" value="${test.default.maxmemory}"/>
|
||||
</target>
|
||||
|
||||
<target name="init.testgatkjar">
|
||||
|
|
@ -1450,4 +1451,30 @@
|
|||
|
||||
<run-test testtype="${single}" outputdir="${report}/${single}" runfailed="false"/>
|
||||
</target>
|
||||
|
||||
<!-- A target that runs a test without doing ANY compilation or any extra work at all -->
|
||||
<!-- Intended to enable parallel tests that share the same working directory and build -->
|
||||
<target name="runtestonly">
|
||||
<condition property="not.clean">
|
||||
<and>
|
||||
<available file="${build.dir}" />
|
||||
<available file="${lib.dir}" />
|
||||
<available file="${dist.dir}" />
|
||||
<available file="${java.test.classes}" />
|
||||
</and>
|
||||
</condition>
|
||||
<fail message="runtestonly target requires a NON-CLEAN working directory (INCLUDING test classes). Do a full test build using ant test.compile first." unless="not.clean" />
|
||||
|
||||
<condition property="no.single.test.specified">
|
||||
<equals arg1="${single}" arg2="$${single}" />
|
||||
</condition>
|
||||
<fail message="Must specify a specific test. Usage: ant runtestonly -Dsingle=TestClass" if="no.single.test.specified" />
|
||||
|
||||
<property name="testng.classpath" value="testng.default.classpath" />
|
||||
<property name="test.maxmemory" value="${test.default.maxmemory}"/>
|
||||
<property name="include.contracts" value="true" />
|
||||
|
||||
<run-test testtype="${single}" outputdir="${report}/${single}" runfailed="false"/>
|
||||
</target>
|
||||
|
||||
</project>
|
||||
|
|
|
|||
8
ivy.xml
8
ivy.xml
|
|
@ -35,9 +35,14 @@
|
|||
<!-- Tribble -->
|
||||
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
|
||||
|
||||
<!-- Variant -->
|
||||
<dependency org="org.broadinstitute" name="variant" rev="latest.integration"/>
|
||||
|
||||
<dependency org="log4j" name="log4j" rev="1.2.15"/>
|
||||
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
|
||||
<dependency org="colt" name="colt" rev="1.2.0"/>
|
||||
<dependency org="it.unimi.dsi" name="fastutil" rev="6.5.3" />
|
||||
|
||||
<!-- <dependency org="jboss" name="javassist" rev="3.7.ga"/> -->
|
||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
||||
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
|
||||
|
|
@ -81,9 +86,10 @@
|
|||
<dependency org="org.scala-lang" name="scala-library" rev="2.9.2"/>
|
||||
|
||||
<!-- testing and evaluation dependencies -->
|
||||
<dependency org="org.testng" name="testng" rev="5.14.1"/>
|
||||
<dependency org="org.testng" name="testng" rev="6.8"/>
|
||||
<dependency org="org.uncommons" name="reportng" rev="1.1.2"/>
|
||||
<dependency org="com.google.caliper" name="caliper" rev="0.5-rc1"/>
|
||||
<dependency org="com.google.inject" name="guice" rev="3.0"/>
|
||||
|
||||
<!-- Contracts for Java and dependencies -->
|
||||
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>
|
||||
|
|
|
|||
|
|
@ -50,10 +50,13 @@ import org.broadinstitute.sting.commandline.*;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory;
|
||||
import org.broadinstitute.sting.utils.collections.DefaultHashMap;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
|
|
@ -118,6 +121,33 @@ public class StandardCallerArgumentCollection {
|
|||
public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION;
|
||||
public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05;
|
||||
|
||||
/**
|
||||
* This argument specifies a file with two columns "sample" and "contamination" specifying the contamination level for those samples.
|
||||
* Samples that do not appear in this file will be processed with CONTAMINATION_FRACTION
|
||||
**/
|
||||
@Advanced
|
||||
@Argument(fullName = "contamination_fraction_per_sample_file", shortName = "contaminationFile", doc = "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header.", required = false)
|
||||
public File CONTAMINATION_FRACTION_FILE = null;
|
||||
|
||||
/**
|
||||
*
|
||||
* @return an _Immutable_ copy of the Sample-Contamination Map, defaulting to CONTAMINATION_FRACTION so that if the sample isn't in the map map(sample)==CONTAMINATION_FRACTION
|
||||
*/
|
||||
public Map<String,Double> getSampleContamination(){
|
||||
//make sure that the default value is set up right
|
||||
sampleContamination.setDefaultValue(CONTAMINATION_FRACTION);
|
||||
return Collections.unmodifiableMap(sampleContamination);
|
||||
}
|
||||
|
||||
public void setSampleContamination(DefaultHashMap<String, Double> sampleContamination) {
|
||||
this.sampleContamination.clear();
|
||||
this.sampleContamination.putAll(sampleContamination);
|
||||
this.sampleContamination.setDefaultValue(CONTAMINATION_FRACTION);
|
||||
}
|
||||
|
||||
//Needs to be here because it uses CONTAMINATION_FRACTION
|
||||
private DefaultHashMap<String,Double> sampleContamination = new DefaultHashMap<String,Double>(CONTAMINATION_FRACTION);
|
||||
|
||||
/**
|
||||
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
|
||||
*/
|
||||
|
|
@ -145,8 +175,10 @@ public class StandardCallerArgumentCollection {
|
|||
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
||||
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||
this.CONTAMINATION_FRACTION = SCAC.CONTAMINATION_FRACTION;
|
||||
this.CONTAMINATION_FRACTION_FILE=SCAC.CONTAMINATION_FRACTION_FILE;
|
||||
this.contaminationLog = SCAC.contaminationLog;
|
||||
this.exactCallsLog = SCAC.exactCallsLog;
|
||||
this.sampleContamination=SCAC.sampleContamination;
|
||||
this.AFmodel = SCAC.AFmodel;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ import java.util.Map;
|
|||
* over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for
|
||||
* N samples with -dcov D is N * D
|
||||
*/
|
||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
public class Coverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
|
|
@ -142,7 +142,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(
|
||||
new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
|
||||
new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
|
||||
}
|
||||
|
||||
private Double pValueForContingencyTable(int[][] originalTable) {
|
||||
|
|
@ -176,7 +176,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
//System.out.printf("P-cutoff: %f\n", pCutoff);
|
||||
//System.out.printf("P-value: %f\n\n", pValue);
|
||||
|
||||
return pValue;
|
||||
// min is necessary as numerical precision can result in pValue being slightly greater than 1.0
|
||||
return Math.min(pValue, 1.0);
|
||||
}
|
||||
|
||||
private static int [][] copyContingencyTable(int [][] t) {
|
||||
|
|
@ -222,14 +223,14 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
|
||||
// calculate in log space so we don't die with high numbers
|
||||
double pCutoff = Arithmetic.logFactorial(rowSums[0])
|
||||
+ Arithmetic.logFactorial(rowSums[1])
|
||||
+ Arithmetic.logFactorial(colSums[0])
|
||||
+ Arithmetic.logFactorial(colSums[1])
|
||||
- Arithmetic.logFactorial(table[0][0])
|
||||
- Arithmetic.logFactorial(table[0][1])
|
||||
- Arithmetic.logFactorial(table[1][0])
|
||||
- Arithmetic.logFactorial(table[1][1])
|
||||
- Arithmetic.logFactorial(N);
|
||||
+ Arithmetic.logFactorial(rowSums[1])
|
||||
+ Arithmetic.logFactorial(colSums[0])
|
||||
+ Arithmetic.logFactorial(colSums[1])
|
||||
- Arithmetic.logFactorial(table[0][0])
|
||||
- Arithmetic.logFactorial(table[0][1])
|
||||
- Arithmetic.logFactorial(table[1][0])
|
||||
- Arithmetic.logFactorial(table[1][1])
|
||||
- Arithmetic.logFactorial(N);
|
||||
return Math.exp(pCutoff);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAn
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -69,7 +70,7 @@ import java.util.Map;
|
|||
/**
|
||||
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
|
|
|
|||
|
|
@ -91,8 +91,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if (vc.isSNP() && stratifiedContexts != null)
|
||||
return annotatePileup(ref, stratifiedContexts, vc);
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null && vc.isVariant())
|
||||
return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc);
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
|
@ -133,31 +131,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> annotateWithLikelihoods(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||
final VariantContext vc) {
|
||||
|
||||
final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
|
||||
for (final Genotype genotype : vc.getGenotypes()) {
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
if (perReadAlleleLikelihoodMap == null)
|
||||
continue;
|
||||
|
||||
Double d = scoreIndelsAgainstHaplotypes(perReadAlleleLikelihoodMap);
|
||||
if (d == null)
|
||||
continue;
|
||||
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
}
|
||||
|
||||
// if (scoreRA.observationCount() == 0)
|
||||
// return null;
|
||||
|
||||
// annotate the score in the info field
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.4f", scoreRA.mean()));
|
||||
return map;
|
||||
|
||||
}
|
||||
|
||||
private static class HaplotypeComparator implements Comparator<Haplotype>, Serializable {
|
||||
|
||||
public int compare(Haplotype a, Haplotype b) {
|
||||
|
|
@ -180,7 +153,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
for (final PileupElement p : pileup) {
|
||||
final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus);
|
||||
candidateHaplotypeQueue.add(haplotypeFromRead);
|
||||
if ( haplotypeFromRead != null )
|
||||
candidateHaplotypeQueue.add(haplotypeFromRead);
|
||||
}
|
||||
|
||||
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
|
||||
|
|
@ -230,8 +204,18 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a haplotype object constructed from the read or null if read's cigar is null
|
||||
*
|
||||
* @param p pileup element representing the read
|
||||
* @param contextSize the context size to use
|
||||
* @param locus the position
|
||||
* @return possibly null Haplotype object constructed from the read
|
||||
*/
|
||||
private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
if ( read.getCigar() == null )
|
||||
return null;
|
||||
|
||||
final byte[] haplotypeBases = new byte[contextSize];
|
||||
Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
|
||||
|
|
@ -347,6 +331,10 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
double expected = 0.0;
|
||||
double mismatches = 0.0;
|
||||
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
if ( read.getCigar() == null )
|
||||
return 0.0;
|
||||
|
||||
// What's the expected mismatch rate under the model that this read is actually sampled from
|
||||
// this haplotype? Let's assume the consensus base c is a random choice one of A, C, G, or T, and that
|
||||
// the observed base is actually from a c with an error rate e. Since e is the rate at which we'd
|
||||
|
|
@ -358,14 +346,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
// the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch.
|
||||
// so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n
|
||||
final byte[] haplotypeBases = haplotype.getBases();
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
byte[] readBases = read.getReadBases();
|
||||
|
||||
readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string
|
||||
byte[] readQuals = read.getBaseQualities();
|
||||
readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
|
||||
int readOffsetFromPileup = p.getOffset();
|
||||
readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus);
|
||||
int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus);
|
||||
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
|
||||
|
||||
for (int i = 0; i < contextSize; i++) {
|
||||
|
|
@ -399,39 +385,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
return mismatches - expected;
|
||||
}
|
||||
|
||||
|
||||
private Double scoreIndelsAgainstHaplotypes(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
|
||||
final ArrayList<double[]> haplotypeScores = new ArrayList<double[]>();
|
||||
|
||||
if (perReadAlleleLikelihoodMap.isEmpty())
|
||||
return null;
|
||||
|
||||
for (Map<Allele,Double> el : perReadAlleleLikelihoodMap.getLikelihoodMapValues()) {
|
||||
|
||||
// retrieve likelihood information corresponding to this read
|
||||
// Score all the reads in the pileup, even the filtered ones
|
||||
final double[] scores = new double[el.size()];
|
||||
int i = 0;
|
||||
for (Map.Entry<Allele, Double> a : el.entrySet()) {
|
||||
scores[i++] = -a.getValue();
|
||||
if (DEBUG) {
|
||||
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
haplotypeScores.add(scores);
|
||||
}
|
||||
|
||||
// indel likelihoods are strict log-probs, not phred scored
|
||||
double overallScore = 0.0;
|
||||
for (final double[] readHaplotypeScores : haplotypeScores) {
|
||||
overallScore += MathUtils.arrayMin(readHaplotypeScores);
|
||||
}
|
||||
|
||||
return overallScore;
|
||||
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList("HaplotypeScore");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
|
|||
if (alleleLikelihoodMap == null) {
|
||||
// use old UG SNP-based version if we don't have per-read allele likelihoods
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( isUsableBase(p) ) {
|
||||
if ( isUsableBase(p) && p.getRead().getCigar() != null ) {
|
||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
||||
|
||||
readPos = getFinalReadPosition(p.getRead(),readPos);
|
||||
|
|
@ -103,26 +103,26 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
|
|||
}
|
||||
|
||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||
if (a.isNoCall())
|
||||
continue; // read is non-informative
|
||||
|
||||
final GATKSAMRecord read = el.getKey();
|
||||
if ( read.getSoftStart() + read.getCigar().getReadLength() <= refLoc ) { // make sure the read actually covers the requested ref loc
|
||||
continue;
|
||||
}
|
||||
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
|
||||
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
|
||||
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED || read.getCigar() == null )
|
||||
continue;
|
||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 );
|
||||
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
|
||||
if (readPos > numAlignedBases / 2)
|
||||
readPos = numAlignedBases - (readPos + 1);
|
||||
|
||||
// int readPos = getOffsetFromClippedReadStart(el.getKey(), el.getKey().getOffset());
|
||||
// readPos = getFinalReadPosition(el.getKey().getRead(),readPos);
|
||||
|
||||
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||
if (a.isNoCall())
|
||||
continue; // read is non-informative
|
||||
if (a.isReference())
|
||||
refQuals.add((double)readPos);
|
||||
else if (allAlleles.contains(a))
|
||||
altQuals.add((double)readPos);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -67,6 +67,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.recalibration.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -127,7 +128,7 @@ import java.util.List;
|
|||
* </pre>
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature(groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class})
|
||||
@DocumentedGATKFeature(groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class})
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
|
|
@ -214,6 +215,7 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
}
|
||||
|
||||
initializeRecalibrationEngine();
|
||||
RecalUtils.checkForInvalidRecalBams(getToolkit().getSAMFileHeaders(), getToolkit().getArguments().ALLOW_BQSR_ON_REDUCED_BAMS);
|
||||
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
|
||||
referenceReader = getToolkit().getReferenceDataSource().getReference();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -179,6 +179,6 @@ public final class ReadRecalibrationInfo {
|
|||
}
|
||||
|
||||
private boolean validQual(final byte result) {
|
||||
return result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE;
|
||||
return result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -262,8 +262,12 @@ public class RecalibrationArgumentCollection {
|
|||
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("mismatches_default_quality", true);
|
||||
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("deletions_default_quality", true);
|
||||
argumentsTable.set("deletions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("insertions_default_quality", true);
|
||||
argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("maximum_cycle_value", true);
|
||||
argumentsTable.set("maximum_cycle_value", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MAXIMUM_CYCLE_VALUE);
|
||||
argumentsTable.addRowID("low_quality_tail", true);
|
||||
argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
|
||||
argumentsTable.addRowID("default_platform", true);
|
||||
|
|
|
|||
|
|
@ -53,42 +53,82 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
|||
* @since 6/15/12
|
||||
*/
|
||||
public class BaseAndQualsCounts extends BaseCounts {
|
||||
private final long[] sumInsertionQuals;
|
||||
private final long[] sumDeletionQuals;
|
||||
|
||||
public BaseAndQualsCounts() {
|
||||
super();
|
||||
this.sumInsertionQuals = new long[BaseIndex.values().length];
|
||||
this.sumDeletionQuals = new long[BaseIndex.values().length];
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
sumInsertionQuals[i.index] = 0L;
|
||||
sumDeletionQuals[i.index] = 0L;
|
||||
}
|
||||
}
|
||||
private long sumInsertionQual_A = 0;
|
||||
private long sumDeletionQual_A = 0;
|
||||
private long sumInsertionQual_C = 0;
|
||||
private long sumDeletionQual_C = 0;
|
||||
private long sumInsertionQual_G = 0;
|
||||
private long sumDeletionQual_G = 0;
|
||||
private long sumInsertionQual_T = 0;
|
||||
private long sumDeletionQual_T = 0;
|
||||
private long sumInsertionQual_D = 0;
|
||||
private long sumDeletionQual_D = 0;
|
||||
private long sumInsertionQual_I = 0;
|
||||
private long sumDeletionQual_I = 0;
|
||||
private long sumInsertionQual_N = 0;
|
||||
private long sumDeletionQual_N = 0;
|
||||
|
||||
|
||||
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual) {
|
||||
final BaseIndex i = BaseIndex.byteToBase(base);
|
||||
super.incr(i, baseQual);
|
||||
sumInsertionQuals[i.index] += insQual;
|
||||
sumDeletionQuals[i.index] += delQual;
|
||||
switch (i) {
|
||||
case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break;
|
||||
case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break;
|
||||
case G: sumInsertionQual_G += insQual; sumDeletionQual_G += delQual; break;
|
||||
case T: sumInsertionQual_T += insQual; sumDeletionQual_T += delQual; break;
|
||||
case D: sumInsertionQual_D += insQual; sumDeletionQual_D += delQual; break;
|
||||
case I: sumInsertionQual_I += insQual; sumDeletionQual_I += delQual; break;
|
||||
case N: sumInsertionQual_N += insQual; sumDeletionQual_N += delQual; break;
|
||||
}
|
||||
}
|
||||
|
||||
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual) {
|
||||
final BaseIndex i = BaseIndex.byteToBase(base);
|
||||
super.decr(i, baseQual);
|
||||
sumInsertionQuals[i.index] -= insQual;
|
||||
sumDeletionQuals[i.index] -= delQual;
|
||||
switch (i) {
|
||||
case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break;
|
||||
case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break;
|
||||
case G: sumInsertionQual_G -= insQual; sumDeletionQual_G -= delQual; break;
|
||||
case T: sumInsertionQual_T -= insQual; sumDeletionQual_T -= delQual; break;
|
||||
case D: sumInsertionQual_D -= insQual; sumDeletionQual_D -= delQual; break;
|
||||
case I: sumInsertionQual_I -= insQual; sumDeletionQual_I -= delQual; break;
|
||||
case N: sumInsertionQual_N -= insQual; sumDeletionQual_N -= delQual; break;
|
||||
}
|
||||
}
|
||||
|
||||
public byte averageInsertionQualsOfBase(final BaseIndex base) {
|
||||
return getGenericAverageQualOfBase(base, sumInsertionQuals);
|
||||
return (byte) (getInsertionQual(base) / countOfBase(base));
|
||||
}
|
||||
|
||||
public byte averageDeletionQualsOfBase(final BaseIndex base) {
|
||||
return getGenericAverageQualOfBase(base, sumDeletionQuals);
|
||||
return (byte) (getDeletionQual(base) / countOfBase(base));
|
||||
}
|
||||
|
||||
private byte getGenericAverageQualOfBase(final BaseIndex base, final long[] sumQuals) {
|
||||
return (byte) (sumQuals[base.index] / countOfBase(base));
|
||||
private long getInsertionQual(final BaseIndex base) {
|
||||
switch (base) {
|
||||
case A: return sumInsertionQual_A;
|
||||
case C: return sumInsertionQual_C;
|
||||
case G: return sumInsertionQual_G;
|
||||
case T: return sumInsertionQual_T;
|
||||
case D: return sumInsertionQual_D;
|
||||
case I: return sumInsertionQual_I;
|
||||
case N: return sumInsertionQual_N;
|
||||
default: throw new IllegalArgumentException(base.name());
|
||||
}
|
||||
}
|
||||
|
||||
private long getDeletionQual(final BaseIndex base) {
|
||||
switch (base) {
|
||||
case A: return sumDeletionQual_A;
|
||||
case C: return sumDeletionQual_C;
|
||||
case G: return sumDeletionQual_G;
|
||||
case T: return sumDeletionQual_T;
|
||||
case D: return sumDeletionQual_D;
|
||||
case I: return sumDeletionQual_I;
|
||||
case N: return sumDeletionQual_N;
|
||||
default: throw new IllegalArgumentException(base.name());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,73 +62,107 @@ import com.google.java.contract.Requires;
|
|||
public final static BaseIndex MAX_BASE_INDEX_WITH_NO_COUNTS = BaseIndex.N;
|
||||
public final static byte MAX_BASE_WITH_NO_COUNTS = MAX_BASE_INDEX_WITH_NO_COUNTS.getByte();
|
||||
|
||||
private final int[] counts; // keeps track of the base counts
|
||||
private final long[] sumQuals; // keeps track of the quals of each base
|
||||
|
||||
private int count_A = 0; // keeps track of the base counts
|
||||
private int sumQual_A = 0; // keeps track of the quals of each base
|
||||
private int count_C = 0;
|
||||
private int sumQual_C = 0;
|
||||
private int count_G = 0;
|
||||
private int sumQual_G = 0;
|
||||
private int count_T = 0;
|
||||
private int sumQual_T = 0;
|
||||
private int count_D = 0;
|
||||
private int sumQual_D = 0;
|
||||
private int count_I = 0;
|
||||
private int sumQual_I = 0;
|
||||
private int count_N = 0;
|
||||
private int sumQual_N = 0;
|
||||
private int totalCount = 0; // keeps track of total count since this is requested so often
|
||||
|
||||
public BaseCounts() {
|
||||
counts = new int[BaseIndex.values().length];
|
||||
sumQuals = new long[BaseIndex.values().length];
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
counts[i.index] = 0;
|
||||
sumQuals[i.index] = 0L;
|
||||
}
|
||||
}
|
||||
|
||||
public static BaseCounts createWithCounts(int[] countsACGT) {
|
||||
BaseCounts baseCounts = new BaseCounts();
|
||||
baseCounts.counts[BaseIndex.A.index] = countsACGT[0];
|
||||
baseCounts.counts[BaseIndex.C.index] = countsACGT[1];
|
||||
baseCounts.counts[BaseIndex.G.index] = countsACGT[2];
|
||||
baseCounts.counts[BaseIndex.T.index] = countsACGT[3];
|
||||
baseCounts.count_A = countsACGT[0];
|
||||
baseCounts.count_C = countsACGT[1];
|
||||
baseCounts.count_G = countsACGT[2];
|
||||
baseCounts.count_T = countsACGT[3];
|
||||
baseCounts.totalCount = countsACGT[0] + countsACGT[1] + countsACGT[2] + countsACGT[3];
|
||||
return baseCounts;
|
||||
}
|
||||
|
||||
@Requires("other != null")
|
||||
public void add(final BaseCounts other) {
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
final int otherCount = other.counts[i.index];
|
||||
counts[i.index] += otherCount;
|
||||
totalCount += otherCount;
|
||||
}
|
||||
this.count_A += other.count_A;
|
||||
this.count_C += other.count_C;
|
||||
this.count_G += other.count_G;
|
||||
this.count_T += other.count_T;
|
||||
this.count_D += other.count_D;
|
||||
this.count_I += other.count_I;
|
||||
this.count_N += other.count_N;
|
||||
this.totalCount += other.totalCount;
|
||||
}
|
||||
|
||||
@Requires("other != null")
|
||||
public void sub(final BaseCounts other) {
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
final int otherCount = other.counts[i.index];
|
||||
counts[i.index] -= otherCount;
|
||||
totalCount -= otherCount;
|
||||
}
|
||||
this.count_A -= other.count_A;
|
||||
this.count_C -= other.count_C;
|
||||
this.count_G -= other.count_G;
|
||||
this.count_T -= other.count_T;
|
||||
this.count_D -= other.count_D;
|
||||
this.count_I -= other.count_I;
|
||||
this.count_N -= other.count_N;
|
||||
this.totalCount -= other.totalCount;
|
||||
}
|
||||
|
||||
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
|
||||
public void incr(final byte base) {
|
||||
final BaseIndex i = BaseIndex.byteToBase(base);
|
||||
counts[i.index]++;
|
||||
totalCount++;
|
||||
add(BaseIndex.byteToBase(base), 1);
|
||||
}
|
||||
|
||||
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
|
||||
public void incr(final BaseIndex base, final byte qual) {
|
||||
counts[base.index]++;
|
||||
totalCount++;
|
||||
sumQuals[base.index] += qual;
|
||||
switch (base) {
|
||||
case A: ++count_A; sumQual_A += qual; break;
|
||||
case C: ++count_C; sumQual_C += qual; break;
|
||||
case G: ++count_G; sumQual_G += qual; break;
|
||||
case T: ++count_T; sumQual_T += qual; break;
|
||||
case D: ++count_D; sumQual_D += qual; break;
|
||||
case I: ++count_I; sumQual_I += qual; break;
|
||||
case N: ++count_N; sumQual_N += qual; break;
|
||||
}
|
||||
++totalCount;
|
||||
}
|
||||
|
||||
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
|
||||
public void decr(final byte base) {
|
||||
final BaseIndex i = BaseIndex.byteToBase(base);
|
||||
counts[i.index]--;
|
||||
totalCount--;
|
||||
add(BaseIndex.byteToBase(base), -1);
|
||||
}
|
||||
|
||||
private void add(final BaseIndex base, int amount) {
|
||||
switch(base) {
|
||||
case A: count_A += amount; break;
|
||||
case C: count_C += amount; break;
|
||||
case G: count_G += amount; break;
|
||||
case T: count_T += amount; break;
|
||||
case D: count_D += amount; break;
|
||||
case I: count_I += amount; break;
|
||||
case N: count_N += amount; break;
|
||||
}
|
||||
totalCount += amount;
|
||||
}
|
||||
|
||||
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
|
||||
public void decr(final BaseIndex base, final byte qual) {
|
||||
counts[base.index]--;
|
||||
totalCount--;
|
||||
sumQuals[base.index] -= qual;
|
||||
switch (base) {
|
||||
case A: --count_A; sumQual_A -= qual; break;
|
||||
case C: --count_C; sumQual_C -= qual; break;
|
||||
case G: --count_G; sumQual_G -= qual; break;
|
||||
case T: --count_T; sumQual_T -= qual; break;
|
||||
case D: --count_D; sumQual_D -= qual; break;
|
||||
case I: --count_I; sumQual_I -= qual; break;
|
||||
case N: --count_N; sumQual_N -= qual; break;
|
||||
}
|
||||
--totalCount;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
|
|
@ -138,12 +172,21 @@ import com.google.java.contract.Requires;
|
|||
|
||||
@Ensures("result >= 0")
|
||||
public long getSumQuals(final BaseIndex base) {
|
||||
return sumQuals[base.index];
|
||||
switch (base) {
|
||||
case A: return sumQual_A;
|
||||
case C: return sumQual_C;
|
||||
case G: return sumQual_G;
|
||||
case T: return sumQual_T;
|
||||
case D: return sumQual_D;
|
||||
case I: return sumQual_I;
|
||||
case N: return sumQual_N;
|
||||
default: throw new IllegalArgumentException(base.name());
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public byte averageQuals(final byte base) {
|
||||
return (byte) (getSumQuals(base) / countOfBase(base));
|
||||
return averageQuals(BaseIndex.byteToBase(base));
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
|
|
@ -158,12 +201,21 @@ import com.google.java.contract.Requires;
|
|||
|
||||
@Ensures("result >= 0")
|
||||
public int countOfBase(final BaseIndex base) {
|
||||
return counts[base.index];
|
||||
switch (base) {
|
||||
case A: return count_A;
|
||||
case C: return count_C;
|
||||
case G: return count_G;
|
||||
case T: return count_T;
|
||||
case D: return count_D;
|
||||
case I: return count_I;
|
||||
case N: return count_N;
|
||||
default: throw new IllegalArgumentException(base.name());
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public long sumQualsOfBase(final BaseIndex base) {
|
||||
return sumQuals[base.index];
|
||||
return getSumQuals(base);
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
|
|
@ -196,14 +248,14 @@ import com.google.java.contract.Requires;
|
|||
*/
|
||||
@Ensures({"result >=0.0", "result<= 1.0"})
|
||||
public double baseCountProportion(final BaseIndex baseIndex) {
|
||||
return (totalCount == 0) ? 0.0 : (double)counts[baseIndex.index] / (double)totalCount;
|
||||
return (totalCount == 0) ? 0.0 : (double)countOfBase(baseIndex) / (double)totalCount;
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public String toString() {
|
||||
StringBuilder b = new StringBuilder();
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
b.append(i.toString()).append("=").append(counts[i.index]).append(",");
|
||||
b.append(i.toString()).append("=").append(countOfBase(i)).append(",");
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
|
|
@ -216,7 +268,7 @@ import com.google.java.contract.Requires;
|
|||
public BaseIndex baseIndexWithMostCounts() {
|
||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
if (counts[i.index] > counts[maxI.index])
|
||||
if (countOfBase(i) > countOfBase(maxI))
|
||||
maxI = i;
|
||||
}
|
||||
return maxI;
|
||||
|
|
@ -226,18 +278,12 @@ import com.google.java.contract.Requires;
|
|||
public BaseIndex baseIndexWithMostCountsWithoutIndels() {
|
||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
if (i.isNucleotide() && counts[i.index] > counts[maxI.index])
|
||||
if (i.isNucleotide() && countOfBase(i) > countOfBase(maxI))
|
||||
maxI = i;
|
||||
}
|
||||
return maxI;
|
||||
}
|
||||
|
||||
private boolean hasHigherCount(final BaseIndex targetIndex, final BaseIndex testIndex) {
|
||||
final int targetCount = counts[targetIndex.index];
|
||||
final int testCount = counts[testIndex.index];
|
||||
return ( targetCount > testCount || (targetCount == testCount && sumQuals[targetIndex.index] > sumQuals[testIndex.index]) );
|
||||
}
|
||||
|
||||
public byte baseWithMostProbability() {
|
||||
return baseIndexWithMostProbability().getByte();
|
||||
}
|
||||
|
|
@ -246,25 +292,25 @@ import com.google.java.contract.Requires;
|
|||
public BaseIndex baseIndexWithMostProbability() {
|
||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
if (sumQuals[i.index] > sumQuals[maxI.index])
|
||||
if (getSumQuals(i) > getSumQuals(maxI))
|
||||
maxI = i;
|
||||
}
|
||||
return (sumQuals[maxI.index] > 0L ? maxI : baseIndexWithMostCounts());
|
||||
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCounts());
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public BaseIndex baseIndexWithMostProbabilityWithoutIndels() {
|
||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
if (i.isNucleotide() && sumQuals[i.index] > sumQuals[maxI.index])
|
||||
if (i.isNucleotide() && getSumQuals(i) > getSumQuals(maxI))
|
||||
maxI = i;
|
||||
}
|
||||
return (sumQuals[maxI.index] > 0L ? maxI : baseIndexWithMostCountsWithoutIndels());
|
||||
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCountsWithoutIndels());
|
||||
}
|
||||
|
||||
@Ensures("result >=0")
|
||||
public int totalCountWithoutIndels() {
|
||||
return totalCount - counts[BaseIndex.D.index] - counts[BaseIndex.I.index];
|
||||
return totalCount - countOfBase(BaseIndex.D) - countOfBase(BaseIndex.I);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -277,10 +323,6 @@ import com.google.java.contract.Requires;
|
|||
@Ensures({"result >=0.0", "result<= 1.0"})
|
||||
public double baseCountProportionWithoutIndels(final BaseIndex base) {
|
||||
final int total = totalCountWithoutIndels();
|
||||
return (total == 0) ? 0.0 : (double)counts[base.index] / (double)total;
|
||||
}
|
||||
|
||||
public int[] countsArray() {
|
||||
return counts.clone();
|
||||
return (total == 0) ? 0.0 : (double)countOfBase(base) / (double)total;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,6 +69,12 @@ public enum BaseIndex {
|
|||
|
||||
public byte getByte() { return b; }
|
||||
|
||||
/**
|
||||
* Ordinal is stored in SyntheticRead rather than enum to save object reference, and store as byte for compactness.
|
||||
* It is stored as byte, and this method merely eliminates a cast.
|
||||
*/
|
||||
public byte getOrdinalByte() { return (byte)ordinal(); }
|
||||
|
||||
private BaseIndex(char base, int index) {
|
||||
this.b = (byte)base;
|
||||
this.index = index;
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -87,7 +88,7 @@ import java.util.Map;
|
|||
* @since 10/30/11
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> {
|
||||
@Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;
|
||||
|
|
|
|||
|
|
@ -46,10 +46,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import it.unimi.dsi.fastutil.objects.ObjectAVLTreeSet;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectSortedSet;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
/**
|
||||
* A stash of regions that must be kept uncompressed in all samples
|
||||
|
|
@ -61,7 +63,7 @@ import java.util.TreeSet;
|
|||
* Date: 10/15/12
|
||||
* Time: 4:08 PM
|
||||
*/
|
||||
public class CompressionStash extends TreeSet<FinishedGenomeLoc> {
|
||||
public class CompressionStash extends ObjectAVLTreeSet<FinishedGenomeLoc> {
|
||||
public CompressionStash() {
|
||||
super();
|
||||
}
|
||||
|
|
@ -75,7 +77,7 @@ public class CompressionStash extends TreeSet<FinishedGenomeLoc> {
|
|||
*/
|
||||
@Override
|
||||
public boolean add(final FinishedGenomeLoc insertLoc) {
|
||||
TreeSet<FinishedGenomeLoc> removedLocs = new TreeSet<FinishedGenomeLoc>();
|
||||
ObjectSortedSet<FinishedGenomeLoc> removedLocs = new ObjectAVLTreeSet<FinishedGenomeLoc>();
|
||||
for (FinishedGenomeLoc existingLoc : this) {
|
||||
if (existingLoc.isPast(insertLoc)) {
|
||||
break; // if we're past the loc we're done looking for overlaps.
|
||||
|
|
|
|||
|
|
@ -46,11 +46,10 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* The element that describes the header of the sliding window.
|
||||
|
|
@ -65,7 +64,7 @@ public class HeaderElement {
|
|||
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
|
||||
private int nSoftClippedBases; // How many bases in this site came from soft clipped bases
|
||||
private int location; // Genome location of this site (the sliding window knows which contig we're at
|
||||
private LinkedList<Integer> mappingQuality; // keeps the mapping quality of each read that contributed to this element (site)
|
||||
private IntArrayList mappingQuality; // keeps the mapping quality of each read that contributed to this element (site)
|
||||
|
||||
public int getLocation() {
|
||||
return location;
|
||||
|
|
@ -86,7 +85,7 @@ public class HeaderElement {
|
|||
* @param location the reference location for the new element
|
||||
*/
|
||||
public HeaderElement(final int location) {
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location, new LinkedList<Integer>());
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location, new IntArrayList());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -96,7 +95,7 @@ public class HeaderElement {
|
|||
* @param location the reference location for the new element
|
||||
*/
|
||||
public HeaderElement(final int location, final int insertionsToTheRight) {
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location, new LinkedList<Integer>());
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location, new IntArrayList());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -110,7 +109,7 @@ public class HeaderElement {
|
|||
* @param mappingQuality the list of mapping quality values of all reads that contributed to this
|
||||
* HeaderElement
|
||||
*/
|
||||
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location, LinkedList<Integer> mappingQuality) {
|
||||
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location, IntArrayList mappingQuality) {
|
||||
this.consensusBaseCounts = consensusBaseCounts;
|
||||
this.filteredBaseCounts = filteredBaseCounts;
|
||||
this.insertionsToTheRight = insertionsToTheRight;
|
||||
|
|
@ -268,24 +267,26 @@ public class HeaderElement {
|
|||
* Calculates the number of haplotypes necessary to represent this site.
|
||||
*
|
||||
* @param minVariantProportion the minimum proportion to call a site variant.
|
||||
* @return the number of haplotypes necessary to represent this site.
|
||||
* @return the number of alleles necessary to represent this site.
|
||||
*/
|
||||
public int getNumberOfHaplotypes(double minVariantProportion) {
|
||||
int nHaplotypes = 0;
|
||||
int totalCount = consensusBaseCounts.totalCount();
|
||||
int runningCount = 0;
|
||||
|
||||
if (totalCount == 0)
|
||||
public int getNumberOfAlleles(final double minVariantProportion) {
|
||||
final int totalBaseCount = consensusBaseCounts.totalCount();
|
||||
if (totalBaseCount == 0)
|
||||
return 0;
|
||||
|
||||
int[] countsArray = consensusBaseCounts.countsArray();
|
||||
Arrays.sort(countsArray);
|
||||
for (int i = countsArray.length-1; i>=0; i--) {
|
||||
nHaplotypes++;
|
||||
runningCount += countsArray[i];
|
||||
if (runningCount/totalCount > minVariantProportion)
|
||||
break;
|
||||
final int minBaseCountForRelevantAlleles = (int)(minVariantProportion * totalBaseCount);
|
||||
|
||||
int nAlleles = 0;
|
||||
for ( BaseIndex base : BaseIndex.values() ) {
|
||||
final int baseCount = consensusBaseCounts.countOfBase(base);
|
||||
|
||||
// don't consider this allele if the count is 0
|
||||
if ( baseCount == 0 )
|
||||
continue;
|
||||
|
||||
if ( baseCount >= minBaseCountForRelevantAlleles )
|
||||
nAlleles++;
|
||||
}
|
||||
return nHaplotypes;
|
||||
return nAlleles;
|
||||
}
|
||||
}
|
||||
|
|
@ -46,6 +46,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import it.unimi.dsi.fastutil.objects.*;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
|
@ -54,10 +55,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -91,7 +88,7 @@ import java.util.TreeSet;
|
|||
public class MultiSampleCompressor {
|
||||
protected static final Logger logger = Logger.getLogger(MultiSampleCompressor.class);
|
||||
|
||||
protected Map<String, SingleSampleCompressor> compressorsPerSample = new HashMap<String, SingleSampleCompressor>();
|
||||
protected Object2ObjectMap<String, SingleSampleCompressor> compressorsPerSample = new Object2ObjectOpenHashMap<String, SingleSampleCompressor>();
|
||||
|
||||
public MultiSampleCompressor(SAMFileHeader header,
|
||||
final int contextSize,
|
||||
|
|
@ -101,22 +98,21 @@ public class MultiSampleCompressor {
|
|||
final double minIndelProportionToTriggerVariant,
|
||||
final int minBaseQual,
|
||||
final ReduceReads.DownsampleStrategy downsampleStrategy,
|
||||
final int nContigs,
|
||||
final boolean allowPolyploidReduction) {
|
||||
for ( String name : SampleUtils.getSAMFileSamples(header) ) {
|
||||
compressorsPerSample.put(name,
|
||||
new SingleSampleCompressor(contextSize, downsampleCoverage,
|
||||
minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction));
|
||||
minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, allowPolyploidReduction));
|
||||
}
|
||||
}
|
||||
|
||||
public Set<GATKSAMRecord> addAlignment(GATKSAMRecord read) {
|
||||
public ObjectSet<GATKSAMRecord> addAlignment(GATKSAMRecord read) {
|
||||
String sampleName = read.getReadGroup().getSample();
|
||||
SingleSampleCompressor compressor = compressorsPerSample.get(sampleName);
|
||||
if ( compressor == null )
|
||||
throw new ReviewedStingException("No compressor for sample " + sampleName);
|
||||
Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = compressor.addAlignment(read);
|
||||
Set<GATKSAMRecord> reads = readsAndStash.getFirst();
|
||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> readsAndStash = compressor.addAlignment(read);
|
||||
ObjectSet<GATKSAMRecord> reads = readsAndStash.getFirst();
|
||||
CompressionStash regions = readsAndStash.getSecond();
|
||||
|
||||
reads.addAll(closeVariantRegionsInAllSamples(regions));
|
||||
|
|
@ -124,17 +120,17 @@ public class MultiSampleCompressor {
|
|||
return reads;
|
||||
}
|
||||
|
||||
public Set<GATKSAMRecord> close() {
|
||||
Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
public ObjectSet<GATKSAMRecord> close() {
|
||||
ObjectSet<GATKSAMRecord> reads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
for ( SingleSampleCompressor sample : compressorsPerSample.values() ) {
|
||||
Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = sample.close();
|
||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> readsAndStash = sample.close();
|
||||
reads = readsAndStash.getFirst();
|
||||
}
|
||||
return reads;
|
||||
}
|
||||
|
||||
private Set<GATKSAMRecord> closeVariantRegionsInAllSamples(CompressionStash regions) {
|
||||
Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
private ObjectSet<GATKSAMRecord> closeVariantRegionsInAllSamples(CompressionStash regions) {
|
||||
ObjectSet<GATKSAMRecord> reads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
if (!regions.isEmpty()) {
|
||||
for (SingleSampleCompressor sample : compressorsPerSample.values()) {
|
||||
reads.addAll(sample.closeVariantRegions(regions));
|
||||
|
|
|
|||
|
|
@ -46,6 +46,10 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectAVLTreeSet;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectSortedSet;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMProgramRecord;
|
||||
|
|
@ -56,23 +60,21 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.sam.BySampleSAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Reduces the BAM file using read based compression that keeps only essential information for variant calling
|
||||
|
|
@ -104,10 +106,11 @@ import java.util.*;
|
|||
* </pre>
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
|
||||
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
|
||||
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=40)
|
||||
public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, ReduceReadsStash> {
|
||||
|
||||
@Output
|
||||
private StingSAMFileWriter out = null;
|
||||
|
|
@ -213,14 +216,6 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
@Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false)
|
||||
private int downsampleCoverage = 250;
|
||||
|
||||
/**
|
||||
* Number of chromossomes in the sample (this is used for the polyploid consensus compression). Only
|
||||
* tested for humans (or organisms with n=2). Use at your own risk!
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false)
|
||||
private int nContigs = 2;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "nwayout", shortName = "nw", doc = "", required = false)
|
||||
private boolean nwayout = false;
|
||||
|
|
@ -248,14 +243,13 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
|
||||
int nCompressedReads = 0;
|
||||
|
||||
HashMap<String, Long> readNameHash; // This hash will keep the name of the original read the new compressed name (a number).
|
||||
Object2LongOpenHashMap<String> readNameHash; // This hash will keep the name of the original read the new compressed name (a number).
|
||||
Long nextReadNumber = 1L; // The next number to use for the compressed read name.
|
||||
|
||||
CompressionStash compressionStash = new CompressionStash();
|
||||
ObjectSortedSet<GenomeLoc> intervalList;
|
||||
|
||||
SortedSet<GenomeLoc> intervalList;
|
||||
|
||||
private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads"; // The name that will go in the @PG tag
|
||||
// IMPORTANT: DO NOT CHANGE THE VALUE OF THIS CONSTANT VARIABLE; IT IS NOW PERMANENTLY THE @PG NAME THAT EXTERNAL TOOLS LOOK FOR IN THE BAM HEADER
|
||||
public static final String PROGRAM_RECORD_NAME = "GATK ReduceReads"; // The name that will go in the @PG tag
|
||||
private static final String PROGRAM_FILENAME_EXTENSION = ".reduced.bam";
|
||||
|
||||
/**
|
||||
|
|
@ -266,8 +260,8 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
public void initialize() {
|
||||
super.initialize();
|
||||
GenomeAnalysisEngine toolkit = getToolkit();
|
||||
readNameHash = new HashMap<String, Long>(); // prepare the read name hash to keep track of what reads have had their read names compressed
|
||||
intervalList = new TreeSet<GenomeLoc>(); // get the interval list from the engine. If no interval list was provided, the walker will work in WGS mode
|
||||
readNameHash = new Object2LongOpenHashMap<String>(100000); // prepare the read name hash to keep track of what reads have had their read names compressed
|
||||
intervalList = new ObjectAVLTreeSet<GenomeLoc>(); // get the interval list from the engine. If no interval list was provided, the walker will work in WGS mode
|
||||
|
||||
if (toolkit.getIntervals() != null)
|
||||
intervalList.addAll(toolkit.getIntervals());
|
||||
|
|
@ -304,8 +298,8 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
* @return a linked list with all the reads produced by the clipping operations
|
||||
*/
|
||||
@Override
|
||||
public LinkedList<GATKSAMRecord> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
LinkedList<GATKSAMRecord> mappedReads;
|
||||
public ObjectArrayList<GATKSAMRecord> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
ObjectArrayList<GATKSAMRecord> mappedReads;
|
||||
if (!debugRead.isEmpty() && read.getReadName().contains(debugRead))
|
||||
System.out.println("Found debug read!");
|
||||
|
||||
|
|
@ -334,18 +328,18 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
if (HARD_CLIP_TO_INTERVAL)
|
||||
mappedReads = hardClipReadToInterval(read); // Hard clip the remainder of the read to the desired interval
|
||||
else {
|
||||
mappedReads = new LinkedList<GATKSAMRecord>();
|
||||
mappedReads = new ObjectArrayList<GATKSAMRecord>();
|
||||
mappedReads.add(read);
|
||||
}
|
||||
}
|
||||
else {
|
||||
mappedReads = new LinkedList<GATKSAMRecord>();
|
||||
mappedReads = new ObjectArrayList<GATKSAMRecord>();
|
||||
if (!read.isEmpty())
|
||||
mappedReads.add(read);
|
||||
}
|
||||
|
||||
if (!mappedReads.isEmpty() && !DONT_USE_SOFTCLIPPED_BASES) {
|
||||
LinkedList<GATKSAMRecord> tempList = new LinkedList<GATKSAMRecord>();
|
||||
ObjectArrayList<GATKSAMRecord> tempList = new ObjectArrayList<GATKSAMRecord>();
|
||||
for (GATKSAMRecord mRead : mappedReads) {
|
||||
GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualitySoftClips(mRead, minBaseQual);
|
||||
if (!clippedRead.isEmpty())
|
||||
|
|
@ -372,7 +366,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
*/
|
||||
@Override
|
||||
public ReduceReadsStash reduceInit() {
|
||||
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION));
|
||||
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, USE_POLYPLOID_REDUCTION));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -384,7 +378,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
* @param stash the stash that keeps the reads in order for processing
|
||||
* @return the stash with all reads that have not been processed yet
|
||||
*/
|
||||
public ReduceReadsStash reduce(LinkedList<GATKSAMRecord> mappedReads, ReduceReadsStash stash) {
|
||||
public ReduceReadsStash reduce(ObjectArrayList<GATKSAMRecord> mappedReads, ReduceReadsStash stash) {
|
||||
if (debugLevel == 1)
|
||||
stash.print();
|
||||
|
||||
|
|
@ -396,7 +390,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
throw new ReviewedStingException("Empty read sent to reduce, this should never happen! " + read.getReadName() + " -- " + read.getCigar() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd());
|
||||
|
||||
if (originalRead) {
|
||||
List<GATKSAMRecord> readsReady = new LinkedList<GATKSAMRecord>();
|
||||
ObjectArrayList<GATKSAMRecord> readsReady = new ObjectArrayList<GATKSAMRecord>();
|
||||
readsReady.addAll(stash.getAllReadsBefore(read));
|
||||
readsReady.add(read);
|
||||
|
||||
|
|
@ -442,8 +436,8 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
* @param read the read to be hard clipped to the interval.
|
||||
* @return a shallow copy of the read hard clipped to the interval
|
||||
*/
|
||||
private LinkedList<GATKSAMRecord> hardClipReadToInterval(GATKSAMRecord read) {
|
||||
LinkedList<GATKSAMRecord> clippedReads = new LinkedList<GATKSAMRecord>();
|
||||
private ObjectArrayList<GATKSAMRecord> hardClipReadToInterval(GATKSAMRecord read) {
|
||||
ObjectArrayList<GATKSAMRecord> clippedReads = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
GenomeLoc intervalOverlapped = null; // marks the interval to which the original read overlapped (so we can cut all previous intervals from the list)
|
||||
|
||||
|
|
@ -597,7 +591,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
System.out.println("BAM: " + read.getCigar() + " " + read.getAlignmentStart() + " " + read.getAlignmentEnd());
|
||||
|
||||
if (!DONT_COMPRESS_READ_NAMES)
|
||||
compressReadName(read);
|
||||
nextReadNumber = compressReadName(readNameHash, read, nextReadNumber);
|
||||
|
||||
writerToUse.addAlignment(read);
|
||||
}
|
||||
|
|
@ -632,20 +626,28 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
* Compresses the read name using the readNameHash if we have already compressed
|
||||
* this read name before.
|
||||
*
|
||||
* @param read any read
|
||||
* @param hash the hash table containing the read name to compressed read name map
|
||||
* @param read any read
|
||||
* @param nextReadNumber the number to use in the compressed read name in case this is a new read name
|
||||
* @return the next number to use in the compressed read name
|
||||
*/
|
||||
private void compressReadName(GATKSAMRecord read) {
|
||||
String name = read.getReadName();
|
||||
String compressedName = read.isReducedRead() ? "C" : "";
|
||||
if (readNameHash.containsKey(name))
|
||||
compressedName += readNameHash.get(name).toString();
|
||||
else {
|
||||
readNameHash.put(name, nextReadNumber);
|
||||
compressedName += nextReadNumber.toString();
|
||||
nextReadNumber++;
|
||||
protected static long compressReadName(final Object2LongOpenHashMap<String> hash, final GATKSAMRecord read, final long nextReadNumber) {
|
||||
final String name = read.getReadName();
|
||||
final StringBuilder compressedName = new StringBuilder();
|
||||
long result = nextReadNumber;
|
||||
if (read.isReducedRead()) {
|
||||
compressedName.append("C");
|
||||
}
|
||||
|
||||
read.setReadName(compressedName);
|
||||
final Long readNumber = hash.get(name);
|
||||
if (readNumber != null) {
|
||||
compressedName.append(readNumber);
|
||||
} else {
|
||||
hash.put(name, nextReadNumber);
|
||||
compressedName.append(nextReadNumber);
|
||||
result++;
|
||||
}
|
||||
read.setReadName(compressedName.toString());
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -657,8 +659,8 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
* @param read the read
|
||||
* @return Returns true if the read is the original read that went through map().
|
||||
*/
|
||||
private boolean isOriginalRead(LinkedList<GATKSAMRecord> list, GATKSAMRecord read) {
|
||||
return isWholeGenome() || list.getFirst().equals(read);
|
||||
private boolean isOriginalRead(ObjectArrayList<GATKSAMRecord> list, GATKSAMRecord read) {
|
||||
return isWholeGenome() || list.get(0).equals(read);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -46,14 +46,11 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import it.unimi.dsi.fastutil.objects.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author carneiro, depristo
|
||||
|
|
@ -67,13 +64,12 @@ public class SingleSampleCompressor {
|
|||
final private double minIndelProportionToTriggerVariant;
|
||||
final private int minBaseQual;
|
||||
final private ReduceReads.DownsampleStrategy downsampleStrategy;
|
||||
final private int nContigs;
|
||||
final private boolean allowPolyploidReduction;
|
||||
|
||||
private SlidingWindow slidingWindow;
|
||||
private int slidingWindowCounter;
|
||||
|
||||
public static Pair<Set<GATKSAMRecord>, CompressionStash> emptyPair = new Pair<Set<GATKSAMRecord>,CompressionStash>(new TreeSet<GATKSAMRecord>(), new CompressionStash());
|
||||
public static Pair<ObjectSet<GATKSAMRecord>, CompressionStash> emptyPair = new Pair<ObjectSet<GATKSAMRecord>,CompressionStash>(new ObjectAVLTreeSet<GATKSAMRecord>(), new CompressionStash());
|
||||
|
||||
public SingleSampleCompressor(final int contextSize,
|
||||
final int downsampleCoverage,
|
||||
|
|
@ -82,7 +78,6 @@ public class SingleSampleCompressor {
|
|||
final double minIndelProportionToTriggerVariant,
|
||||
final int minBaseQual,
|
||||
final ReduceReads.DownsampleStrategy downsampleStrategy,
|
||||
final int nContigs,
|
||||
final boolean allowPolyploidReduction) {
|
||||
this.contextSize = contextSize;
|
||||
this.downsampleCoverage = downsampleCoverage;
|
||||
|
|
@ -92,12 +87,11 @@ public class SingleSampleCompressor {
|
|||
this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant;
|
||||
this.minBaseQual = minBaseQual;
|
||||
this.downsampleStrategy = downsampleStrategy;
|
||||
this.nContigs = nContigs;
|
||||
this.allowPolyploidReduction = allowPolyploidReduction;
|
||||
}
|
||||
|
||||
public Pair<Set<GATKSAMRecord>, CompressionStash> addAlignment( GATKSAMRecord read ) {
|
||||
Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
public Pair<ObjectSet<GATKSAMRecord>, CompressionStash> addAlignment( GATKSAMRecord read ) {
|
||||
ObjectSet<GATKSAMRecord> reads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
CompressionStash stash = new CompressionStash();
|
||||
int readOriginalStart = read.getUnclippedStart();
|
||||
|
||||
|
|
@ -107,27 +101,27 @@ public class SingleSampleCompressor {
|
|||
(readOriginalStart - contextSize > slidingWindow.getStopLocation()))) { // this read is too far away from the end of the current sliding window
|
||||
|
||||
// close the current sliding window
|
||||
Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = slidingWindow.close();
|
||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> readsAndStash = slidingWindow.close();
|
||||
reads = readsAndStash.getFirst();
|
||||
stash = readsAndStash.getSecond();
|
||||
slidingWindow = null; // so we create a new one on the next if
|
||||
}
|
||||
|
||||
if ( slidingWindow == null) { // this is the first read
|
||||
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction);
|
||||
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), allowPolyploidReduction);
|
||||
slidingWindowCounter++;
|
||||
}
|
||||
|
||||
stash.addAll(slidingWindow.addRead(read));
|
||||
return new Pair<Set<GATKSAMRecord>, CompressionStash>(reads, stash);
|
||||
return new Pair<ObjectSet<GATKSAMRecord>, CompressionStash>(reads, stash);
|
||||
}
|
||||
|
||||
public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
|
||||
public Pair<ObjectSet<GATKSAMRecord>, CompressionStash> close() {
|
||||
return (slidingWindow != null) ? slidingWindow.close() : emptyPair;
|
||||
}
|
||||
|
||||
public Set<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
|
||||
return slidingWindow == null ? Collections.<GATKSAMRecord>emptySet() : slidingWindow.closeVariantRegions(regions);
|
||||
public ObjectSet<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
|
||||
return slidingWindow == null ? ObjectSets.EMPTY_SET : slidingWindow.closeVariantRegions(regions);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,10 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import it.unimi.dsi.fastutil.bytes.Byte2IntArrayMap;
|
||||
import it.unimi.dsi.fastutil.bytes.Byte2IntMap;
|
||||
import it.unimi.dsi.fastutil.bytes.Byte2IntOpenHashMap;
|
||||
import it.unimi.dsi.fastutil.objects.*;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
|
|
@ -62,7 +66,11 @@ import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
|||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.ListIterator;
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -73,7 +81,7 @@ import java.util.*;
|
|||
public class SlidingWindow {
|
||||
|
||||
// Sliding Window data
|
||||
final private TreeSet<GATKSAMRecord> readsInWindow;
|
||||
final private ObjectAVLTreeSet<GATKSAMRecord> readsInWindow;
|
||||
final private LinkedList<HeaderElement> windowHeader;
|
||||
protected int contextSize; // the largest context size (between mismatches and indels)
|
||||
protected String contig;
|
||||
|
|
@ -102,8 +110,6 @@ public class SlidingWindow {
|
|||
protected ReduceReads.DownsampleStrategy downsampleStrategy;
|
||||
private boolean hasIndelQualities;
|
||||
|
||||
private final int nContigs;
|
||||
|
||||
private boolean allowPolyploidReductionInGeneral;
|
||||
|
||||
private static CompressionStash emptyRegions = new CompressionStash();
|
||||
|
|
@ -143,14 +149,13 @@ public class SlidingWindow {
|
|||
this.contigIndex = contigIndex;
|
||||
|
||||
contextSize = 10;
|
||||
nContigs = 1;
|
||||
|
||||
this.windowHeader = new LinkedList<HeaderElement>();
|
||||
windowHeader.addFirst(new HeaderElement(startLocation));
|
||||
this.readsInWindow = new TreeSet<GATKSAMRecord>();
|
||||
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>();
|
||||
}
|
||||
|
||||
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction) {
|
||||
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, boolean allowPolyploidReduction) {
|
||||
this.contextSize = contextSize;
|
||||
this.downsampleCoverage = downsampleCoverage;
|
||||
|
||||
|
|
@ -160,7 +165,7 @@ public class SlidingWindow {
|
|||
this.MIN_MAPPING_QUALITY = minMappingQuality;
|
||||
|
||||
this.windowHeader = new LinkedList<HeaderElement>();
|
||||
this.readsInWindow = new TreeSet<GATKSAMRecord>(new Comparator<GATKSAMRecord>() {
|
||||
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>(new Comparator<GATKSAMRecord>() {
|
||||
@Override
|
||||
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||
final int difference = read1.getSoftEnd() - read2.getSoftEnd();
|
||||
|
|
@ -184,7 +189,6 @@ public class SlidingWindow {
|
|||
|
||||
this.downsampleStrategy = downsampleStrategy;
|
||||
this.hasIndelQualities = hasIndelQualities;
|
||||
this.nContigs = nContigs;
|
||||
|
||||
this.allowPolyploidReductionInGeneral = allowPolyploidReduction;
|
||||
}
|
||||
|
|
@ -198,8 +202,10 @@ public class SlidingWindow {
|
|||
* sliding process.
|
||||
*
|
||||
* @param read the read
|
||||
* @return a list of reads that have been finished by sliding the window.
|
||||
* @return a non-null list of reads (in the CompressionStash) that have been finished by sliding the window.
|
||||
*/
|
||||
@Requires({"read != null"})
|
||||
@Ensures("result != null")
|
||||
public CompressionStash addRead(GATKSAMRecord read) {
|
||||
addToHeader(windowHeader, read); // update the window header counts
|
||||
readsInWindow.add(read); // add read to sliding reads
|
||||
|
|
@ -210,8 +216,8 @@ public class SlidingWindow {
|
|||
* Returns the next complete (or incomplete if closeLastRegion is true) variant region between 'from' (inclusive) and 'to' (exclusive)
|
||||
* but converted to global coordinates.
|
||||
*
|
||||
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates
|
||||
* @param to end window header index of the search window (exclusive); note that this uses local coordinates
|
||||
* @param from beginning window header index of the search window (inclusive) in local (to the windowHeader) coordinates
|
||||
* @param to end window header index of the search window (exclusive) in local (to the windowHeader) coordinates
|
||||
* @param variantSite boolean array with true marking variant regions
|
||||
* @param closeLastRegion if the last index is variant (so it's an incomplete region), should we close (and return as an interval) the location or ignore it?
|
||||
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region. All coordinates returned are global.
|
||||
|
|
@ -238,8 +244,8 @@ public class SlidingWindow {
|
|||
/**
|
||||
* Creates a list with all the complete and incomplete variant regions within 'from' (inclusive) and 'to' (exclusive)
|
||||
*
|
||||
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates
|
||||
* @param to end window header index of the search window (exclusive); note that this uses local coordinates
|
||||
* @param from beginning window header index of the search window (inclusive) in local (to the windowHeader) coordinates
|
||||
* @param to end window header index of the search window (exclusive) in local (to the windowHeader) coordinates
|
||||
* @param variantSite boolean array with true marking variant regions
|
||||
* @return a list with start/stops of variant regions following findNextVariantRegion description in global coordinates
|
||||
*/
|
||||
|
|
@ -289,7 +295,7 @@ public class SlidingWindow {
|
|||
}
|
||||
|
||||
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
|
||||
readsInWindow.pollFirst();
|
||||
readsInWindow.remove(readsInWindow.first());
|
||||
}
|
||||
|
||||
return regions;
|
||||
|
|
@ -395,12 +401,16 @@ public class SlidingWindow {
|
|||
*
|
||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||
*
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @return a list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
|
||||
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||
*/
|
||||
protected List<GATKSAMRecord> addToSyntheticReads(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||
LinkedList<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||
ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||
if (start < end) {
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
||||
|
|
@ -450,11 +460,11 @@ public class SlidingWindow {
|
|||
* Finalizes one or more synthetic reads.
|
||||
*
|
||||
* @param type the synthetic reads you want to close
|
||||
* @return the GATKSAMRecords generated by finalizing the synthetic reads
|
||||
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
|
||||
*/
|
||||
private List<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
|
||||
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
|
||||
GATKSAMRecord read = null;
|
||||
List<GATKSAMRecord> list = new LinkedList<GATKSAMRecord>();
|
||||
ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
switch (type) {
|
||||
case CONSENSUS:
|
||||
|
|
@ -479,7 +489,7 @@ public class SlidingWindow {
|
|||
*
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position with consensus data or empty
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
|
@ -501,7 +511,7 @@ public class SlidingWindow {
|
|||
*
|
||||
* @param start beginning of the region
|
||||
* @param upTo limit to search for
|
||||
* @return next position with no filtered data
|
||||
* @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
|
@ -523,7 +533,7 @@ public class SlidingWindow {
|
|||
*
|
||||
* @param start beginning of the region
|
||||
* @param upTo limit to search for
|
||||
* @return next position with non-empty element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with non-empty element; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
|
@ -544,14 +554,18 @@ public class SlidingWindow {
|
|||
/**
|
||||
* Adds bases to the filtered data synthetic read.
|
||||
*
|
||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
||||
* bases.
|
||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases.
|
||||
*
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
|
||||
* @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated.
|
||||
*/
|
||||
private List<GATKSAMRecord> addToFilteredData(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||
List<GATKSAMRecord> result = new ArrayList<GATKSAMRecord>(0);
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
private ObjectArrayList<GATKSAMRecord> addToFilteredData(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||
ObjectArrayList<GATKSAMRecord> result = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
if (filteredDataConsensus == null)
|
||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand);
|
||||
|
|
@ -585,9 +599,12 @@ public class SlidingWindow {
|
|||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
||||
* bases.
|
||||
*
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
private void addToRunningConsensus(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||
if (runningConsensus == null)
|
||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand);
|
||||
|
|
@ -621,32 +638,42 @@ public class SlidingWindow {
|
|||
syntheticRead.add(base, count, qual, insQual, delQual, rms);
|
||||
}
|
||||
|
||||
private List<GATKSAMRecord> compressVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
||||
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
|
||||
/**
|
||||
* Method to compress a variant region and return the associated reduced reads
|
||||
*
|
||||
* @param start the first window header index in the variant region (inclusive)
|
||||
* @param stop the last window header index of the variant region (inclusive)
|
||||
* @param disallowPolyploidReductionAtThisPosition should we disallow polyploid (het) compression here?
|
||||
* @return a non-null list of all reads contained in the variant region
|
||||
*/
|
||||
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
|
||||
@Ensures("result != null")
|
||||
protected ObjectList<GATKSAMRecord> compressVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
||||
ObjectList<GATKSAMRecord> allReads = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
// Try to compress into a polyploid consensus
|
||||
int nHaplotypes = 0;
|
||||
int nVariantPositions = 0;
|
||||
int hetRefPosition = -1;
|
||||
boolean canCompress = true;
|
||||
boolean foundEvent = false;
|
||||
Object[] header = windowHeader.toArray();
|
||||
|
||||
// foundEvent will remain false if we don't allow polyploid reduction
|
||||
if ( allowPolyploidReductionInGeneral && !disallowPolyploidReductionAtThisPosition ) {
|
||||
for (int i = start; i<=stop; i++) {
|
||||
nHaplotypes = ((HeaderElement) header[i]).getNumberOfHaplotypes(MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT);
|
||||
if (nHaplotypes > nContigs) {
|
||||
|
||||
int nAlleles = ((HeaderElement) header[i]).getNumberOfAlleles(MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT);
|
||||
|
||||
// we will only work on diploid cases because we just don't want to handle/test other scenarios
|
||||
if ( nAlleles > 2 ) {
|
||||
canCompress = false;
|
||||
break;
|
||||
}
|
||||
} else if ( nAlleles == 2 ) {
|
||||
nVariantPositions++;
|
||||
|
||||
// guarantees that there is only 1 site in the variant region that needs more than one haplotype
|
||||
if (nHaplotypes > 1) {
|
||||
if (!foundEvent) {
|
||||
foundEvent = true;
|
||||
// make sure that there is only 1 site in the variant region that contains more than one allele
|
||||
if ( nVariantPositions == 1 ) {
|
||||
hetRefPosition = i;
|
||||
}
|
||||
else {
|
||||
} else if ( nVariantPositions > 1 ) {
|
||||
canCompress = false;
|
||||
break;
|
||||
}
|
||||
|
|
@ -654,10 +681,10 @@ public class SlidingWindow {
|
|||
}
|
||||
}
|
||||
|
||||
// Try to compress the variant region
|
||||
// the "foundEvent" protects us from trying to compress variant regions that are created by insertions
|
||||
if (canCompress && foundEvent) {
|
||||
allReads = createPolyploidConsensus(start, stop, nHaplotypes, ((HeaderElement) header[hetRefPosition]).getLocation());
|
||||
// Try to compress the variant region; note that using the hetRefPosition protects us from trying to compress
|
||||
// variant regions that are created by insertions (since we can't confirm here that they represent the same allele)
|
||||
if ( canCompress && hetRefPosition != -1 ) {
|
||||
allReads = createPolyploidConsensus(start, stop, ((HeaderElement) header[hetRefPosition]).getLocation());
|
||||
}
|
||||
|
||||
// Return all reads that overlap the variant region and remove them from the window header entirely
|
||||
|
|
@ -666,7 +693,7 @@ public class SlidingWindow {
|
|||
final int refStart = windowHeader.get(start).getLocation();
|
||||
final int refStop = windowHeader.get(stop).getLocation();
|
||||
|
||||
LinkedList<GATKSAMRecord> toRemove = new LinkedList<GATKSAMRecord>();
|
||||
ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<GATKSAMRecord>();
|
||||
for (GATKSAMRecord read : readsInWindow) {
|
||||
if (read.getSoftStart() <= refStop) {
|
||||
if (read.getAlignmentEnd() >= refStart) {
|
||||
|
|
@ -684,29 +711,31 @@ public class SlidingWindow {
|
|||
/**
|
||||
* Finalizes a variant region, any adjacent synthetic reads.
|
||||
*
|
||||
* @param start the first window header index in the variant region (inclusive)
|
||||
* @param stop the last window header index of the variant region (inclusive)
|
||||
* @return all reads contained in the variant region plus any adjacent synthetic reads
|
||||
* @param start the first window header index in the variant region (inclusive)
|
||||
* @param stop the last window header index of the variant region (inclusive)
|
||||
* @param disallowPolyploidReductionAtThisPosition should we disallow polyploid (het) compression here?
|
||||
* @return a non-null list of all reads contained in the variant region plus any adjacent synthetic reads
|
||||
*/
|
||||
@Requires("start <= stop")
|
||||
protected List<GATKSAMRecord> closeVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
||||
List<GATKSAMRecord> allReads = compressVariantRegion(start, stop, disallowPolyploidReductionAtThisPosition);
|
||||
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
|
||||
@Ensures("result != null")
|
||||
protected ObjectList<GATKSAMRecord> closeVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
||||
ObjectList<GATKSAMRecord> allReads = compressVariantRegion(start, stop, disallowPolyploidReductionAtThisPosition);
|
||||
|
||||
List<GATKSAMRecord> result = (downsampleCoverage > 0) ? downsampleVariantRegion(allReads) : allReads;
|
||||
ObjectList<GATKSAMRecord> result = (downsampleCoverage > 0) ? downsampleVariantRegion(allReads) : allReads;
|
||||
result.addAll(addToSyntheticReads(windowHeader, 0, stop, false));
|
||||
result.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
||||
|
||||
return result; // finalized reads will be downsampled if necessary
|
||||
}
|
||||
|
||||
public Set<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
|
||||
TreeSet<GATKSAMRecord> allReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
public ObjectSet<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
|
||||
ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
if (!regions.isEmpty()) {
|
||||
int lastStop = -1;
|
||||
int windowHeaderStart = getStartLocation(windowHeader);
|
||||
|
||||
for (GenomeLoc region : regions) {
|
||||
if (((FinishedGenomeLoc)region).isFinished() && region.getContig() == contig && region.getStart() >= windowHeaderStart && region.getStop() < windowHeaderStart + windowHeader.size()) {
|
||||
if (((FinishedGenomeLoc)region).isFinished() && region.getContig().equals(contig) && region.getStart() >= windowHeaderStart && region.getStop() < windowHeaderStart + windowHeader.size()) {
|
||||
int start = region.getStart() - windowHeaderStart;
|
||||
int stop = region.getStop() - windowHeaderStart;
|
||||
|
||||
|
|
@ -733,10 +762,12 @@ public class SlidingWindow {
|
|||
*
|
||||
* It will use the downsampling strategy defined by the SlidingWindow
|
||||
*
|
||||
* @param allReads the reads to select from (all reads that cover the window)
|
||||
* @return a list of reads selected by the downsampler to cover the window to at least the desired coverage
|
||||
* @param allReads a non-null list of reads to select from (all reads that cover the window)
|
||||
* @return a non-null list of reads selected by the downsampler to cover the window to at least the desired coverage
|
||||
*/
|
||||
protected List<GATKSAMRecord> downsampleVariantRegion(final List<GATKSAMRecord> allReads) {
|
||||
@Requires({"allReads != null"})
|
||||
@Ensures("result != null")
|
||||
protected ObjectList<GATKSAMRecord> downsampleVariantRegion(final ObjectList<GATKSAMRecord> allReads) {
|
||||
int nReads = allReads.size();
|
||||
if (nReads == 0)
|
||||
return allReads;
|
||||
|
|
@ -746,7 +777,7 @@ public class SlidingWindow {
|
|||
|
||||
ReservoirDownsampler <GATKSAMRecord> downsampler = new ReservoirDownsampler<GATKSAMRecord>(downsampleCoverage);
|
||||
downsampler.submit(allReads);
|
||||
return downsampler.consumeFinalizedItems();
|
||||
return new ObjectArrayList<GATKSAMRecord>(downsampler.consumeFinalizedItems());
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -755,11 +786,12 @@ public class SlidingWindow {
|
|||
* regions that still exist regardless of being able to fulfill the
|
||||
* context size requirement in the end.
|
||||
*
|
||||
* @return All reads generated
|
||||
* @return A non-null set/list of all reads generated
|
||||
*/
|
||||
public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
|
||||
@Ensures("result != null")
|
||||
public Pair<ObjectSet<GATKSAMRecord>, CompressionStash> close() {
|
||||
// mark variant regions
|
||||
Set<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
ObjectSet<GATKSAMRecord> finalizedReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
CompressionStash regions = new CompressionStash();
|
||||
boolean forceCloseUnfinishedRegions = true;
|
||||
|
||||
|
|
@ -774,13 +806,13 @@ public class SlidingWindow {
|
|||
}
|
||||
}
|
||||
|
||||
return new Pair<Set<GATKSAMRecord>, CompressionStash>(finalizedReads, regions);
|
||||
return new Pair<ObjectSet<GATKSAMRecord>, CompressionStash>(finalizedReads, regions);
|
||||
}
|
||||
|
||||
/**
|
||||
* generates the SAM record for the running consensus read and resets it (to null)
|
||||
*
|
||||
* @return the read contained in the running consensus
|
||||
* @return the read contained in the running consensus or null
|
||||
*/
|
||||
protected GATKSAMRecord finalizeRunningConsensus() {
|
||||
GATKSAMRecord finalizedRead = null;
|
||||
|
|
@ -798,7 +830,7 @@ public class SlidingWindow {
|
|||
/**
|
||||
* generates the SAM record for the filtered data consensus and resets it (to null)
|
||||
*
|
||||
* @return the read contained in the running consensus
|
||||
* @return the read contained in the running consensus or null
|
||||
*/
|
||||
protected GATKSAMRecord finalizeFilteredDataConsensus() {
|
||||
GATKSAMRecord finalizedRead = null;
|
||||
|
|
@ -813,18 +845,26 @@ public class SlidingWindow {
|
|||
return finalizedRead;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private List<GATKSAMRecord> createPolyploidConsensus(int start, int stop, int nHaplotypes, int hetRefPosition) {
|
||||
/**
|
||||
* Finalizes a variant region, any adjacent synthetic reads.
|
||||
*
|
||||
* @param start the first window header index in the variant region (inclusive)
|
||||
* @param stop the last window header index of the variant region (inclusive)
|
||||
* @param hetRefPosition reference position (in global coordinates) of the het site
|
||||
* @return a non-null list of all reads contained in the variant region as a polyploid consensus
|
||||
*/
|
||||
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
|
||||
@Ensures("result != null")
|
||||
private ObjectList<GATKSAMRecord> createPolyploidConsensus(final int start, final int stop, final int hetRefPosition) {
|
||||
// we will create two (positive strand, negative strand) headers for each contig
|
||||
List<LinkedList<HeaderElement>> headersPosStrand = new ArrayList<LinkedList<HeaderElement>>();
|
||||
List<LinkedList<HeaderElement>> headersNegStrand = new ArrayList<LinkedList<HeaderElement>>();
|
||||
List<GATKSAMRecord> hetReads = new LinkedList<GATKSAMRecord>();
|
||||
Map<Byte, Integer> haplotypeHeaderMap = new HashMap<Byte, Integer>(nHaplotypes);
|
||||
ObjectList<LinkedList<HeaderElement>> headersPosStrand = new ObjectArrayList<LinkedList<HeaderElement>>();
|
||||
ObjectList<LinkedList<HeaderElement>> headersNegStrand = new ObjectArrayList<LinkedList<HeaderElement>>();
|
||||
ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();
|
||||
Byte2IntMap haplotypeHeaderMap = new Byte2IntArrayMap(2);
|
||||
int currentHaplotype = 0;
|
||||
int refStart = windowHeader.get(start).getLocation();
|
||||
int refStop = windowHeader.get(stop).getLocation();
|
||||
List<GATKSAMRecord> toRemove = new LinkedList<GATKSAMRecord>();
|
||||
ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<GATKSAMRecord>();
|
||||
for (GATKSAMRecord read : readsInWindow) {
|
||||
int haplotype;
|
||||
|
||||
|
|
@ -835,6 +875,7 @@ public class SlidingWindow {
|
|||
// check if the read contains the het site
|
||||
if (read.getSoftStart() <= hetRefPosition && read.getSoftEnd() >= hetRefPosition) {
|
||||
int readPos = ReadUtils.getReadCoordinateForReferenceCoordinate(read, hetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
// TODO -- THIS IS A HUGE BUG AS IT WILL NOT WORK FOR DELETIONS; see commented out unit test
|
||||
byte base = read.getReadBases()[readPos];
|
||||
byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPos];
|
||||
|
||||
|
|
@ -902,7 +943,7 @@ public class SlidingWindow {
|
|||
* @param read the incoming read to be added to the sliding window
|
||||
* @param removeRead if we are removing the read from the header or adding
|
||||
*/
|
||||
private void updateHeaderCounts(LinkedList<HeaderElement> header, GATKSAMRecord read, boolean removeRead) {
|
||||
private void updateHeaderCounts(final LinkedList<HeaderElement> header, final GATKSAMRecord read, final boolean removeRead) {
|
||||
byte[] bases = read.getReadBases();
|
||||
byte[] quals = read.getBaseQualities();
|
||||
byte[] insQuals = read.getExistingBaseInsertionQualities();
|
||||
|
|
@ -998,7 +1039,7 @@ public class SlidingWindow {
|
|||
}
|
||||
}
|
||||
|
||||
private void removeReadsFromWindow (List<GATKSAMRecord> readsToRemove) {
|
||||
private void removeReadsFromWindow (ObjectList<GATKSAMRecord> readsToRemove) {
|
||||
for (GATKSAMRecord read : readsToRemove) {
|
||||
readsInWindow.remove(read);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import it.unimi.dsi.fastutil.bytes.ByteArrayList;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
|
|
@ -57,10 +59,8 @@ import org.broadinstitute.sting.utils.recalibration.EventType;
|
|||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* Running Consensus is a read that is compressed as a sliding window travels over the reads
|
||||
|
|
@ -76,11 +76,54 @@ import java.util.List;
|
|||
* @since 8/26/11
|
||||
*/
|
||||
public class SyntheticRead {
|
||||
private List<BaseIndex> bases;
|
||||
private List<Byte> counts;
|
||||
private List<Byte> quals;
|
||||
private List<Byte> insertionQuals;
|
||||
private List<Byte> deletionQuals;
|
||||
// Rather than storing a separate list for each attribute in SingleBaseInfo, store one list to reduce
|
||||
// memory footprint.
|
||||
// TODO: better name
|
||||
private static class SingleBaseInfo {
|
||||
byte baseIndexOrdinal; // enum BaseIndex.ordinal
|
||||
byte count;
|
||||
byte qual;
|
||||
byte insertionQual;
|
||||
byte deletionQual;
|
||||
|
||||
SingleBaseInfo(byte baseIndexOrdinal, byte count, byte qual, byte insertionQual, byte deletionQual) {
|
||||
this.baseIndexOrdinal = baseIndexOrdinal;
|
||||
this.count = count;
|
||||
this.qual = qual;
|
||||
this.insertionQual = insertionQual;
|
||||
this.deletionQual = deletionQual;
|
||||
}
|
||||
}
|
||||
|
||||
// This class is merely sharing of code for convertVariableGivenBases().
|
||||
private abstract class SingleBaseInfoIterator implements Iterator<Byte> {
|
||||
final Iterator<SingleBaseInfo> it;
|
||||
|
||||
SingleBaseInfoIterator() {
|
||||
this.it = basesCountsQuals.iterator();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return it.hasNext();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Map from ordinal to enum value.
|
||||
private static final BaseIndex[] BaseIndexByOrdinal = new BaseIndex[BaseIndex.values().length];
|
||||
static
|
||||
{
|
||||
for (final BaseIndex baseIndex : BaseIndex.values()) {
|
||||
BaseIndexByOrdinal[baseIndex.ordinal()] = baseIndex;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private final ObjectArrayList<SingleBaseInfo> basesCountsQuals;
|
||||
private double mappingQuality; // the average of the rms of the mapping qualities of all the reads that contributed to this consensus
|
||||
private String readTag;
|
||||
|
||||
|
|
@ -108,11 +151,7 @@ public class SyntheticRead {
|
|||
*/
|
||||
public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, String readTag, boolean hasIndelQualities, boolean isNegativeRead) {
|
||||
final int initialCapacity = 10000;
|
||||
bases = new ArrayList<BaseIndex>(initialCapacity);
|
||||
counts = new ArrayList<Byte>(initialCapacity);
|
||||
quals = new ArrayList<Byte>(initialCapacity);
|
||||
insertionQuals = new ArrayList<Byte>(initialCapacity);
|
||||
deletionQuals = new ArrayList<Byte>(initialCapacity);
|
||||
basesCountsQuals = new ObjectArrayList<SingleBaseInfo>(initialCapacity);
|
||||
mappingQuality = 0.0;
|
||||
|
||||
this.readTag = readTag;
|
||||
|
|
@ -126,12 +165,11 @@ public class SyntheticRead {
|
|||
this.isNegativeStrand = isNegativeRead;
|
||||
}
|
||||
|
||||
public SyntheticRead(List<BaseIndex> bases, List<Byte> counts, List<Byte> quals, List<Byte> insertionQuals, List<Byte> deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) {
|
||||
this.bases = bases;
|
||||
this.counts = counts;
|
||||
this.quals = quals;
|
||||
this.insertionQuals = insertionQuals;
|
||||
this.deletionQuals = deletionQuals;
|
||||
public SyntheticRead(ObjectArrayList<BaseIndex> bases, ByteArrayList counts, ByteArrayList quals, ByteArrayList insertionQuals, ByteArrayList deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) {
|
||||
basesCountsQuals = new ObjectArrayList<SingleBaseInfo>(bases.size());
|
||||
for (int i = 0; i < bases.size(); ++i) {
|
||||
basesCountsQuals.add(new SingleBaseInfo(bases.get(i).getOrdinalByte(), counts.get(i), quals.get(i), insertionQuals.get(i), deletionQuals.get(i)));
|
||||
}
|
||||
this.mappingQuality = mappingQuality;
|
||||
this.readTag = readTag;
|
||||
this.header = header;
|
||||
|
|
@ -153,16 +191,12 @@ public class SyntheticRead {
|
|||
*/
|
||||
@Requires("count <= Byte.MAX_VALUE")
|
||||
public void add(BaseIndex base, byte count, byte qual, byte insQual, byte delQual, double mappingQuality) {
|
||||
counts.add(count);
|
||||
bases.add(base);
|
||||
quals.add(qual);
|
||||
insertionQuals.add(insQual);
|
||||
deletionQuals.add(delQual);
|
||||
basesCountsQuals.add(new SingleBaseInfo(base.getOrdinalByte(), count, qual, insQual, delQual));
|
||||
this.mappingQuality += mappingQuality;
|
||||
}
|
||||
|
||||
public BaseIndex getBase(final int readCoordinate) {
|
||||
return bases.get(readCoordinate);
|
||||
return BaseIndexByOrdinal[basesCountsQuals.get(readCoordinate).baseIndexOrdinal];
|
||||
}
|
||||
|
||||
public int getRefStart() {
|
||||
|
|
@ -192,7 +226,7 @@ public class SyntheticRead {
|
|||
read.setReadName(readName);
|
||||
read.setBaseQualities(convertBaseQualities(), EventType.BASE_SUBSTITUTION);
|
||||
read.setReadBases(convertReadBases());
|
||||
read.setMappingQuality((int) Math.ceil(mappingQuality / bases.size()));
|
||||
read.setMappingQuality((int) Math.ceil(mappingQuality / basesCountsQuals.size()));
|
||||
read.setReadGroup(readGroupRecord);
|
||||
read.setAttribute(readTag, convertBaseCounts());
|
||||
|
||||
|
|
@ -210,30 +244,46 @@ public class SyntheticRead {
|
|||
* @return true if it is, false if it isn't.
|
||||
*/
|
||||
private boolean isAllDeletions() {
|
||||
for (BaseIndex b : bases)
|
||||
if (b != BaseIndex.D)
|
||||
for (SingleBaseInfo b : basesCountsQuals)
|
||||
if (b.baseIndexOrdinal != BaseIndex.D.getOrdinalByte())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
public int size () {
|
||||
return bases.size();
|
||||
return basesCountsQuals.size();
|
||||
}
|
||||
|
||||
private byte [] convertBaseQualities() {
|
||||
return convertVariableGivenBases(bases, quals);
|
||||
return convertVariableGivenBases(new SingleBaseInfoIterator() {
|
||||
public Byte next() {
|
||||
return it.next().qual;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private byte [] convertInsertionQualities() {
|
||||
return convertVariableGivenBases(bases, insertionQuals);
|
||||
return convertVariableGivenBases(new SingleBaseInfoIterator() {
|
||||
public Byte next() {
|
||||
return it.next().insertionQual;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private byte [] convertDeletionQualities() {
|
||||
return convertVariableGivenBases(bases, deletionQuals);
|
||||
return convertVariableGivenBases(new SingleBaseInfoIterator() {
|
||||
public Byte next() {
|
||||
return it.next().deletionQual;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
protected byte [] convertBaseCounts() {
|
||||
byte[] countsArray = convertVariableGivenBases(bases, counts);
|
||||
byte[] countsArray = convertVariableGivenBases(new SingleBaseInfoIterator() {
|
||||
public Byte next() {
|
||||
return it.next().count;
|
||||
}
|
||||
});
|
||||
|
||||
if (countsArray.length == 0)
|
||||
throw new ReviewedStingException("Reduced read has counts array of length 0");
|
||||
|
|
@ -247,12 +297,14 @@ public class SyntheticRead {
|
|||
}
|
||||
|
||||
private byte [] convertReadBases() {
|
||||
byte [] readArray = new byte[getReadLengthWithNoDeletions(bases)];
|
||||
byte [] readArray = new byte[getReadLengthWithNoDeletions()];
|
||||
int i = 0;
|
||||
for (BaseIndex baseIndex : bases)
|
||||
for (final SingleBaseInfo singleBaseInfo : basesCountsQuals) {
|
||||
final BaseIndex baseIndex = BaseIndexByOrdinal[singleBaseInfo.baseIndexOrdinal];
|
||||
if (baseIndex != BaseIndex.D)
|
||||
readArray[i++] = baseIndex.getByte();
|
||||
|
||||
}
|
||||
|
||||
return readArray;
|
||||
}
|
||||
|
||||
|
|
@ -264,10 +316,11 @@ public class SyntheticRead {
|
|||
* @return the cigar string for the synthetic read
|
||||
*/
|
||||
private Cigar buildCigar() {
|
||||
LinkedList<CigarElement> cigarElements = new LinkedList<CigarElement>();
|
||||
ObjectArrayList<CigarElement> cigarElements = new ObjectArrayList<CigarElement>();
|
||||
CigarOperator cigarOperator = null;
|
||||
int length = 0;
|
||||
for (BaseIndex b : bases) {
|
||||
for (final SingleBaseInfo singleBaseInfo : basesCountsQuals) {
|
||||
final BaseIndex b = BaseIndexByOrdinal[singleBaseInfo.baseIndexOrdinal];
|
||||
CigarOperator op;
|
||||
switch (b) {
|
||||
case D:
|
||||
|
|
@ -303,18 +356,16 @@ public class SyntheticRead {
|
|||
/**
|
||||
* Shared functionality for all conversion utilities
|
||||
*
|
||||
* @param bases the read bases
|
||||
* @param variable the list to convert
|
||||
* @param variableIterator the list to convert
|
||||
* @return a converted variable given the bases and skipping deletions
|
||||
*/
|
||||
|
||||
private static byte [] convertVariableGivenBases (List<BaseIndex> bases, List<Byte> variable) {
|
||||
byte [] variableArray = new byte[getReadLengthWithNoDeletions(bases)];
|
||||
private byte [] convertVariableGivenBases (Iterator<Byte> variableIterator) {
|
||||
byte [] variableArray = new byte[getReadLengthWithNoDeletions()];
|
||||
int i = 0;
|
||||
Iterator<Byte> variableIterator = variable.iterator();
|
||||
for (BaseIndex baseIndex : bases) {
|
||||
for (final SingleBaseInfo singleBaseInfo : basesCountsQuals) {
|
||||
byte count = variableIterator.next();
|
||||
if (baseIndex != BaseIndex.D)
|
||||
if (singleBaseInfo.baseIndexOrdinal != BaseIndex.D.getOrdinalByte())
|
||||
variableArray[i++] = count;
|
||||
}
|
||||
return variableArray;
|
||||
|
|
@ -324,13 +375,12 @@ public class SyntheticRead {
|
|||
/**
|
||||
* Shared functionality for all conversion utilities
|
||||
*
|
||||
* @param bases the read bases
|
||||
* @return the length of the read with no deletions
|
||||
*/
|
||||
private static int getReadLengthWithNoDeletions(List<BaseIndex> bases) {
|
||||
int readLength = bases.size();
|
||||
for (BaseIndex baseIndex : bases)
|
||||
if (baseIndex == BaseIndex.D)
|
||||
private int getReadLengthWithNoDeletions() {
|
||||
int readLength = basesCountsQuals.size();
|
||||
for (final SingleBaseInfo singleBaseInfo : basesCountsQuals)
|
||||
if (singleBaseInfo.baseIndexOrdinal == BaseIndex.D.getOrdinalByte())
|
||||
readLength--;
|
||||
return readLength;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,59 +46,255 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Simple walker to plot the coverage distribution per base.
|
||||
*
|
||||
* <p>
|
||||
* Features of this walker:
|
||||
* <li>includes a smart counting of uncovered bases without visiting the uncovered loci.</li>
|
||||
* <li>includes reads with deletions in the loci (optionally can be turned off)</li>
|
||||
* </p>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The BAM file and an optional interval list (works for WGS as well)
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A GATK Report with the coverage distribution per base
|
||||
*
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx4g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T BaseCoverageDistribution \
|
||||
* -I myData.bam \
|
||||
* -L interesting.intervals \
|
||||
* -fd \
|
||||
* -o report.grp
|
||||
* </pre>
|
||||
* User: carneiro
|
||||
* Date: 1/27/13
|
||||
* Time: 11:16 AM
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
public class BaseCoverageDistribution extends LocusWalker<Integer, Map<Integer, Long>> {
|
||||
@Output(required = true)
|
||||
public class BaseCoverageDistribution extends LocusWalker<ArrayList<Integer>, Map<Integer, ArrayList<Long>>> {
|
||||
/**
|
||||
* The output GATK Report table
|
||||
*/
|
||||
@Output(required = true, doc = "The output GATK Report table")
|
||||
private PrintStream out;
|
||||
|
||||
/**
|
||||
* Whether or not a deletion should be counted towards the coverage of a site
|
||||
*/
|
||||
@Argument(required = false, shortName="del", fullName = "include_deletions", doc ="whether or not to include reads with deletions on the loci in the pileup")
|
||||
private boolean includeDeletions = true;
|
||||
|
||||
/**
|
||||
* Whether or not to calculate and output a filtered coverage distribution. Bases will be filtered according to the
|
||||
* minimum_mapping_quality and minimum_base_quality parameters below.
|
||||
*/
|
||||
@Argument(required = false, shortName="fd", fullName = "filtered_distribution", doc ="calculate and report the filtered coverage distribution of bases")
|
||||
private boolean calculateFilteredDistribution = false;
|
||||
|
||||
/**
|
||||
* The minimum mapping quality a read must have to be counted towards the filtered coverage of a site
|
||||
*/
|
||||
@Argument(required = false, shortName="mmq", fullName = "minimum_mapping_quality", doc ="minimum mapping quality of a read to include it in the filtered coverage distribution")
|
||||
private byte minMappingQuality = 20;
|
||||
|
||||
/**
|
||||
* The minimum base quality a base must have to be counted towards the filtered coverage of a site
|
||||
*/
|
||||
@Argument(required = false, shortName="mbq", fullName = "minimum_base_quality", doc ="minimum base quality of a base to include it in the filtered coverage distribution")
|
||||
private byte minBaseQuality = 17;
|
||||
|
||||
private GenomeLoc previousLocus = null;
|
||||
private long uncoveredBases = 0L;
|
||||
private final LinkedList<GenomeLoc> intervalList = new LinkedList<GenomeLoc>();
|
||||
|
||||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() {
|
||||
return true;
|
||||
return includeDeletions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return context.getBasePileup().getReads().size(); // I want the reads instead of the base pileup because I want to count deletions.
|
||||
public void initialize() {
|
||||
if (getToolkit().getIntervals() != null)
|
||||
intervalList.addAll(getToolkit().getIntervals()); // if the user provided intervals, keep track of them for uncovered bases calculation
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<Integer, Long> reduceInit() {
|
||||
return new HashMap<Integer, Long>(10000);
|
||||
public ArrayList<Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
ArrayList<Integer> result = new ArrayList<Integer>(2);
|
||||
GenomeLoc currentLocus = ref.getLocus();
|
||||
tallyUncoveredBases(currentLocus);
|
||||
previousLocus = currentLocus;
|
||||
result.add(context.getBasePileup().getReads().size()); // I want the reads instead of the base pileup because I want to count deletions.
|
||||
if (calculateFilteredDistribution)
|
||||
result.add(context.getBasePileup().getBaseAndMappingFilteredPileup(minBaseQuality, minMappingQuality).getReads().size()); // filtered pileup
|
||||
else {
|
||||
result.add(result.get(0)); // repeat the same value as the unfiltered pileup if filters are not on
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<Integer, Long> reduce(Integer value, Map<Integer, Long> sum) {
|
||||
Long curr = sum.get(value);
|
||||
if (curr == null)
|
||||
curr = 0L;
|
||||
sum.put(value, curr + 1);
|
||||
public Map<Integer, ArrayList<Long>> reduceInit() {
|
||||
return new HashMap<Integer, ArrayList<Long>>(10000);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<Integer, ArrayList<Long>> reduce(ArrayList<Integer> value, Map<Integer, ArrayList<Long>> sum) {
|
||||
final int unfilteredCoverage = value.get(0);
|
||||
final int filteredCoverage = value.get(1);
|
||||
incrementSumArray(sum, unfilteredCoverage, 0);
|
||||
incrementSumArray(sum, filteredCoverage, 1);
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Map<Integer, Long> result) {
|
||||
GATKReport report = GATKReport.newSimpleReport("BaseCoverageDistribution", "Coverage", "Count");
|
||||
for (Map.Entry<Integer, Long> entry : result.entrySet()) {
|
||||
report.addRow(entry.getKey(), entry.getValue());
|
||||
public void onTraversalDone(Map<Integer, ArrayList<Long>> result) {
|
||||
tallyUncoveredBasesTillEndOfTraversal();
|
||||
GATKReport report;
|
||||
|
||||
if (calculateFilteredDistribution) {
|
||||
report = GATKReport.newSimpleReport("BaseCoverageDistribution", "Coverage", "Count", "Filtered");
|
||||
} else {
|
||||
report = GATKReport.newSimpleReport("BaseCoverageDistribution", "Coverage", "Count");
|
||||
report.addRow(0, uncoveredBases); // preemptively add the uncovered bases row (since they'll never exist in the Map)
|
||||
}
|
||||
|
||||
for (Map.Entry<Integer, ArrayList<Long>> entry : result.entrySet()) {
|
||||
final ArrayList<Long> values = entry.getValue();
|
||||
final int coverage = entry.getKey();
|
||||
if (calculateFilteredDistribution) {
|
||||
if (coverage == 0) { // special case for the uncovered bases. The filtered pileups may have an entry, but the unfiltered ones won't.
|
||||
report.addRow(coverage, uncoveredBases, uncoveredBases + values.get(1));
|
||||
} else {
|
||||
report.addRow(coverage, values.get(0), values.get(1));
|
||||
}
|
||||
} else {
|
||||
report.addRow(coverage, values.get(0));
|
||||
}
|
||||
}
|
||||
// In case the filtered distribution never had a pileup filtered down to zero coverage, output the overall uncovered bases for both
|
||||
if (calculateFilteredDistribution && !result.containsKey(0)) {
|
||||
report.addRow(0, uncoveredBases, uncoveredBases);
|
||||
}
|
||||
report.print(out);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the ArrayList if needed. Returns the initialized element (or previously initialized)
|
||||
* this method is used directly by the incrementSumArray.
|
||||
*
|
||||
* @param sum the map
|
||||
* @param coverage the key to the map to extract the array list
|
||||
* @return if the ArrayList exists, return it. Otherwise, initialize it with 0 counters.
|
||||
*/
|
||||
private ArrayList<Long> initializeSumArray(final Map<Integer, ArrayList<Long>> sum, final int coverage) {
|
||||
ArrayList<Long> curr = sum.get(coverage);
|
||||
if (curr == null) {
|
||||
curr = new ArrayList<Long>(2);
|
||||
curr.add(0L); // number of bases with this unfiltered coverage
|
||||
curr.add(0L); // number of bases with this filtered coverage
|
||||
sum.put(coverage, curr);
|
||||
}
|
||||
return curr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the counter for the given arrayindex (type of coverage : filtered or unfiltered) initializing if necessary
|
||||
*
|
||||
* @param sum the hash
|
||||
* @param coverage the hash key
|
||||
* @param arrayIndex which distribution to increment, 0 for unfiltered, 1 for filtered.
|
||||
*/
|
||||
private void incrementSumArray(final Map<Integer, ArrayList<Long>> sum, final int coverage, final int arrayIndex) {
|
||||
final ArrayList<Long> currentTally = initializeSumArray(sum, coverage);
|
||||
currentTally.set(arrayIndex, currentTally.get(arrayIndex) + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts all the uncovered loci after the end of traversal.
|
||||
*
|
||||
* - Modifies the global variable uncoveredBases
|
||||
* - Uses global variables: intervalList and previousLocus
|
||||
*
|
||||
* takes into account that the traversal may have been due over a set of intervals, or over the whole genome.
|
||||
*/
|
||||
private void tallyUncoveredBasesTillEndOfTraversal() {
|
||||
GenomeLocParser parser = getToolkit().getGenomeLocParser();
|
||||
GenomeLoc lastLocus;
|
||||
if (intervalList.isEmpty()) { // whole genome, add up all contigs past previousLocus
|
||||
final int lastContigIndex = getToolkit().getSAMFileHeader().getSequenceDictionary().size() - 1;
|
||||
final int lastContigLength = getToolkit().getSAMFileHeader().getSequence(lastContigIndex).getSequenceLength();
|
||||
final String lastContigName = getToolkit().getSAMFileHeader().getSequence(lastContigIndex).getSequenceName();
|
||||
lastLocus = parser.createGenomeLoc(lastContigName, lastContigIndex, lastContigLength, lastContigLength);
|
||||
} else {
|
||||
GenomeLoc lastInterval = intervalList.getLast();
|
||||
lastLocus = parser.createGenomeLoc(lastInterval.getContig(), lastInterval.getContigIndex(), lastInterval.getStop(), lastInterval.getStop());
|
||||
}
|
||||
tallyUncoveredBases(lastLocus);
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts all the uncovered loci that have been skipped since the last visited locus. This method allows coverage
|
||||
* tools to run with @By(DataSource.READS) instead of @By(DataSource.REFERENCE), while still accurately calculating
|
||||
* uncovered bases
|
||||
*
|
||||
* //todo -- make this a generic capability of Coverage and DiagnoseTargets
|
||||
*
|
||||
* - Modifies the global variable uncoveredBases
|
||||
* - Uses global variables: intervalList and previousLocus
|
||||
*
|
||||
* takes into account that the traversal may have been due over a set of intervals, or over the whole genome.
|
||||
*
|
||||
* @param currentLocus the locus we are visiting right now
|
||||
*/
|
||||
private void tallyUncoveredBases(GenomeLoc currentLocus) {
|
||||
long distance = 0;
|
||||
if (previousLocus == null) { // first base visited
|
||||
GenomeLocParser parser = getToolkit().getGenomeLocParser();
|
||||
if (intervalList.isEmpty()) { // if this is whole genome (no intervals requested), add what we missed.
|
||||
final GenomeLoc zeroLoc = parser.createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(), 0, 1, 1);
|
||||
distance += currentLocus.distanceAcrossContigs(zeroLoc, getToolkit().getSAMFileHeader());
|
||||
} else { // if we are running on an interval list, add all intervals before the current locus to the uncovered bases counter
|
||||
while (!intervalList.peek().containsP(currentLocus)) {
|
||||
GenomeLoc interval = intervalList.removeFirst();
|
||||
distance += interval.size();
|
||||
}
|
||||
distance += currentLocus.getStart() - intervalList.peek().getStart(); // now this is the interval that contains the current locus. Discount the bases from the beginning.
|
||||
}
|
||||
} else {
|
||||
final GenomeLoc previousInterval = intervalList.peekFirst(); // peekFirst returns null if interval list is empty (WGS).
|
||||
distance = currentLocus.distanceAcrossContigs(previousLocus, getToolkit().getSAMFileHeader()) - 1;
|
||||
if (previousInterval != null && !previousInterval.containsP(currentLocus)) {
|
||||
intervalList.removeFirst(); // we're done with the previous interval
|
||||
final GenomeLoc currentInterval = intervalList.peekFirst();
|
||||
distance -= currentInterval.distanceAcrossContigs(previousInterval, getToolkit().getSAMFileHeader()) - 1;
|
||||
}
|
||||
}
|
||||
|
||||
uncoveredBases += distance;
|
||||
}
|
||||
}
|
||||
|
|
@ -56,6 +56,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -104,7 +105,7 @@ import java.util.*;
|
|||
* @author Mauricio Carneiro, Roger Zurawicki
|
||||
* @since 5/8/12
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
@By(value = DataSource.READS)
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||
|
|
|
|||
|
|
@ -59,16 +59,20 @@ import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@ActiveRegionTraversalParameters(extension = 0, maxRegion = 50000)
|
||||
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
||||
@Output(required = true)
|
||||
private PrintStream out;
|
||||
|
||||
@Argument(fullName = "uncovered", shortName = "u", required = false, doc = "output intervals that fail the coverage threshold instead")
|
||||
private boolean outputUncovered = false;
|
||||
|
||||
@Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false)
|
||||
private int coverageThreshold = 20;
|
||||
|
||||
|
|
@ -85,10 +89,10 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
|||
|
||||
@Override
|
||||
public GenomeLoc map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker tracker) {
|
||||
if (activeRegion.isActive())
|
||||
if ((!outputUncovered && activeRegion.isActive()) || (outputUncovered && !activeRegion.isActive()))
|
||||
return activeRegion.getLocation();
|
||||
else
|
||||
return null;
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -267,7 +267,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][BaseUtils.BASES.length+1][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY];
|
||||
static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_SAM_QUAL_SCORE +1][BaseUtils.BASES.length+1][QualityUtils.MAX_SAM_QUAL_SCORE +1][MAX_PLOIDY];
|
||||
|
||||
protected boolean inCache(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
|
||||
return getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy) != null;
|
||||
|
|
@ -427,7 +427,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
if ( qual > SAMUtils.MAX_PHRED_SCORE )
|
||||
throw new UserException.MisencodedBAM(p.getRead(), "we encountered an extremely high quality score (" + (int)qual + ")");
|
||||
if ( capBaseQualsAtMappingQual )
|
||||
qual = (byte)Math.min((int)qual, p.getMappingQual());
|
||||
qual = (byte) Math.min( 0xff & qual, p.getMappingQual());
|
||||
if ( (int)qual < minBaseQual )
|
||||
qual = (byte)0;
|
||||
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -123,7 +124,7 @@ public class ErrorModel {
|
|||
}
|
||||
}
|
||||
|
||||
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
|
||||
double p = QualityUtils.qualToErrorProbLog10((byte)(maxQualityScore-minQualityScore));
|
||||
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
|
||||
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
||||
// maximum uncertainty if there's no ref data at site
|
||||
|
|
@ -270,7 +271,7 @@ public class ErrorModel {
|
|||
})
|
||||
private double log10PoissonProbabilitySiteGivenQual(byte q, int coverage, int mismatches) {
|
||||
// same as log10ProbabilitySiteGivenQual but with Poisson approximation to avoid numerical underflows
|
||||
double lambda = MathUtils.phredScaleToProbability(q) * (double )coverage;
|
||||
double lambda = QualityUtils.qualToErrorProb(q) * (double )coverage;
|
||||
// log10(e^-lambda*lambda^k/k!) = -lambda + k*log10(lambda) - log10factorial(k)
|
||||
return Math.log10(lambda)*mismatches - lambda*log10MinusE- MathUtils.log10Factorial(mismatches);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup != null) {
|
||||
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.CONTAMINATION_FRACTION, UAC.contaminationLog);
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey()), UAC.contaminationLog);
|
||||
b.PL(genotypeLikelihoods);
|
||||
b.DP(getFilteredDepth(pileup));
|
||||
genotypes.add(b.make());
|
||||
|
|
@ -259,4 +259,4 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
return count;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,9 +101,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
// calculate the GLs
|
||||
ArrayList<SampleGenotypeData> GLs = new ArrayList<SampleGenotypeData>(contexts.size());
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
// Down-sample with bias according to the contamination level (global or per file)
|
||||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
if ( UAC.CONTAMINATION_FRACTION > 0.0 )
|
||||
pileup = perReadAlleleLikelihoodMap.createPerAlleleDownsampledBasePileup(pileup, UAC.CONTAMINATION_FRACTION, UAC.contaminationLog);
|
||||
final Double contamination = UAC.getSampleContamination().get(sample.getKey());
|
||||
if( contamination > 0.0 ) //no need to enter if no contamination reduction
|
||||
pileup = perReadAlleleLikelihoodMap.createPerAlleleDownsampledBasePileup(pileup,contamination, UAC.contaminationLog);
|
||||
if ( useBAQedPileup )
|
||||
pileup = createBAQedPileup(pileup);
|
||||
|
||||
|
|
|
|||
|
|
@ -150,6 +150,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only
|
||||
sample-level error model
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false)
|
||||
public boolean IGNORE_LANE_INFO = false;
|
||||
|
||||
|
|
@ -157,6 +158,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
Generalized ploidy argument: VCF file that contains truth calls for reference sample. If a reference sample is included through argument -refsample,
|
||||
then this argument is required.
|
||||
*/
|
||||
@Hidden
|
||||
@Input(fullName="reference_sample_calls", shortName = "referenceCalls", doc="VCF file with the truth callset for the reference sample", required=false)
|
||||
RodBinding<VariantContext> referenceSampleRod;
|
||||
|
||||
|
|
@ -165,6 +167,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
that a bar-coded reference sample be included with the polyploid/pooled data in a sequencing experimental design.
|
||||
If argument is absent, no per-site error model is included and calling is done with a generalization of traditional statistical calling.
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(shortName="refsample", fullName="reference_sample_name", doc="Reference sample name.", required=false)
|
||||
String referenceSampleName;
|
||||
|
||||
|
|
@ -174,6 +177,10 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
|
||||
public int samplePloidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
|
||||
|
||||
|
||||
/**
|
||||
* The following argument are for debug-only tweaks when running generalized ploidy with a reference sample
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false)
|
||||
byte minQualityScore= 1;
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
||||
|
|
@ -61,6 +62,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -82,6 +84,7 @@ import java.util.*;
|
|||
* genotype of each sample. The system can either emit just the variant sites or complete genotypes (which includes
|
||||
* homozygous reference calls) satisfying some phred-scaled confidence value. The genotyper can make accurate calls on
|
||||
* both single sample data and multi-sample data.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
|
|
@ -109,7 +112,7 @@ import java.util.*;
|
|||
*
|
||||
* <p>
|
||||
* The above command will call all of the samples in your provided BAM files [-I arguments] together and produce a VCF file
|
||||
* with sites and genotypes for all samples. The easiest way to get the dbSNP file is from the GATK resource bundle. Several
|
||||
* with sites and genotypes for all samples. The easiest way to get the dbSNP file is from the GATK resource bundle (see Guide FAQs for details). Several
|
||||
* arguments have parameters that should be chosen based on the average coverage per sample in your data. See the detailed
|
||||
* argument descriptions below.
|
||||
* </p>
|
||||
|
|
@ -132,12 +135,12 @@ import java.util.*;
|
|||
* <li>The system can be very aggressive in calling variants. In the 1000 genomes project for pilot 2 (deep coverage of ~35x)
|
||||
* we expect the raw Qscore > 50 variants to contain at least ~10% FP calls. We use extensive post-calling filters to eliminate
|
||||
* most of these FPs. Variant Quality Score Recalibration is a tool to perform this filtering.</li>
|
||||
* <li>We only handle diploid genotypes</li>
|
||||
* <li>The generalized ploidy model can be used to handle non-diploid or pooled samples (see the -ploidy argument in the table below).</li>
|
||||
* </ul>
|
||||
*
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
|
||||
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} )
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
|
|
@ -160,9 +163,9 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
|
||||
/**
|
||||
* If a call overlaps with a record from the provided comp track, the INFO field will be annotated
|
||||
* as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
|
||||
* Records that are filtered in the comp track will be ignored.
|
||||
* Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
|
||||
* as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
|
||||
* Records that are filtered in the comp track will be ignored.
|
||||
* Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
|
||||
*/
|
||||
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
|
||||
public List<RodBinding<VariantContext>> comps = Collections.emptyList();
|
||||
|
|
@ -257,6 +260,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
if ( UAC.referenceSampleName != null )
|
||||
samples.remove(UAC.referenceSampleName);
|
||||
}
|
||||
if ( UAC.CONTAMINATION_FRACTION_FILE != null )
|
||||
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger));
|
||||
|
||||
// check for a bad max alleles value
|
||||
if ( UAC.MAX_ALTERNATE_ALLELES > GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED)
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
|
|
@ -138,6 +139,10 @@ public class UnifiedGenotyperEngine {
|
|||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
}
|
||||
|
||||
protected UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, Set<String> samples, UnifiedArgumentCollection UAC) {
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
}
|
||||
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int ploidy) {
|
||||
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
|
||||
|
|
@ -577,43 +582,53 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
private final static double[] binomialProbabilityDepthCache = new double[10000];
|
||||
private final static double REF_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5);
|
||||
|
||||
static {
|
||||
for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) {
|
||||
binomialProbabilityDepthCache[i] = MathUtils.binomialProbability(0, i, 0.5);
|
||||
binomialProbabilityDepthCache[i] = MathUtils.log10BinomialProbability(i, 0, REF_BINOMIAL_PROB_LOG10_0_5);
|
||||
}
|
||||
}
|
||||
|
||||
private final double getRefBinomialProb(final int depth) {
|
||||
private final double getRefBinomialProbLog10(final int depth) {
|
||||
if ( depth < binomialProbabilityDepthCache.length )
|
||||
return binomialProbabilityDepthCache[depth];
|
||||
else
|
||||
return MathUtils.binomialProbability(0, depth, 0.5);
|
||||
return MathUtils.log10BinomialProbability(depth, 0, REF_BINOMIAL_PROB_LOG10_0_5);
|
||||
}
|
||||
|
||||
|
||||
private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
|
||||
if ( contexts == null )
|
||||
return null;
|
||||
|
||||
double P_of_ref = initialPofRef;
|
||||
double log10POfRef = Math.log10(initialPofRef);
|
||||
|
||||
// for each sample that we haven't examined yet
|
||||
for ( String sample : samples ) {
|
||||
boolean isCovered = contexts.containsKey(sample);
|
||||
if ( ignoreCoveredSamples && isCovered )
|
||||
final AlignmentContext context = contexts.get(sample);
|
||||
if ( ignoreCoveredSamples && context != null )
|
||||
continue;
|
||||
|
||||
|
||||
int depth = 0;
|
||||
|
||||
if ( isCovered ) {
|
||||
depth = contexts.get(sample).getBasePileup().depthOfCoverage();
|
||||
}
|
||||
|
||||
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
|
||||
final int depth = context == null ? 0 : context.getBasePileup().depthOfCoverage();
|
||||
log10POfRef += estimateLog10ReferenceConfidenceForOneSample(depth, theta);
|
||||
}
|
||||
|
||||
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
||||
return new VariantCallContext(vc, QualityUtils.phredScaleLog10CorrectRate(log10POfRef) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the log10 probability of a sample with sequencing depth and no alt allele is actually truly homozygous reference
|
||||
*
|
||||
* Assumes the sample is diploid
|
||||
*
|
||||
* @param depth the depth of the sample
|
||||
* @param theta the heterozygosity of this species (between 0 and 1)
|
||||
* @return a valid log10 probability of the sample being hom-ref
|
||||
*/
|
||||
@Requires({"depth >= 0", "theta >= 0.0 && theta <= 1.0"})
|
||||
@Ensures("MathUtils.goodLog10Probability(result)")
|
||||
protected double estimateLog10ReferenceConfidenceForOneSample(final int depth, final double theta) {
|
||||
final double log10PofNonRef = Math.log10(theta / 2.0) + getRefBinomialProbLog10(depth);
|
||||
return MathUtils.log10OneMinusX(Math.pow(10.0, log10PofNonRef));
|
||||
}
|
||||
|
||||
protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
|
|
|||
|
|
@ -113,12 +113,14 @@ public abstract class AFCalc implements Cloneable {
|
|||
/**
|
||||
* Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
|
||||
*
|
||||
* @param vc the VariantContext holding the alleles and sample information
|
||||
* @param vc the VariantContext holding the alleles and sample information. The VariantContext
|
||||
* must have at least 1 alternative allele
|
||||
* @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i)
|
||||
* @return result (for programming convenience)
|
||||
*/
|
||||
public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
|
||||
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
|
||||
if ( vc.getNAlleles() == 1 ) throw new IllegalArgumentException("VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);
|
||||
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
|
||||
if ( stateTracker == null ) throw new IllegalArgumentException("Results object cannot be null");
|
||||
|
||||
|
|
@ -170,18 +172,19 @@ public abstract class AFCalc implements Cloneable {
|
|||
* @param vc the initial VC provided by the caller to this AFcalculation
|
||||
* @return a potentially simpler VC that's more tractable to genotype
|
||||
*/
|
||||
@Requires("vc != null")
|
||||
@Requires({"vc != null", "vc.getNAlleles() > 1"})
|
||||
@Ensures("result != null")
|
||||
protected abstract VariantContext reduceScope(final VariantContext vc);
|
||||
|
||||
/**
|
||||
* Actually carry out the log10PNonRef calculation on vc, storing results in results
|
||||
*
|
||||
* @param vc variant context with alleles and genotype likelihoods
|
||||
* @param vc variant context with alleles and genotype likelihoods,
|
||||
* must have at least one alt allele
|
||||
* @param log10AlleleFrequencyPriors priors
|
||||
* @return a AFCalcResult object describing the results of this calculation
|
||||
*/
|
||||
@Requires({"vc != null", "log10AlleleFrequencyPriors != null"})
|
||||
@Requires({"vc != null", "log10AlleleFrequencyPriors != null", "vc.getNAlleles() > 1"})
|
||||
protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors);
|
||||
|
||||
|
|
|
|||
|
|
@ -156,16 +156,25 @@ import java.util.*;
|
|||
public AFCalcResult computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
final List<AFCalcResult> independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors);
|
||||
final List<AFCalcResult> withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers);
|
||||
return combineIndependentPNonRefs(vc, withMultiAllelicPriors);
|
||||
}
|
||||
|
||||
if ( independentResultTrackers.size() == 0 )
|
||||
throw new IllegalStateException("Independent alleles model returned an empty list of results at VC " + vc);
|
||||
|
||||
if ( independentResultTrackers.size() == 1 ) {
|
||||
// fast path for the very common bi-allelic use case
|
||||
return independentResultTrackers.get(0);
|
||||
} else {
|
||||
// we are a multi-allelic, so we need to actually combine the results
|
||||
final List<AFCalcResult> withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers);
|
||||
return combineIndependentPNonRefs(vc, withMultiAllelicPriors);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the conditional exact AFCalcResult for each allele in vc independently, returning
|
||||
* the result of each, in order of the alt alleles in VC
|
||||
*
|
||||
* @param vc the VariantContext we want to analyze
|
||||
* @param vc the VariantContext we want to analyze, with at least 1 alt allele
|
||||
* @param log10AlleleFrequencyPriors the priors
|
||||
* @return a list of the AFCalcResults for each bi-allelic sub context of vc
|
||||
*/
|
||||
|
|
@ -208,13 +217,20 @@ import java.util.*;
|
|||
@Ensures("result.size() == vc.getNAlleles() - 1")
|
||||
protected final List<VariantContext> makeAlleleConditionalContexts(final VariantContext vc) {
|
||||
final int nAltAlleles = vc.getNAlleles() - 1;
|
||||
final List<VariantContext> vcs = new LinkedList<VariantContext>();
|
||||
|
||||
for ( int altI = 0; altI < nAltAlleles; altI++ ) {
|
||||
vcs.add(biallelicCombinedGLs(vc, altI + 1));
|
||||
if ( nAltAlleles == 1 ) {
|
||||
// fast path for bi-allelic case.
|
||||
return Collections.singletonList(vc);
|
||||
} else {
|
||||
// go through the work of ripping up the VC into its biallelic components
|
||||
final List<VariantContext> vcs = new LinkedList<VariantContext>();
|
||||
|
||||
for ( int altI = 0; altI < nAltAlleles; altI++ ) {
|
||||
vcs.add(biallelicCombinedGLs(vc, altI + 1));
|
||||
}
|
||||
|
||||
return vcs;
|
||||
}
|
||||
|
||||
return vcs;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -47,19 +47,21 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -70,13 +72,14 @@ import java.util.*;
|
|||
* Date: Mar 14, 2011
|
||||
*/
|
||||
|
||||
public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||
public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||
|
||||
private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
|
||||
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
|
||||
private static final byte MIN_QUALITY = (byte) 16;
|
||||
private static final int GRAPH_KMER_STEP = 6;
|
||||
|
||||
// Smith-Waterman parameters originally copied from IndelRealigner
|
||||
// Smith-Waterman parameters originally copied from IndelRealigner, only used during GGA mode
|
||||
private static final double SW_MATCH = 5.0; // 1.0;
|
||||
private static final double SW_MISMATCH = -10.0; //-1.0/3.0;
|
||||
private static final double SW_GAP = -22.0; //-1.0-1.0/3.0;
|
||||
|
|
@ -84,60 +87,84 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
|
||||
private final boolean DEBUG;
|
||||
private final PrintStream GRAPH_WRITER;
|
||||
private final List<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
|
||||
private final List<DeBruijnAssemblyGraph> graphs = new ArrayList<DeBruijnAssemblyGraph>();
|
||||
private final int MIN_KMER;
|
||||
|
||||
private int PRUNE_FACTOR = 2;
|
||||
|
||||
public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter, final int minKmer ) {
|
||||
public DeBruijnAssembler(final boolean debug, final PrintStream graphWriter, final int minKmer) {
|
||||
super();
|
||||
DEBUG = debug;
|
||||
GRAPH_WRITER = graphWriter;
|
||||
MIN_KMER = minKmer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
|
||||
* @param activeRegion ActiveRegion object holding the reads which are to be used during assembly
|
||||
* @param refHaplotype reference haplotype object
|
||||
* @param fullReferenceWithPadding byte array holding the reference sequence with padding
|
||||
* @param refLoc GenomeLoc object corresponding to the reference sequence with padding
|
||||
* @param PRUNE_FACTOR prune kmers from the graph if their weight is <= this value
|
||||
* @param activeAllelesToGenotype the alleles to inject into the haplotypes during GGA mode
|
||||
* @return a non-empty list of all the haplotypes that are produced during assembly
|
||||
*/
|
||||
@Ensures({"result.contains(refHaplotype)"})
|
||||
public List<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final List<VariantContext> activeAllelesToGenotype ) {
|
||||
if( activeRegion == null ) { throw new IllegalArgumentException("Assembly engine cannot be used with a null ActiveRegion."); }
|
||||
if( refHaplotype == null ) { throw new IllegalArgumentException("Reference haplotype cannot be null."); }
|
||||
if( fullReferenceWithPadding.length != refLoc.size() ) { throw new IllegalArgumentException("Reference bases and reference loc must be the same size."); }
|
||||
if( PRUNE_FACTOR < 0 ) { throw new IllegalArgumentException("Pruning factor cannot be negative"); }
|
||||
|
||||
// set the pruning factor for this run of the assembly engine
|
||||
this.PRUNE_FACTOR = PRUNE_FACTOR;
|
||||
|
||||
// create the graphs
|
||||
createDeBruijnGraphs( activeRegion.getReads(), refHaplotype );
|
||||
|
||||
// clean up the graphs by pruning and merging
|
||||
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
|
||||
pruneGraph( graph, PRUNE_FACTOR );
|
||||
//eliminateNonRefPaths( graph );
|
||||
mergeNodes( graph );
|
||||
}
|
||||
|
||||
// print the graphs if the appropriate debug option has been turned on
|
||||
if( GRAPH_WRITER != null ) {
|
||||
printGraphs();
|
||||
}
|
||||
|
||||
// find the best paths in the graphs
|
||||
// find the best paths in the graphs and return them as haplotypes
|
||||
return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
|
||||
}
|
||||
|
||||
@Requires({"reads != null", "refHaplotype != null"})
|
||||
protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
|
||||
graphs.clear();
|
||||
|
||||
final int maxKmer = refHaplotype.getBases().length;
|
||||
// create the graph
|
||||
for( int kmer = MIN_KMER; kmer <= maxKmer; kmer += 6 ) {
|
||||
final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
if( createGraphFromSequences( graph, reads, kmer, refHaplotype, DEBUG ) ) {
|
||||
graphs.add(graph);
|
||||
final int maxKmer = ReadUtils.getMaxReadLength(reads) - KMER_OVERLAP - 1;
|
||||
if( maxKmer < MIN_KMER ) { return; } // Reads are too small for assembly so don't try to create any assembly graphs
|
||||
|
||||
// create the graph for each possible kmer
|
||||
for( int kmer = maxKmer; kmer >= MIN_KMER; kmer -= GRAPH_KMER_STEP ) {
|
||||
final DeBruijnAssemblyGraph graph = createGraphFromSequences( reads, kmer, refHaplotype, DEBUG );
|
||||
if( graph != null ) { // graphs that fail during creation ( for example, because there are cycles in the reference graph ) will show up here as a null graph object
|
||||
// do a series of steps to clean up the raw assembly graph to make it analysis-ready
|
||||
pruneGraph(graph, PRUNE_FACTOR);
|
||||
cleanNonRefPaths(graph);
|
||||
mergeNodes(graph);
|
||||
if( graph.getReferenceSourceVertex() != null ) { // if the graph contains interesting variation from the reference
|
||||
sanityCheckReferenceGraph(graph, refHaplotype);
|
||||
graphs.add(graph);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected static void mergeNodes( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||
@Requires({"graph != null"})
|
||||
protected static void mergeNodes( final DeBruijnAssemblyGraph graph ) {
|
||||
boolean foundNodesToMerge = true;
|
||||
while( foundNodesToMerge ) {
|
||||
foundNodesToMerge = false;
|
||||
|
||||
for( final DeBruijnEdge e : graph.edgeSet() ) {
|
||||
final DeBruijnVertex outgoingVertex = graph.getEdgeTarget(e);
|
||||
final DeBruijnVertex incomingVertex = graph.getEdgeSource(e);
|
||||
if( !outgoingVertex.equals(incomingVertex) && graph.inDegreeOf(outgoingVertex) == 1 && graph.outDegreeOf(incomingVertex) == 1) {
|
||||
if( !outgoingVertex.equals(incomingVertex) && graph.outDegreeOf(incomingVertex) == 1 && graph.inDegreeOf(outgoingVertex) == 1 &&
|
||||
graph.inDegreeOf(incomingVertex) <= 1 && graph.outDegreeOf(outgoingVertex) <= 1 && graph.isReferenceNode(incomingVertex) == graph.isReferenceNode(outgoingVertex) ) {
|
||||
final Set<DeBruijnEdge> outEdges = graph.outgoingEdgesOf(outgoingVertex);
|
||||
final Set<DeBruijnEdge> inEdges = graph.incomingEdgesOf(incomingVertex);
|
||||
if( inEdges.size() == 1 && outEdges.size() == 1 ) {
|
||||
|
|
@ -167,7 +194,42 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
}
|
||||
}
|
||||
|
||||
protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
|
||||
protected static void cleanNonRefPaths( final DeBruijnAssemblyGraph graph ) {
|
||||
if( graph.getReferenceSourceVertex() == null || graph.getReferenceSinkVertex() == null ) {
|
||||
return;
|
||||
}
|
||||
// Remove non-ref edges connected before and after the reference path
|
||||
final Set<DeBruijnEdge> edgesToCheck = new HashSet<DeBruijnEdge>();
|
||||
edgesToCheck.addAll(graph.incomingEdgesOf(graph.getReferenceSourceVertex()));
|
||||
while( !edgesToCheck.isEmpty() ) {
|
||||
final DeBruijnEdge e = edgesToCheck.iterator().next();
|
||||
if( !e.isRef() ) {
|
||||
edgesToCheck.addAll( graph.incomingEdgesOf(graph.getEdgeSource(e)) );
|
||||
graph.removeEdge(e);
|
||||
}
|
||||
edgesToCheck.remove(e);
|
||||
}
|
||||
edgesToCheck.addAll(graph.outgoingEdgesOf(graph.getReferenceSinkVertex()));
|
||||
while( !edgesToCheck.isEmpty() ) {
|
||||
final DeBruijnEdge e = edgesToCheck.iterator().next();
|
||||
if( !e.isRef() ) {
|
||||
edgesToCheck.addAll( graph.outgoingEdgesOf(graph.getEdgeTarget(e)) );
|
||||
graph.removeEdge(e);
|
||||
}
|
||||
edgesToCheck.remove(e);
|
||||
}
|
||||
|
||||
// Run through the graph and clean up singular orphaned nodes
|
||||
final List<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
|
||||
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||
if( graph.inDegreeOf(v) == 0 && graph.outDegreeOf(v) == 0 ) {
|
||||
verticesToRemove.add(v);
|
||||
}
|
||||
}
|
||||
graph.removeAllVertices(verticesToRemove);
|
||||
}
|
||||
|
||||
protected static void pruneGraph( final DeBruijnAssemblyGraph graph, final int pruneFactor ) {
|
||||
final List<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
|
||||
for( final DeBruijnEdge e : graph.edgeSet() ) {
|
||||
if( e.getMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
|
||||
|
|
@ -186,50 +248,41 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
graph.removeAllVertices(verticesToRemove);
|
||||
}
|
||||
|
||||
protected static void eliminateNonRefPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||
final List<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
|
||||
boolean done = false;
|
||||
while( !done ) {
|
||||
done = true;
|
||||
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||
if( graph.inDegreeOf(v) == 0 || graph.outDegreeOf(v) == 0 ) {
|
||||
boolean isRefNode = false;
|
||||
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
|
||||
if( e.isRef() ) {
|
||||
isRefNode = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !isRefNode ) {
|
||||
done = false;
|
||||
verticesToRemove.add(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
graph.removeAllVertices(verticesToRemove);
|
||||
verticesToRemove.clear();
|
||||
protected static void sanityCheckReferenceGraph(final DeBruijnAssemblyGraph graph, final Haplotype refHaplotype) {
|
||||
if( graph.getReferenceSourceVertex() == null ) {
|
||||
throw new IllegalStateException("All reference graphs must have a reference source vertex.");
|
||||
}
|
||||
if( graph.getReferenceSinkVertex() == null ) {
|
||||
throw new IllegalStateException("All reference graphs must have a reference sink vertex.");
|
||||
}
|
||||
if( !Arrays.equals(graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true), refHaplotype.getBases()) ) {
|
||||
throw new IllegalStateException("Mismatch between the reference haplotype and the reference assembly graph path." +
|
||||
" graph = " + new String(graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true)) +
|
||||
" haplotype = " + new String(refHaplotype.getBases())
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Collection<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
|
||||
@Requires({"reads != null", "KMER_LENGTH > 0", "refHaplotype != null"})
|
||||
protected static DeBruijnAssemblyGraph createGraphFromSequences( final List<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
|
||||
|
||||
final DeBruijnAssemblyGraph graph = new DeBruijnAssemblyGraph();
|
||||
|
||||
// First pull kmers from the reference haplotype and add them to the graph
|
||||
final byte[] refSequence = refHaplotype.getBases();
|
||||
if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) {
|
||||
final int kmersInSequence = refSequence.length - KMER_LENGTH + 1;
|
||||
for (int i = 0; i < kmersInSequence - 1; i++) {
|
||||
// get the kmers
|
||||
final byte[] kmer1 = new byte[KMER_LENGTH];
|
||||
System.arraycopy(refSequence, i, kmer1, 0, KMER_LENGTH);
|
||||
final byte[] kmer2 = new byte[KMER_LENGTH];
|
||||
System.arraycopy(refSequence, i+1, kmer2, 0, KMER_LENGTH);
|
||||
if( !addKmersToGraph(graph, kmer1, kmer2, true) ) {
|
||||
for( int iii = 0; iii < kmersInSequence - 1; iii++ ) {
|
||||
if( !graph.addKmersToGraph(Arrays.copyOfRange(refSequence, iii, iii + KMER_LENGTH), Arrays.copyOfRange(refSequence, iii + 1, iii + 1 + KMER_LENGTH), true) ) {
|
||||
if( DEBUG ) {
|
||||
System.out.println("Cycle detected in reference graph for kmer = " + KMER_LENGTH + " ...skipping");
|
||||
}
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Next pull kmers out of every read and throw them on the graph
|
||||
for( final GATKSAMRecord read : reads ) {
|
||||
final byte[] sequence = read.getReadBases();
|
||||
final byte[] qualities = read.getBaseQualities();
|
||||
|
|
@ -245,56 +298,30 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
break;
|
||||
}
|
||||
}
|
||||
int countNumber = 1;
|
||||
if (read.isReducedRead()) {
|
||||
// compute mean number of reduced read counts in current kmer span
|
||||
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
|
||||
// precise rounding can make a difference with low consensus counts
|
||||
countNumber = MathUtils.arrayMax(counts);
|
||||
// countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
||||
}
|
||||
|
||||
if( !badKmer ) {
|
||||
// get the kmers
|
||||
final byte[] kmer1 = new byte[KMER_LENGTH];
|
||||
System.arraycopy(sequence, iii, kmer1, 0, KMER_LENGTH);
|
||||
final byte[] kmer2 = new byte[KMER_LENGTH];
|
||||
System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH);
|
||||
int countNumber = 1;
|
||||
if( read.isReducedRead() ) {
|
||||
// compute mean number of reduced read counts in current kmer span
|
||||
// precise rounding can make a difference with low consensus counts
|
||||
countNumber = MathUtils.arrayMax(Arrays.copyOfRange(reducedReadCounts, iii, iii + KMER_LENGTH));
|
||||
}
|
||||
|
||||
for (int k=0; k < countNumber; k++)
|
||||
addKmersToGraph(graph, kmer1, kmer2, false);
|
||||
final byte[] kmer1 = Arrays.copyOfRange(sequence, iii, iii + KMER_LENGTH);
|
||||
final byte[] kmer2 = Arrays.copyOfRange(sequence, iii + 1, iii + 1 + KMER_LENGTH);
|
||||
|
||||
for( int kkk=0; kkk < countNumber; kkk++ ) {
|
||||
graph.addKmersToGraph(kmer1, kmer2, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected static boolean addKmersToGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final byte[] kmer1, final byte[] kmer2, final boolean isRef ) {
|
||||
|
||||
final int numVertexBefore = graph.vertexSet().size();
|
||||
final DeBruijnVertex v1 = new DeBruijnVertex( kmer1, kmer1.length );
|
||||
graph.addVertex(v1);
|
||||
final DeBruijnVertex v2 = new DeBruijnVertex( kmer2, kmer2.length );
|
||||
graph.addVertex(v2);
|
||||
if( isRef && graph.vertexSet().size() == numVertexBefore ) { return false; }
|
||||
|
||||
final DeBruijnEdge targetEdge = graph.getEdge(v1, v2);
|
||||
if ( targetEdge == null ) {
|
||||
graph.addEdge(v1, v2, new DeBruijnEdge( isRef ));
|
||||
} else {
|
||||
if( isRef ) {
|
||||
targetEdge.setIsRef( true );
|
||||
}
|
||||
targetEdge.setMultiplicity(targetEdge.getMultiplicity() + 1);
|
||||
}
|
||||
return true;
|
||||
return graph;
|
||||
}
|
||||
|
||||
protected void printGraphs() {
|
||||
int count = 0;
|
||||
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
|
||||
GRAPH_WRITER.println("digraph kmer" + count++ +" {");
|
||||
GRAPH_WRITER.println("digraph assemblyGraphs {");
|
||||
for( final DeBruijnAssemblyGraph graph : graphs ) {
|
||||
for( final DeBruijnEdge edge : graph.edgeSet() ) {
|
||||
if( edge.getMultiplicity() > PRUNE_FACTOR ) {
|
||||
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];");
|
||||
|
|
@ -305,24 +332,23 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
if( !edge.isRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
|
||||
}
|
||||
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||
final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
|
||||
GRAPH_WRITER.println("\t" + v.toString() + " [label=\"" + label + "\"]");
|
||||
GRAPH_WRITER.println("\t" + v.toString() + " [label=\"" + new String(graph.getAdditionalSequence(v)) + "\"]");
|
||||
}
|
||||
GRAPH_WRITER.println("}");
|
||||
}
|
||||
GRAPH_WRITER.println("}");
|
||||
}
|
||||
|
||||
@Requires({"refWithPadding.length > refHaplotype.getBases().length", "refLoc.containsP(activeRegionWindow)"})
|
||||
@Ensures({"result.contains(refHaplotype)"})
|
||||
private List<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final List<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
|
||||
final List<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();
|
||||
private List<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] refWithPadding, final GenomeLoc refLoc, final List<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
|
||||
|
||||
// add the reference haplotype separately from all the others
|
||||
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( fullReferenceWithPadding, refHaplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||
refHaplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
|
||||
refHaplotype.setCigar( swConsensus.getCigar() );
|
||||
if( !returnHaplotypes.add( refHaplotype ) ) {
|
||||
throw new ReviewedStingException("Unable to add reference haplotype during assembly: " + refHaplotype);
|
||||
}
|
||||
// add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
|
||||
final List<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();
|
||||
refHaplotype.setAlignmentStartHapwrtRef(activeRegionWindow.getStart() - refLoc.getStart());
|
||||
final Cigar c = new Cigar();
|
||||
c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
|
||||
refHaplotype.setCigar(c);
|
||||
returnHaplotypes.add( refHaplotype );
|
||||
|
||||
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
|
||||
final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength();
|
||||
|
|
@ -331,30 +357,50 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
for( final VariantContext compVC : activeAllelesToGenotype ) {
|
||||
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
|
||||
final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
|
||||
addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, true );
|
||||
addHaplotypeForGGA( insertedRefHaplotype, refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, true );
|
||||
}
|
||||
}
|
||||
|
||||
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
|
||||
for( final DeBruijnAssemblyGraph graph : graphs ) {
|
||||
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
|
||||
Haplotype h = new Haplotype( path.getBases() );
|
||||
if( !returnHaplotypes.contains(h) ) {
|
||||
final Cigar cigar = path.calculateCigar();
|
||||
if( cigar.isEmpty() ) {
|
||||
throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength());
|
||||
} else if ( pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < 60 ) { // N cigar elements means that a bubble was too divergent from the reference so skip over this path
|
||||
continue;
|
||||
} else if( cigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // SW failure
|
||||
throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength());
|
||||
}
|
||||
h.setCigar(cigar);
|
||||
|
||||
final Haplotype h = new Haplotype( path.getBases() );
|
||||
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) {
|
||||
// extend partial haplotypes which are anchored in the reference to include the full active region
|
||||
h = extendPartialHaplotype(h, activeRegionStart, refWithPadding);
|
||||
final Cigar leftAlignedCigar = leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(h.getCigar()), refWithPadding, h.getBases(), activeRegionStart, 0);
|
||||
if( leftAlignedCigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // left alignment failure
|
||||
continue;
|
||||
}
|
||||
if( !returnHaplotypes.contains(h) ) {
|
||||
h.setAlignmentStartHapwrtRef(activeRegionStart);
|
||||
h.setCigar( leftAlignedCigar );
|
||||
returnHaplotypes.add(h);
|
||||
|
||||
// for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
if( !activeAllelesToGenotype.isEmpty() ) {
|
||||
final Map<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
|
||||
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
|
||||
// for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
if( !activeAllelesToGenotype.isEmpty() ) {
|
||||
final Map<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), refWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
|
||||
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
|
||||
|
||||
// This if statement used to additionally have:
|
||||
// "|| !vcOnHaplotype.hasSameAllelesAs(compVC)"
|
||||
// but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto
|
||||
// a haplotype that already contains a 1bp insertion (so practically it is reference but
|
||||
// falls into the bin for the 1bp deletion because we keep track of the artificial alleles).
|
||||
if( vcOnHaplotype == null ) {
|
||||
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
|
||||
addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false );
|
||||
// This if statement used to additionally have:
|
||||
// "|| !vcOnHaplotype.hasSameAllelesAs(compVC)"
|
||||
// but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto
|
||||
// a haplotype that already contains a 1bp insertion (so practically it is reference but
|
||||
// falls into the bin for the 1bp deletion because we keep track of the artificial alleles).
|
||||
if( vcOnHaplotype == null ) {
|
||||
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
|
||||
addHaplotypeForGGA( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -363,7 +409,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
}
|
||||
}
|
||||
|
||||
if( DEBUG ) {
|
||||
if( DEBUG ) {
|
||||
if( returnHaplotypes.size() > 1 ) {
|
||||
System.out.println("Found " + returnHaplotypes.size() + " candidate haplotypes to evaluate every read against.");
|
||||
} else {
|
||||
|
|
@ -371,51 +417,150 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
}
|
||||
for( final Haplotype h : returnHaplotypes ) {
|
||||
System.out.println( h.toString() );
|
||||
System.out.println( "> Cigar = " + h.getCigar() );
|
||||
System.out.println( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() );
|
||||
}
|
||||
}
|
||||
|
||||
return returnHaplotypes;
|
||||
}
|
||||
|
||||
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final List<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) {
|
||||
/**
|
||||
* Extend partial haplotypes which are anchored in the reference to include the full active region
|
||||
* @param haplotype the haplotype to extend
|
||||
* @param activeRegionStart the place where the active region starts in the ref byte array
|
||||
* @param refWithPadding the full reference byte array with padding which encompasses the active region
|
||||
* @return a haplotype fully extended to encompass the active region
|
||||
*/
|
||||
@Requires({"haplotype != null", "activeRegionStart > 0", "refWithPadding != null", "refWithPadding.length > 0"})
|
||||
@Ensures({"result != null", "result.getCigar() != null"})
|
||||
private Haplotype extendPartialHaplotype( final Haplotype haplotype, final int activeRegionStart, final byte[] refWithPadding ) {
|
||||
final Cigar cigar = haplotype.getCigar();
|
||||
final Cigar newCigar = new Cigar();
|
||||
byte[] newHaplotypeBases = haplotype.getBases();
|
||||
int refPos = activeRegionStart;
|
||||
int hapPos = 0;
|
||||
for( CigarElement ce : cigar.getCigarElements() ) {
|
||||
switch (ce.getOperator()) {
|
||||
case M:
|
||||
refPos += ce.getLength();
|
||||
hapPos += ce.getLength();
|
||||
newCigar.add(ce);
|
||||
break;
|
||||
case I:
|
||||
hapPos += ce.getLength();
|
||||
newCigar.add(ce);
|
||||
break;
|
||||
case D:
|
||||
refPos += ce.getLength();
|
||||
newCigar.add(ce);
|
||||
break;
|
||||
case X:
|
||||
newHaplotypeBases = ArrayUtils.addAll( Arrays.copyOfRange(newHaplotypeBases, 0, hapPos),
|
||||
ArrayUtils.addAll(Arrays.copyOfRange(refWithPadding, refPos, refPos + ce.getLength()),
|
||||
Arrays.copyOfRange(newHaplotypeBases, hapPos, newHaplotypeBases.length)));
|
||||
refPos += ce.getLength();
|
||||
hapPos += ce.getLength();
|
||||
newCigar.add(new CigarElement(ce.getLength(), CigarOperator.M));
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unsupported cigar operator detected: " + ce.getOperator());
|
||||
}
|
||||
}
|
||||
final Haplotype returnHaplotype = new Haplotype(newHaplotypeBases, haplotype.isReference());
|
||||
returnHaplotype.setCigar( newCigar );
|
||||
return returnHaplotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* We use CigarOperator.N as the signal that an incomplete or too divergent bubble was found during bubble traversal
|
||||
* @param c the cigar to test
|
||||
* @return true if we should skip over this path
|
||||
*/
|
||||
@Requires("c != null")
|
||||
private boolean pathIsTooDivergentFromReference( final Cigar c ) {
|
||||
for( final CigarElement ce : c.getCigarElements() ) {
|
||||
if( ce.getOperator().equals(CigarOperator.N) ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Left align the given cigar sequentially. This is needed because AlignmentUtils doesn't accept cigars with more than one indel in them.
|
||||
* This is a target of future work to incorporate and generalize into AlignmentUtils for use by others.
|
||||
* @param cigar the cigar to left align
|
||||
* @param refSeq the reference byte array
|
||||
* @param readSeq the read byte array
|
||||
* @param refIndex 0-based alignment start position on ref
|
||||
* @param readIndex 0-based alignment start position on read
|
||||
* @return the left-aligned cigar
|
||||
*/
|
||||
@Ensures({"cigar != null", "refSeq != null", "readSeq != null", "refIndex >= 0", "readIndex >= 0"})
|
||||
protected static Cigar leftAlignCigarSequentially(final Cigar cigar, final byte[] refSeq, final byte[] readSeq, int refIndex, int readIndex) {
|
||||
final Cigar cigarToReturn = new Cigar();
|
||||
Cigar cigarToAlign = new Cigar();
|
||||
for (int i = 0; i < cigar.numCigarElements(); i++) {
|
||||
final CigarElement ce = cigar.getCigarElement(i);
|
||||
if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) {
|
||||
cigarToAlign.add(ce);
|
||||
for( final CigarElement toAdd : AlignmentUtils.leftAlignIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false).getCigarElements() ) {
|
||||
cigarToReturn.add(toAdd);
|
||||
}
|
||||
refIndex += cigarToAlign.getReferenceLength();
|
||||
readIndex += cigarToAlign.getReadLength();
|
||||
cigarToAlign = new Cigar();
|
||||
} else {
|
||||
cigarToAlign.add(ce);
|
||||
}
|
||||
}
|
||||
if( !cigarToAlign.isEmpty() ) {
|
||||
for( final CigarElement toAdd : cigarToAlign.getCigarElements() ) {
|
||||
cigarToReturn.add(toAdd);
|
||||
}
|
||||
}
|
||||
return cigarToReturn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Take a haplotype which was generated by injecting an allele into a string of bases and run SW against the reference to determine the variants on the haplotype.
|
||||
* Unfortunately since this haplotype didn't come from the assembly graph you can't straightforwardly use the bubble traversal algorithm to get this information.
|
||||
* This is a target for future work as we rewrite the HaplotypeCaller to be more bubble-caller based.
|
||||
* @param haplotype the candidate haplotype
|
||||
* @param ref the reference bases to align against
|
||||
* @param haplotypeList the current list of haplotypes
|
||||
* @param activeRegionStart the start of the active region in the reference byte array
|
||||
* @param activeRegionStop the stop of the active region in the reference byte array
|
||||
* @param FORCE_INCLUSION_FOR_GGA_MODE if true will include in the list even if it already exists
|
||||
* @return true if the candidate haplotype was successfully incorporated into the haplotype list
|
||||
*/
|
||||
@Requires({"ref != null", "ref.length >= activeRegionStop - activeRegionStart"})
|
||||
private boolean addHaplotypeForGGA( final Haplotype haplotype, final byte[] ref, final List<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) {
|
||||
if( haplotype == null ) { return false; }
|
||||
|
||||
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
|
||||
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );
|
||||
|
||||
if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 ) { // protect against SW failures
|
||||
if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 || swConsensus.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments
|
||||
return false;
|
||||
}
|
||||
|
||||
final int hapStart = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true );
|
||||
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0, true) );
|
||||
|
||||
final int hapStart = ReadUtils.getReadCoordinateForReferenceCoordinate(haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true);
|
||||
int hapStop = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStop, ReadUtils.ClippingTail.RIGHT_TAIL, true );
|
||||
if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED && activeRegionStop == haplotype.getAlignmentStartHapwrtRef() + haplotype.getCigar().getReferenceLength() ) {
|
||||
hapStop = activeRegionStop; // contract for getReadCoordinateForReferenceCoordinate function says that if read ends at boundary then it is outside of the clipping goal
|
||||
}
|
||||
byte[] newHaplotypeBases;
|
||||
// extend partial haplotypes to contain the full active region sequence
|
||||
int leftBreakPoint = 0;
|
||||
int rightBreakPoint = 0;
|
||||
if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED && hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
|
||||
newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()),
|
||||
haplotype.getBases()),
|
||||
ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
|
||||
leftBreakPoint = swConsensus.getAlignmentStart2wrt1() - activeRegionStart;
|
||||
rightBreakPoint = leftBreakPoint + haplotype.getBases().length;
|
||||
//newHaplotypeBases = haplotype.getBases();
|
||||
//return false; // piece of haplotype isn't anchored within the active region so don't build a haplotype out of it
|
||||
haplotype.getBases()),
|
||||
ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
|
||||
} else if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
|
||||
//return false;
|
||||
newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), ArrayUtils.subarray(haplotype.getBases(), 0, hapStop) );
|
||||
//newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), 0, hapStop);
|
||||
leftBreakPoint = swConsensus.getAlignmentStart2wrt1() - activeRegionStart;
|
||||
} else if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
|
||||
//return false;
|
||||
newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length), ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
|
||||
//newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length);
|
||||
rightBreakPoint = haplotype.getBases().length - hapStart;
|
||||
} else {
|
||||
newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, hapStop);
|
||||
}
|
||||
|
|
@ -424,16 +569,15 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||
|
||||
h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
|
||||
h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) );
|
||||
if ( haplotype.isArtificialHaplotype() ) {
|
||||
h.setArtificialEvent(haplotype.getArtificialEvent());
|
||||
}
|
||||
h.leftBreakPoint = leftBreakPoint;
|
||||
h.rightBreakPoint = rightBreakPoint;
|
||||
if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures
|
||||
if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart || swConsensus2.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments
|
||||
return false;
|
||||
}
|
||||
|
||||
h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0, true) );
|
||||
|
||||
if( FORCE_INCLUSION_FOR_GGA_MODE || !haplotypeList.contains(h) ) {
|
||||
haplotypeList.add(h);
|
||||
return true;
|
||||
|
|
@ -0,0 +1,321 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 2/6/13
|
||||
*/
|
||||
|
||||
public class DeBruijnAssemblyGraph extends DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> {
|
||||
|
||||
public DeBruijnAssemblyGraph() {
|
||||
super(DeBruijnEdge.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param v the vertex to test
|
||||
* @return true if this vertex is a reference node (meaning that it appears on the reference path in the graph)
|
||||
*/
|
||||
public boolean isReferenceNode( final DeBruijnVertex v ) {
|
||||
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
|
||||
for( final DeBruijnEdge e : edgesOf(v) ) {
|
||||
if( e.isRef() ) { return true; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param v the vertex to test
|
||||
* @return true if this vertex is a source node
|
||||
*/
|
||||
public boolean isSource( final DeBruijnVertex v ) {
|
||||
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
|
||||
return inDegreeOf(v) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pull out the additional sequence implied by traversing this node in the graph
|
||||
* @param v the vertex from which to pull out the additional base sequence
|
||||
* @return non-null byte array
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
public byte[] getAdditionalSequence( final DeBruijnVertex v ) {
|
||||
if( v == null ) { throw new IllegalArgumentException("Attempting to pull sequence from a null vertex."); }
|
||||
return ( isSource(v) ? v.getSequence() : v.getSuffix() );
|
||||
}
|
||||
|
||||
/**
|
||||
* @param e the edge to test
|
||||
* @return true if this edge is a reference source edge
|
||||
*/
|
||||
public boolean isRefSource( final DeBruijnEdge e ) {
|
||||
if( e == null ) { throw new IllegalArgumentException("Attempting to test a null edge."); }
|
||||
for( final DeBruijnEdge edgeToTest : incomingEdgesOf(getEdgeSource(e)) ) {
|
||||
if( edgeToTest.isRef() ) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param v the vertex to test
|
||||
* @return true if this vertex is a reference source
|
||||
*/
|
||||
public boolean isRefSource( final DeBruijnVertex v ) {
|
||||
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
|
||||
for( final DeBruijnEdge edgeToTest : incomingEdgesOf(v) ) {
|
||||
if( edgeToTest.isRef() ) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param e the edge to test
|
||||
* @return true if this edge is a reference sink edge
|
||||
*/
|
||||
public boolean isRefSink( final DeBruijnEdge e ) {
|
||||
if( e == null ) { throw new IllegalArgumentException("Attempting to test a null edge."); }
|
||||
for( final DeBruijnEdge edgeToTest : outgoingEdgesOf(getEdgeTarget(e)) ) {
|
||||
if( edgeToTest.isRef() ) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param v the vertex to test
|
||||
* @return true if this vertex is a reference sink
|
||||
*/
|
||||
public boolean isRefSink( final DeBruijnVertex v ) {
|
||||
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
|
||||
for( final DeBruijnEdge edgeToTest : outgoingEdgesOf(v) ) {
|
||||
if( edgeToTest.isRef() ) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the reference source vertex pulled from the graph, can be null if it doesn't exist in the graph
|
||||
*/
|
||||
public DeBruijnVertex getReferenceSourceVertex( ) {
|
||||
for( final DeBruijnVertex v : vertexSet() ) {
|
||||
if( isReferenceNode(v) && isRefSource(v) ) {
|
||||
return v;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the reference sink vertex pulled from the graph, can be null if it doesn't exist in the graph
|
||||
*/
|
||||
public DeBruijnVertex getReferenceSinkVertex( ) {
|
||||
for( final DeBruijnVertex v : vertexSet() ) {
|
||||
if( isReferenceNode(v) && isRefSink(v) ) {
|
||||
return v;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverse the graph and get the next reference vertex if it exists
|
||||
* @param v the current vertex, can be null
|
||||
* @return the next reference vertex if it exists
|
||||
*/
|
||||
public DeBruijnVertex getNextReferenceVertex( final DeBruijnVertex v ) {
|
||||
if( v == null ) { return null; }
|
||||
for( final DeBruijnEdge edgeToTest : outgoingEdgesOf(v) ) {
|
||||
if( edgeToTest.isRef() ) {
|
||||
return getEdgeTarget(edgeToTest);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverse the graph and get the previous reference vertex if it exists
|
||||
* @param v the current vertex, can be null
|
||||
* @return the previous reference vertex if it exists
|
||||
*/
|
||||
public DeBruijnVertex getPrevReferenceVertex( final DeBruijnVertex v ) {
|
||||
if( v == null ) { return null; }
|
||||
for( final DeBruijnEdge edgeToTest : incomingEdgesOf(v) ) {
|
||||
if( isReferenceNode(getEdgeSource(edgeToTest)) ) {
|
||||
return getEdgeSource(edgeToTest);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does a reference path exist between the two vertices?
|
||||
* @param fromVertex from this vertex, can be null
|
||||
* @param toVertex to this vertex, can be null
|
||||
* @return true if a reference path exists in the graph between the two vertices
|
||||
*/
|
||||
public boolean referencePathExists(final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex) {
|
||||
DeBruijnVertex v = fromVertex;
|
||||
if( v == null ) {
|
||||
return false;
|
||||
}
|
||||
v = getNextReferenceVertex(v);
|
||||
if( v == null ) {
|
||||
return false;
|
||||
}
|
||||
while( !v.equals(toVertex) ) {
|
||||
v = getNextReferenceVertex(v);
|
||||
if( v == null ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Walk along the reference path in the graph and pull out the corresponding bases
|
||||
* @param fromVertex starting vertex
|
||||
* @param toVertex ending vertex
|
||||
* @param includeStart should the starting vertex be included in the path
|
||||
* @param includeStop should the ending vertex be included in the path
|
||||
* @return byte[] array holding the reference bases, this can be null if there are no nodes between the starting and ending vertex (insertions for example)
|
||||
*/
|
||||
public byte[] getReferenceBytes( final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex, final boolean includeStart, final boolean includeStop ) {
|
||||
if( fromVertex == null ) { throw new IllegalArgumentException("Starting vertex in requested path cannot be null."); }
|
||||
if( toVertex == null ) { throw new IllegalArgumentException("From vertex in requested path cannot be null."); }
|
||||
|
||||
byte[] bytes = null;
|
||||
DeBruijnVertex v = fromVertex;
|
||||
if( includeStart ) {
|
||||
bytes = ArrayUtils.addAll(bytes, getAdditionalSequence(v));
|
||||
}
|
||||
v = getNextReferenceVertex(v); // advance along the reference path
|
||||
while( v != null && !v.equals(toVertex) ) {
|
||||
bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(v) );
|
||||
v = getNextReferenceVertex(v); // advance along the reference path
|
||||
}
|
||||
if( includeStop && v != null && v.equals(toVertex)) {
|
||||
bytes = ArrayUtils.addAll(bytes, getAdditionalSequence(v));
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pull kmers out of the given long sequence and throw them on in the graph
|
||||
* @param sequence byte array holding the sequence with which to build the assembly graph
|
||||
* @param KMER_LENGTH the desired kmer length to use
|
||||
* @param isRef if true the kmers added to the graph will have reference edges linking them
|
||||
*/
|
||||
public void addSequenceToGraph( final byte[] sequence, final int KMER_LENGTH, final boolean isRef ) {
|
||||
if( sequence.length < KMER_LENGTH + 1 ) { throw new IllegalArgumentException("Provided sequence is too small for the given kmer length"); }
|
||||
final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
|
||||
for( int iii = 0; iii < kmersInSequence - 1; iii++ ) {
|
||||
addKmersToGraph(Arrays.copyOfRange(sequence, iii, iii + KMER_LENGTH), Arrays.copyOfRange(sequence, iii + 1, iii + 1 + KMER_LENGTH), isRef);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add edge to assembly graph connecting the two kmers
|
||||
* @param kmer1 the source kmer for the edge
|
||||
* @param kmer2 the target kmer for the edge
|
||||
* @param isRef true if the added edge is a reference edge
|
||||
* @return will return false if trying to add a reference edge which creates a cycle in the assembly graph
|
||||
*/
|
||||
public boolean addKmersToGraph( final byte[] kmer1, final byte[] kmer2, final boolean isRef ) {
|
||||
if( kmer1 == null ) { throw new IllegalArgumentException("Attempting to add a null kmer to the graph."); }
|
||||
if( kmer2 == null ) { throw new IllegalArgumentException("Attempting to add a null kmer to the graph."); }
|
||||
if( kmer1.length != kmer2.length ) { throw new IllegalArgumentException("Attempting to add a kmers to the graph with different lengths."); }
|
||||
|
||||
final int numVertexBefore = vertexSet().size();
|
||||
final DeBruijnVertex v1 = new DeBruijnVertex( kmer1, kmer1.length );
|
||||
addVertex(v1);
|
||||
final DeBruijnVertex v2 = new DeBruijnVertex( kmer2, kmer2.length );
|
||||
addVertex(v2);
|
||||
if( isRef && vertexSet().size() == numVertexBefore ) { return false; }
|
||||
|
||||
final DeBruijnEdge targetEdge = getEdge(v1, v2);
|
||||
if ( targetEdge == null ) {
|
||||
addEdge(v1, v2, new DeBruijnEdge( isRef ));
|
||||
} else {
|
||||
if( isRef ) {
|
||||
targetEdge.setIsRef( true );
|
||||
}
|
||||
targetEdge.setMultiplicity(targetEdge.getMultiplicity() + 1);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print out the graph in the dot language for visualization
|
||||
* @param GRAPH_WRITER PrintStream to write to
|
||||
*/
|
||||
public void printGraph( final PrintStream GRAPH_WRITER ) {
|
||||
if( GRAPH_WRITER == null ) { throw new IllegalArgumentException("PrintStream cannot be null."); }
|
||||
|
||||
GRAPH_WRITER.println("digraph assembly {");
|
||||
for( final DeBruijnEdge edge : edgeSet() ) {
|
||||
GRAPH_WRITER.println("\t" + getEdgeSource(edge).toString() + " -> " + getEdgeTarget(edge).toString() + " [" + "label=\""+ edge.getMultiplicity() +"\"" + "];");
|
||||
if( edge.isRef() ) {
|
||||
GRAPH_WRITER.println("\t" + getEdgeSource(edge).toString() + " -> " + getEdgeTarget(edge).toString() + " [color=red];");
|
||||
}
|
||||
}
|
||||
for( final DeBruijnVertex v : vertexSet() ) {
|
||||
final String label = ( inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
|
||||
GRAPH_WRITER.println("\t" + v.toString() + " [label=\"" + label + "\"]");
|
||||
}
|
||||
GRAPH_WRITER.println("}");
|
||||
}
|
||||
}
|
||||
|
|
@ -95,12 +95,12 @@ public class DeBruijnEdge {
|
|||
}
|
||||
|
||||
// For use when comparing edges pulled from the same graph
|
||||
public boolean equals( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
|
||||
public boolean equals( final DeBruijnAssemblyGraph graph, final DeBruijnEdge edge ) {
|
||||
return (graph.getEdgeSource(this).equals(graph.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph.getEdgeTarget(edge)));
|
||||
}
|
||||
|
||||
// For use when comparing edges across graphs!
|
||||
public boolean equals( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph2 ) {
|
||||
public boolean equals( final DeBruijnAssemblyGraph graph, final DeBruijnEdge edge, final DeBruijnAssemblyGraph graph2 ) {
|
||||
return (graph.getEdgeSource(this).equals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph2.getEdgeTarget(edge)));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ public class DeBruijnVertex {
|
|||
}
|
||||
|
||||
public String getSuffixString() {
|
||||
return new String( getSuffix() );
|
||||
return new String(getSuffix());
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
|
|
|
|||
|
|
@ -79,17 +79,87 @@ public class GenotypingEngine {
|
|||
noCall.add(Allele.NO_CALL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Carries the result of a call to #assignGenotypeLikelihoods
|
||||
*/
|
||||
public static class CalledHaplotypes {
|
||||
private final List<VariantContext> calls;
|
||||
private final Set<Haplotype> calledHaplotypes;
|
||||
|
||||
protected CalledHaplotypes(final List<VariantContext> calls, final Set<Haplotype> calledHaplotypes) {
|
||||
if ( calls == null ) throw new IllegalArgumentException("calls cannot be null");
|
||||
if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null");
|
||||
if ( Utils.xor(calls.isEmpty(), calledHaplotypes.isEmpty()) )
|
||||
throw new IllegalArgumentException("Calls and calledHaplotypes should both be empty or both not but got calls=" + calls + " calledHaplotypes=" + calledHaplotypes);
|
||||
this.calls = calls;
|
||||
this.calledHaplotypes = calledHaplotypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of calls made at this location
|
||||
* @return a non-null (but potentially empty) list of calls
|
||||
*/
|
||||
public List<VariantContext> getCalls() {
|
||||
return calls;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the set of haplotypes that we actually called (i.e., underlying one of the VCs in getCalls().
|
||||
* @return a non-null set of haplotypes
|
||||
*/
|
||||
public Set<Haplotype> getCalledHaplotypes() {
|
||||
return calledHaplotypes;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Main entry point of class - given a particular set of haplotypes, samples and reference context, compute
|
||||
* genotype likelihoods and assemble into a list of variant contexts and genomic events ready for calling
|
||||
*
|
||||
* @param UG_engine UG Engine with basic input parameters
|
||||
* @param haplotypes Haplotypes to assign likelihoods to
|
||||
* @param samples Samples to genotype
|
||||
* @param haplotypeReadMap Map from reads->(haplotypes,likelihoods)
|
||||
* @param perSampleFilteredReadList
|
||||
* @param ref Reference bytes at active region
|
||||
* @param refLoc Corresponding active region genome location
|
||||
* @param activeRegionWindow Active window
|
||||
* @param genomeLocParser GenomeLocParser
|
||||
* @param activeAllelesToGenotype Alleles to genotype
|
||||
* @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes
|
||||
*/
|
||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||
public List<VariantContext> assignGenotypeLikelihoods( final UnifiedGenotyperEngine UG_engine,
|
||||
final List<Haplotype> haplotypes,
|
||||
final List<String> samples,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final List<VariantContext> activeAllelesToGenotype ) {
|
||||
@Ensures("result != null")
|
||||
// TODO - can this be refactored? this is hard to follow!
|
||||
public CalledHaplotypes assignGenotypeLikelihoods( final UnifiedGenotyperEngine UG_engine,
|
||||
final List<Haplotype> haplotypes,
|
||||
final List<String> samples,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final List<VariantContext> activeAllelesToGenotype ) {
|
||||
// sanity check input arguments
|
||||
if (UG_engine == null)
|
||||
throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
|
||||
if (haplotypes == null || haplotypes.isEmpty())
|
||||
throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
|
||||
if (samples == null || samples.isEmpty())
|
||||
throw new IllegalArgumentException("samples input must be non-empty and non-null, got "+samples);
|
||||
if (haplotypeReadMap == null || haplotypeReadMap.isEmpty())
|
||||
throw new IllegalArgumentException("haplotypeReadMap input should be non-empty and non-null, got "+haplotypeReadMap);
|
||||
if (ref == null || ref.length == 0 )
|
||||
throw new IllegalArgumentException("ref bytes input should be non-empty and non-null, got "+ref);
|
||||
if (refLoc == null || refLoc.getStop()-refLoc.getStart()+1 != ref.length)
|
||||
throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc);
|
||||
if (activeRegionWindow == null )
|
||||
throw new IllegalArgumentException("activeRegionWindow must be non-null, got "+activeRegionWindow);
|
||||
if (activeAllelesToGenotype == null )
|
||||
throw new IllegalArgumentException("activeAllelesToGenotype must be non-null, got "+activeAllelesToGenotype);
|
||||
if (genomeLocParser == null )
|
||||
throw new IllegalArgumentException("genomeLocParser must be non-null, got "+genomeLocParser);
|
||||
|
||||
final List<VariantContext> returnCalls = new ArrayList<VariantContext>();
|
||||
final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty();
|
||||
|
|
@ -105,7 +175,6 @@ public class GenotypingEngine {
|
|||
if( DEBUG ) {
|
||||
System.out.println( h.toString() );
|
||||
System.out.println( "> Cigar = " + h.getCigar() );
|
||||
System.out.println( "> Left and right breaks = (" + h.leftBreakPoint + " , " + h.rightBreakPoint + ")");
|
||||
System.out.println( ">> Events = " + h.getEventMap());
|
||||
}
|
||||
}
|
||||
|
|
@ -121,6 +190,8 @@ public class GenotypingEngine {
|
|||
}
|
||||
}
|
||||
|
||||
final Set<Haplotype> calledHaplotypes = new HashSet<Haplotype>();
|
||||
|
||||
// Walk along each position in the key set and create each event to be outputted
|
||||
for( final int loc : startPosKeySet ) {
|
||||
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
|
||||
|
|
@ -180,7 +251,7 @@ public class GenotypingEngine {
|
|||
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
|
||||
throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles.");
|
||||
}
|
||||
final Map<VariantContext, Allele> mergeMap = new HashMap<VariantContext, Allele>();
|
||||
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<VariantContext, Allele>();
|
||||
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
|
||||
for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) {
|
||||
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
|
||||
|
|
@ -203,6 +274,10 @@ public class GenotypingEngine {
|
|||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
|
||||
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
|
||||
|
||||
// maintain the set of all called haplotypes
|
||||
for ( final Allele calledAllele : call.getAlleles() )
|
||||
calledHaplotypes.addAll(alleleMapper.get(calledAllele));
|
||||
|
||||
if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
}
|
||||
|
|
@ -211,9 +286,18 @@ public class GenotypingEngine {
|
|||
}
|
||||
}
|
||||
}
|
||||
return returnCalls;
|
||||
return new CalledHaplotypes(returnCalls, calledHaplotypes);
|
||||
}
|
||||
|
||||
/**
|
||||
* For a particular event described in inputVC, form PL vector for each sample by looking into allele read map and filling likelihood matrix for each allele
|
||||
* @param samples List of samples to genotype
|
||||
* @param alleleReadMap Allele map describing mapping from reads to alleles and corresponding likelihoods
|
||||
* @param mergedVC Input VC with event to genotype
|
||||
* @return GenotypesContext object wrapping genotype objects with PLs
|
||||
*/
|
||||
@Requires({"samples != null","alleleReadMap!= null", "mergedVC != null"})
|
||||
@Ensures("result != null")
|
||||
private GenotypesContext calculateGLsForThisEvent( final List<String> samples, final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap, final VariantContext mergedVC ) {
|
||||
final GenotypesContext genotypes = GenotypesContext.create(samples.size());
|
||||
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
|
||||
|
|
@ -254,7 +338,7 @@ public class GenotypingEngine {
|
|||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
final VariantContext call ) {
|
||||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new LinkedHashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
final GenomeLoc callLoc = parser.createGenomeLoc(call);
|
||||
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perSampleReadMap.entrySet() ) {
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
|
|
@ -283,6 +367,12 @@ public class GenotypingEngine {
|
|||
return returnMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes symbolic events from list of haplotypes
|
||||
* @param haplotypes Input/output list of haplotypes, before/after removal
|
||||
*/
|
||||
// TODO - split into input haplotypes and output haplotypes as not to share I/O arguments
|
||||
@Requires("haplotypes != null")
|
||||
protected static void cleanUpSymbolicUnassembledEvents( final List<Haplotype> haplotypes ) {
|
||||
final List<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
|
|
@ -308,7 +398,7 @@ public class GenotypingEngine {
|
|||
final double downsamplingFraction,
|
||||
final PrintStream downsamplingLog ) {
|
||||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = new LinkedHashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
for( final Map.Entry<Allele, List<Haplotype>> alleleMapperEntry : alleleMapper.entrySet() ) { // for each output allele
|
||||
|
|
@ -316,7 +406,7 @@ public class GenotypingEngine {
|
|||
for( final Map.Entry<GATKSAMRecord, Map<Allele,Double>> readEntry : haplotypeReadMapEntry.getValue().getLikelihoodReadMap().entrySet() ) { // for each read
|
||||
double maxLikelihood = Double.NEGATIVE_INFINITY;
|
||||
for( final Map.Entry<Allele,Double> alleleDoubleEntry : readEntry.getValue().entrySet() ) { // for each input allele
|
||||
if( mappedHaplotypes.contains( new Haplotype(alleleDoubleEntry.getKey().getBases())) ) { // exact match of haplotype base string
|
||||
if( mappedHaplotypes.contains( new Haplotype(alleleDoubleEntry.getKey())) ) { // exact match of haplotype base string
|
||||
maxLikelihood = Math.max( maxLikelihood, alleleDoubleEntry.getValue() );
|
||||
}
|
||||
}
|
||||
|
|
@ -330,6 +420,15 @@ public class GenotypingEngine {
|
|||
return alleleReadMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO - comment me, clean me, refactor me!
|
||||
* @param haplotypes
|
||||
* @param samples
|
||||
* @param haplotypeReadMap
|
||||
* @param startPosKeySet
|
||||
* @param ref
|
||||
* @param refLoc
|
||||
*/
|
||||
protected void mergeConsecutiveEventsBasedOnLD( final List<Haplotype> haplotypes,
|
||||
final List<String> samples,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||
|
|
@ -381,7 +480,7 @@ public class GenotypingEngine {
|
|||
}
|
||||
// count up the co-occurrences of the events for the R^2 calculation
|
||||
for( final String sample : samples ) {
|
||||
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h.getBases())) )[0][0];
|
||||
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h, true)) )[0][0];
|
||||
if( thisHapVC == null ) {
|
||||
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
|
||||
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
|
||||
|
|
@ -474,7 +573,7 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
protected static Map<Allele, List<Haplotype>> createAlleleMapper( final Map<VariantContext, Allele> mergeMap, final Map<Event, List<Haplotype>> eventMap ) {
|
||||
final Map<Allele, List<Haplotype>> alleleMapper = new HashMap<Allele, List<Haplotype>>();
|
||||
final Map<Allele, List<Haplotype>> alleleMapper = new LinkedHashMap<Allele, List<Haplotype>>();
|
||||
for( final Map.Entry<VariantContext, Allele> entry : mergeMap.entrySet() ) {
|
||||
alleleMapper.put(entry.getValue(), eventMap.get(new Event(entry.getKey())));
|
||||
}
|
||||
|
|
@ -485,7 +584,7 @@ public class GenotypingEngine {
|
|||
@Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
|
||||
protected static Map<Event, List<Haplotype>> createEventMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {
|
||||
|
||||
final Map<Event, List<Haplotype>> eventMapper = new HashMap<Event, List<Haplotype>>(eventsAtThisLoc.size()+1);
|
||||
final Map<Event, List<Haplotype>> eventMapper = new LinkedHashMap<Event, List<Haplotype>>(eventsAtThisLoc.size()+1);
|
||||
VariantContext refVC = eventsAtThisLoc.get(0); // the genome loc is the only safe thing to pull out of this VC because ref/alt pairs might change reference basis
|
||||
eventMapper.put(new Event(null), new ArrayList<Haplotype>());
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
|
|
@ -598,13 +697,14 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
protected static Map<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) {
|
||||
final Map<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
|
||||
final Map<Integer,VariantContext> vcs = new LinkedHashMap<Integer,VariantContext>();
|
||||
|
||||
int refPos = alignmentStartHapwrtRef;
|
||||
if( refPos < 0 ) { return null; } // Protection against SW failures
|
||||
int alignmentPos = 0;
|
||||
|
||||
for( final CigarElement ce : cigar.getCigarElements() ) {
|
||||
for( int cigarIndex = 0; cigarIndex < cigar.numCigarElements(); cigarIndex++ ) {
|
||||
final CigarElement ce = cigar.getCigarElement(cigarIndex);
|
||||
final int elementLength = ce.getLength();
|
||||
switch( ce.getOperator() ) {
|
||||
case I:
|
||||
|
|
@ -615,7 +715,7 @@ public class GenotypingEngine {
|
|||
if( BaseUtils.isRegularBase(refByte) ) {
|
||||
insertionAlleles.add( Allele.create(refByte, true) );
|
||||
}
|
||||
if( (haplotype.leftBreakPoint != 0 || haplotype.rightBreakPoint != 0) && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||
if( cigarIndex == 0 || cigarIndex == cigar.getCigarElements().size() - 1 ) { // if the insertion isn't completely resolved in the haplotype then make it a symbolic allele
|
||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
} else {
|
||||
byte[] insertionBases = new byte[]{};
|
||||
|
|
@ -641,20 +741,12 @@ public class GenotypingEngine {
|
|||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
||||
final List<Allele> deletionAlleles = new ArrayList<Allele>();
|
||||
final int deletionStart = refLoc.getStart() + refPos - 1;
|
||||
// BUGBUG: how often does this symbolic deletion allele case happen?
|
||||
//if( haplotype != null && ( (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength)
|
||||
// || (haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength) ) ) {
|
||||
// deletionAlleles.add( Allele.create(ref[refPos-1], true) );
|
||||
// deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
|
||||
//} else {
|
||||
final byte refByte = ref[refPos-1];
|
||||
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
|
||||
deletionAlleles.add( Allele.create(deletionBases, true) );
|
||||
deletionAlleles.add( Allele.create(refByte, false) );
|
||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
||||
}
|
||||
//}
|
||||
refPos += elementLength;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,10 +47,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -73,21 +71,23 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
|||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -129,11 +129,11 @@ import java.util.*;
|
|||
* @since 8/22/11
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
@ActiveRegionTraversalParameters(extension=65, maxRegion=300)
|
||||
@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=20)
|
||||
@ActiveRegionTraversalParameters(extension=85, maxRegion=300)
|
||||
@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=250)
|
||||
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
|
||||
|
||||
/**
|
||||
|
|
@ -146,15 +146,39 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
protected PrintStream graphWriter = null;
|
||||
|
||||
/**
|
||||
* The assembled haplotypes will be written as BAM to this file if requested. Really for debugging purposes only. Note that the output here
|
||||
* does not include uninformative reads so that not every input read is emitted to the bam.
|
||||
* The assembled haplotypes will be written as BAM to this file if requested. Really for debugging purposes only.
|
||||
* Note that the output here does not include uninformative reads so that not every input read is emitted to the bam.
|
||||
*
|
||||
* Turning on this mode may result in serious performance cost for the HC. It's really only approprate to
|
||||
* use in specific areas where you want to better understand why the HC is making specific calls.
|
||||
*
|
||||
* The reads are written out containing a HC tag (integer) that encodes which haplotype each read best matches
|
||||
* according to the haplotype caller's likelihood calculation. The use of this tag is primarily intended
|
||||
* to allow good coloring of reads in IGV. Simply go to Color Alignments By > Tag and enter HC to more
|
||||
* easily see which reads go with these haplotype.
|
||||
*
|
||||
* Note that the haplotypes (called or all, depending on mode) are emitted as single reads covering the entire
|
||||
* active region, coming from read HC and a special read group.
|
||||
*
|
||||
* Note that only reads that are actually informative about the haplotypes are emitted. By informative we mean
|
||||
* that there's a meaningful difference in the likelihood of the read coming from one haplotype compared to
|
||||
* its next best haplotype.
|
||||
*
|
||||
* The best way to visualize the output of this mode is with IGV. Tell IGV to color the alignments by tag,
|
||||
* and give it the HC tag, so you can see which reads support each haplotype. Finally, you can tell IGV
|
||||
* to group by sample, which will separate the potential haplotypes from the reads. All of this can be seen
|
||||
* in the following screenshot: https://www.dropbox.com/s/xvy7sbxpf13x5bp/haplotypecaller%20bamout%20for%20docs.png
|
||||
*
|
||||
*/
|
||||
@Hidden
|
||||
@Output(fullName="bamOutput", shortName="bam", doc="File to which assembled haplotypes should be written", required = false)
|
||||
@Output(fullName="bamOutput", shortName="bamout", doc="File to which assembled haplotypes should be written", required = false)
|
||||
protected StingSAMFileWriter bamWriter = null;
|
||||
private SAMFileHeader bamHeader = null;
|
||||
private long uniqueNameCounter = 1;
|
||||
private final static String readGroupId = "ArtificialHaplotype";
|
||||
private HaplotypeBAMWriter haplotypeBAMWriter;
|
||||
|
||||
/**
|
||||
* The type of BAM output we want to see.
|
||||
*/
|
||||
@Output(fullName="bamWriterType", shortName="bamWriterType", doc="How should haplotypes be written to the BAM?", required = false)
|
||||
public HaplotypeBAMWriter.Type bamWriterType = HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES;
|
||||
|
||||
/**
|
||||
* The PairHMM implementation to use for genotype likelihood calculations. The various implementations balance a tradeoff of accuracy and runtime.
|
||||
|
|
@ -269,7 +293,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
private CachingIndexedFastaSequenceFile referenceReader;
|
||||
|
||||
// reference base padding size
|
||||
private static final int REFERENCE_PADDING = 900;
|
||||
private static final int REFERENCE_PADDING = 500;
|
||||
|
||||
// bases with quality less than or equal to this value are trimmed off the tails of the reads
|
||||
private static final byte MIN_TAIL_QUALITY = 20;
|
||||
|
|
@ -305,9 +329,18 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
simpleUAC.CONTAMINATION_FRACTION = 0.0;
|
||||
simpleUAC.CONTAMINATION_FRACTION_FILE=null;
|
||||
simpleUAC.exactCallsLog = null;
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// Currently, per-sample contamination level is only implemented for UG
|
||||
if( UAC.CONTAMINATION_FRACTION_FILE !=null) {
|
||||
throw new UserException("Per-Sample contamination level not supported in Haplotype Caller at this point");
|
||||
}
|
||||
|
||||
// when we do implement per-sample contamination for HC, this will probably be needed.
|
||||
// UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, samples, logger));
|
||||
|
||||
// initialize the output VCF header
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
|
||||
|
|
@ -340,12 +373,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
|
||||
}
|
||||
|
||||
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter, minKmer );
|
||||
assemblyEngine = new DeBruijnAssembler( DEBUG, graphWriter, minKmer );
|
||||
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
|
||||
genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );
|
||||
|
||||
if ( bamWriter != null )
|
||||
setupBamWriter();
|
||||
haplotypeBAMWriter = HaplotypeBAMWriter.create(bamWriterType, bamWriter, getToolkit().getSAMFileHeader());
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -465,17 +498,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
if( activeRegion.size() == 0 && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { return 0; } // No reads here so nothing to do!
|
||||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && activeAllelesToGenotype.isEmpty() ) { return 0; } // No alleles found in this region so nothing to do!
|
||||
|
||||
finalizeActiveRegion( activeRegion ); // merge overlapping fragments, clip adapter and low qual tails
|
||||
|
||||
// note this operation must be performed before we clip the reads down, as this must correspond to the full reference region
|
||||
final GenomeLoc fullSpanBeforeClipping = getPaddedLoc(activeRegion);
|
||||
finalizeActiveRegion(activeRegion); // merge overlapping fragments, clip adapter and low qual tails
|
||||
|
||||
final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); // Create the reference haplotype which is the bases from the reference that make up the active region
|
||||
final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING);
|
||||
final List<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype );
|
||||
final byte[] fullReferenceWithPadding = activeRegion.getActiveRegionReference(referenceReader, REFERENCE_PADDING);
|
||||
final GenomeLoc paddedReferenceLoc = getPaddedLoc(activeRegion);
|
||||
|
||||
final List<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, MIN_PRUNE_FACTOR, activeAllelesToGenotype );
|
||||
if( haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do!
|
||||
|
||||
activeRegion.hardClipToActiveRegion(); // only evaluate the parts of reads that are overlapping the active region
|
||||
final List<GATKSAMRecord> filteredReads = filterNonPassingReads( activeRegion ); // filter out reads from genotyping which fail mapping quality based criteria
|
||||
if( activeRegion.size() == 0 ) { return 1; } // no reads remain after filtering so nothing else to do!
|
||||
|
||||
|
|
@ -490,38 +521,25 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
final List<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ?
|
||||
likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes );
|
||||
|
||||
for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoods( UG_engine,
|
||||
bestHaplotypes,
|
||||
samplesList,
|
||||
stratifiedReadMap,
|
||||
perSampleFilteredReadList,
|
||||
fullReferenceWithPadding,
|
||||
fullSpanBeforeClipping,
|
||||
activeRegion.getLocation(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
activeAllelesToGenotype ) ) {
|
||||
annotationEngine.annotateDBs(metaDataTracker, getToolkit().getGenomeLocParser().createGenomeLoc(call), call);
|
||||
final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine,
|
||||
bestHaplotypes,
|
||||
samplesList,
|
||||
stratifiedReadMap,
|
||||
perSampleFilteredReadList,
|
||||
fullReferenceWithPadding,
|
||||
paddedReferenceLoc,
|
||||
activeRegion.getLocation(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
activeAllelesToGenotype );
|
||||
|
||||
for( final VariantContext call : calledHaplotypes.getCalls() ) {
|
||||
// TODO -- uncomment this line once ART-based walkers have a proper RefMetaDataTracker.
|
||||
// annotationEngine.annotateDBs(metaDataTracker, getToolkit().getGenomeLocParser().createGenomeLoc(call), call);
|
||||
vcfWriter.add( call );
|
||||
}
|
||||
|
||||
if ( bamWriter != null ) {
|
||||
// write the haplotypes to the bam
|
||||
for ( Haplotype haplotype : haplotypes )
|
||||
writeHaplotype(haplotype, fullSpanBeforeClipping, bestHaplotypes.contains(haplotype));
|
||||
|
||||
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
|
||||
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<Allele, Haplotype>(haplotypes.size());
|
||||
for ( final Haplotype haplotype : haplotypes )
|
||||
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
|
||||
|
||||
// next, output the interesting reads for each sample aligned against the appropriate haplotype
|
||||
for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
|
||||
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
|
||||
final Allele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
|
||||
if ( bestAllele != Allele.NO_CALL )
|
||||
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele), fullSpanBeforeClipping.getStart());
|
||||
}
|
||||
}
|
||||
haplotypeBAMWriter.writeReadsAlignedToHaplotypes(haplotypes, paddedReferenceLoc, bestHaplotypes, calledHaplotypes.getCalledHaplotypes(), stratifiedReadMap);
|
||||
}
|
||||
|
||||
if( DEBUG ) { System.out.println("----------------------------------------------------------------------------------"); }
|
||||
|
|
@ -573,7 +591,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
for( final GATKSAMRecord myRead : finalizedReadList ) {
|
||||
final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) );
|
||||
if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
|
||||
final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
|
||||
GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
|
||||
clippedRead = ReadClipper.hardClipToRegion( clippedRead, activeRegion.getExtendedLoc().getStart(), activeRegion.getExtendedLoc().getStop() );
|
||||
if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) {
|
||||
readsToUse.add(clippedRead);
|
||||
}
|
||||
|
|
@ -594,9 +613,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
}
|
||||
|
||||
private GenomeLoc getPaddedLoc( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
final int padLeft = Math.max(activeRegion.getReadSpanLoc().getStart()-REFERENCE_PADDING, 1);
|
||||
final int padRight = Math.min(activeRegion.getReadSpanLoc().getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(activeRegion.getReadSpanLoc().getContig()).getSequenceLength());
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getReadSpanLoc().getContig(), padLeft, padRight);
|
||||
final int padLeft = Math.max(activeRegion.getExtendedLoc().getStart()-REFERENCE_PADDING, 1);
|
||||
final int padRight = Math.min(activeRegion.getExtendedLoc().getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(activeRegion.getExtendedLoc().getContig()).getSequenceLength());
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getExtendedLoc().getContig(), padLeft, padRight);
|
||||
}
|
||||
|
||||
private Map<String, List<GATKSAMRecord>> splitReadsBySample( final List<GATKSAMRecord> reads ) {
|
||||
|
|
@ -615,92 +634,5 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
return returnMap;
|
||||
}
|
||||
|
||||
private void setupBamWriter() {
|
||||
// prepare the bam header
|
||||
bamHeader = new SAMFileHeader();
|
||||
bamHeader.setSequenceDictionary(getToolkit().getSAMFileHeader().getSequenceDictionary());
|
||||
bamHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||
|
||||
// include the original read groups plus a new artificial one for the haplotypes
|
||||
final List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>(getToolkit().getSAMFileHeader().getReadGroups());
|
||||
final SAMReadGroupRecord rg = new SAMReadGroupRecord(readGroupId);
|
||||
rg.setSample("HC");
|
||||
rg.setSequencingCenter("BI");
|
||||
readGroups.add(rg);
|
||||
bamHeader.setReadGroups(readGroups);
|
||||
|
||||
bamWriter.setPresorted(false);
|
||||
bamWriter.writeHeader(bamHeader);
|
||||
}
|
||||
|
||||
private void writeHaplotype(final Haplotype haplotype, final GenomeLoc paddedRefLoc, final boolean isAmongBestHaplotypes) {
|
||||
final GATKSAMRecord record = new GATKSAMRecord(bamHeader);
|
||||
record.setReadBases(haplotype.getBases());
|
||||
record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef());
|
||||
record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length));
|
||||
record.setCigar(haplotype.getCigar());
|
||||
record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0);
|
||||
record.setReadName("HC" + uniqueNameCounter++);
|
||||
record.setReadUnmappedFlag(false);
|
||||
record.setReferenceIndex(paddedRefLoc.getContigIndex());
|
||||
record.setAttribute(SAMTag.RG.toString(), readGroupId);
|
||||
record.setFlags(16);
|
||||
bamWriter.addAlignment(record);
|
||||
}
|
||||
|
||||
private void writeReadAgainstHaplotype(final GATKSAMRecord read, final Haplotype haplotype, final int referenceStart) {
|
||||
|
||||
final SWPairwiseAlignment swPairwiseAlignment = new SWPairwiseAlignment(haplotype.getBases(), read.getReadBases(), 5.0, -10.0, -22.0, -1.2);
|
||||
final int readStartOnHaplotype = swPairwiseAlignment.getAlignmentStart2wrt1();
|
||||
final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype;
|
||||
read.setAlignmentStart(readStartOnReference);
|
||||
|
||||
final Cigar cigar = generateReadCigarFromHaplotype(read, readStartOnHaplotype, haplotype.getCigar());
|
||||
read.setCigar(cigar);
|
||||
|
||||
bamWriter.addAlignment(read);
|
||||
}
|
||||
|
||||
private Cigar generateReadCigarFromHaplotype(final GATKSAMRecord read, final int readStartOnHaplotype, final Cigar haplotypeCigar) {
|
||||
|
||||
int currentReadPos = 0;
|
||||
int currentHapPos = 0;
|
||||
final List<CigarElement> readCigarElements = new ArrayList<CigarElement>();
|
||||
|
||||
for ( final CigarElement cigarElement : haplotypeCigar.getCigarElements() ) {
|
||||
|
||||
if ( cigarElement.getOperator() == CigarOperator.D ) {
|
||||
if ( currentReadPos > 0 )
|
||||
readCigarElements.add(cigarElement);
|
||||
} else if ( cigarElement.getOperator() == CigarOperator.M || cigarElement.getOperator() == CigarOperator.I ) {
|
||||
|
||||
final int elementLength = cigarElement.getLength();
|
||||
final int nextReadPos = currentReadPos + elementLength;
|
||||
final int nextHapPos = currentHapPos + elementLength;
|
||||
|
||||
// do we want this element?
|
||||
if ( currentReadPos > 0 ) {
|
||||
// do we want the entire element?
|
||||
if ( nextReadPos < read.getReadLength() ) {
|
||||
readCigarElements.add(cigarElement);
|
||||
currentReadPos = nextReadPos;
|
||||
}
|
||||
// otherwise, we can finish up and return the cigar
|
||||
else {
|
||||
readCigarElements.add(new CigarElement(read.getReadLength() - currentReadPos, cigarElement.getOperator()));
|
||||
return new Cigar(readCigarElements);
|
||||
}
|
||||
}
|
||||
// do we want part of the element to start?
|
||||
else if ( currentReadPos == 0 && nextHapPos > readStartOnHaplotype ) {
|
||||
currentReadPos = Math.min(nextHapPos - readStartOnHaplotype, read.getReadLength());
|
||||
readCigarElements.add(new CigarElement(currentReadPos, cigarElement.getOperator()));
|
||||
}
|
||||
|
||||
currentHapPos = nextHapPos;
|
||||
}
|
||||
}
|
||||
|
||||
return new Cigar(readCigarElements);
|
||||
}
|
||||
}
|
||||
|
|
@ -60,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.Window;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
|
|
@ -104,7 +105,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
|
||||
@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
|
||||
public class HaplotypeResolver extends RodWalker<Integer, Integer> {
|
||||
|
||||
|
|
|
|||
|
|
@ -52,10 +52,13 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
|
@ -88,15 +91,17 @@ public class KBestPaths {
|
|||
private final int totalScore;
|
||||
|
||||
// the graph from which this path originated
|
||||
private final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph;
|
||||
private final DeBruijnAssemblyGraph graph;
|
||||
|
||||
// used in the bubble state machine to apply Smith-Waterman to the bubble sequence
|
||||
private final double SW_MATCH = 15.0;
|
||||
private final double SW_MISMATCH = -15.0;
|
||||
private final double SW_GAP = -25.0;
|
||||
private final double SW_GAP_EXTEND = -1.2;
|
||||
// these values were chosen via optimization against the NA12878 knowledge base
|
||||
private static final double SW_MATCH = 20.0;
|
||||
private static final double SW_MISMATCH = -15.0;
|
||||
private static final double SW_GAP = -26.0;
|
||||
private static final double SW_GAP_EXTEND = -1.1;
|
||||
private static final byte[] STARTING_SW_ANCHOR_BYTES = "XXXXXXXXX".getBytes();
|
||||
|
||||
public Path( final DeBruijnVertex initialVertex, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||
public Path( final DeBruijnVertex initialVertex, final DeBruijnAssemblyGraph graph ) {
|
||||
lastVertex = initialVertex;
|
||||
edges = new ArrayList<DeBruijnEdge>(0);
|
||||
totalScore = 0;
|
||||
|
|
@ -119,6 +124,8 @@ public class KBestPaths {
|
|||
* @return true if the edge is found in this path
|
||||
*/
|
||||
public boolean containsEdge( final DeBruijnEdge edge ) {
|
||||
if( edge == null ) { throw new IllegalArgumentException("Attempting to test null edge."); }
|
||||
|
||||
for( final DeBruijnEdge e : edges ) {
|
||||
if( e.equals(graph, edge) ) {
|
||||
return true;
|
||||
|
|
@ -128,7 +135,14 @@ public class KBestPaths {
|
|||
return false;
|
||||
}
|
||||
|
||||
public int numInPath( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
|
||||
/**
|
||||
* Calculate the number of times this edge appears in the path
|
||||
* @param edge the given edge to test
|
||||
* @return number of times this edge appears in the path
|
||||
*/
|
||||
public int numInPath( final DeBruijnEdge edge ) {
|
||||
if( edge == null ) { throw new IllegalArgumentException("Attempting to test null edge."); }
|
||||
|
||||
int numInPath = 0;
|
||||
for( final DeBruijnEdge e : edges ) {
|
||||
if( e.equals(graph, edge) ) {
|
||||
|
|
@ -139,13 +153,17 @@ public class KBestPaths {
|
|||
return numInPath;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Does this path contain a reference edge?
|
||||
* @return true if the path contains a reference edge
|
||||
*/
|
||||
public boolean containsRefEdge() {
|
||||
for( final DeBruijnEdge e : edges ) {
|
||||
if( e.isRef() ) { return true; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public List<DeBruijnEdge> getEdges() { return edges; }
|
||||
|
||||
public int getScore() { return totalScore; }
|
||||
|
|
@ -153,41 +171,31 @@ public class KBestPaths {
|
|||
public DeBruijnVertex getLastVertexInPath() { return lastVertex; }
|
||||
|
||||
/**
|
||||
* The base sequence for this path. Pull the full sequence for the source of the path and then the suffix for all subsequent nodes
|
||||
* The base sequence for this path. Pull the full sequence for source nodes and then the suffix for all subsequent nodes
|
||||
* @return non-null sequence of bases corresponding to this path
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
public byte[] getBases() {
|
||||
if( edges.size() == 0 ) { return lastVertex.getSequence(); }
|
||||
if( edges.size() == 0 ) { return graph.getAdditionalSequence(lastVertex); }
|
||||
|
||||
byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence();
|
||||
byte[] bases = graph.getAdditionalSequence(graph.getEdgeSource(edges.get(0)));
|
||||
for( final DeBruijnEdge e : edges ) {
|
||||
bases = ArrayUtils.addAll(bases, graph.getEdgeTarget( e ).getSuffix());
|
||||
bases = ArrayUtils.addAll(bases, graph.getAdditionalSequence(graph.getEdgeTarget(e)));
|
||||
}
|
||||
return bases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pull the added base sequence implied by visiting this node in a path
|
||||
* @param graph the graph from which the vertex originated
|
||||
* @param v the vertex whose sequence to grab
|
||||
* @return non-null sequence of bases corresponding to this node in the graph
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
public byte[] getAdditionalSequence( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||
return ( edges.size()==0 || graph.getEdgeSource(edges.get(0)).equals(v) ? v.getSequence() : v.getSuffix() );
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the cigar string for this path using a bubble traversal of the assembly graph and running a Smith-Waterman alignment on each bubble
|
||||
* @return non-null Cigar string with reference length equal to the refHaplotype's reference length
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public Cigar calculateCigar() {
|
||||
|
||||
final Cigar cigar = new Cigar();
|
||||
// special case for paths that start on reference but not at the reference source node
|
||||
if( edges.get(0).isRef() && !isRefSource(graph, edges.get(0)) ) {
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, null, graph.getEdgeSource(edges.get(0))).getCigarElements() ) {
|
||||
if( edges.get(0).isRef() && !graph.isRefSource(edges.get(0)) ) {
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(null, null, graph.getEdgeSource(edges.get(0))).getCigarElements() ) {
|
||||
cigar.add(ce);
|
||||
}
|
||||
}
|
||||
|
|
@ -197,18 +205,18 @@ public class KBestPaths {
|
|||
|
||||
for( final DeBruijnEdge e : edges ) {
|
||||
if( e.equals(graph, edges.get(0)) ) {
|
||||
advanceBubbleStateMachine( bsm, graph, graph.getEdgeSource(e), null );
|
||||
advanceBubbleStateMachine( bsm, graph.getEdgeSource(e), null );
|
||||
}
|
||||
advanceBubbleStateMachine( bsm, graph, graph.getEdgeTarget(e), e );
|
||||
advanceBubbleStateMachine( bsm, graph.getEdgeTarget(e), e );
|
||||
}
|
||||
|
||||
// special case for paths that don't end on reference
|
||||
if( bsm.inBubble ) {
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, null).getCigarElements() ) {
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(bsm.bubbleBytes, bsm.lastSeenReferenceNode, null).getCigarElements() ) {
|
||||
bsm.cigar.add(ce);
|
||||
}
|
||||
} else if( edges.get(edges.size()-1).isRef() && !isRefSink(graph, edges.get(edges.size()-1)) ) { // special case for paths that end of the reference but haven't completed the entire reference circuit
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, graph.getEdgeTarget(edges.get(edges.size()-1)), null).getCigarElements() ) {
|
||||
} else if( edges.get(edges.size()-1).isRef() && !graph.isRefSink(edges.get(edges.size()-1)) ) { // special case for paths that end of the reference but haven't completed the entire reference circuit
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(bsm.bubbleBytes, graph.getEdgeTarget(edges.get(edges.size()-1)), null).getCigarElements() ) {
|
||||
bsm.cigar.add(ce);
|
||||
}
|
||||
}
|
||||
|
|
@ -216,59 +224,72 @@ public class KBestPaths {
|
|||
return AlignmentUtils.consolidateCigar(bsm.cigar);
|
||||
}
|
||||
|
||||
/**
|
||||
* Advance the bubble state machine by incorporating the next node in the path.
|
||||
* @param bsm the current bubble state machine
|
||||
* @param node the node to be incorporated
|
||||
* @param e the edge which generated this node in the path
|
||||
*/
|
||||
@Requires({"bsm != null", "graph != null", "node != null"})
|
||||
private void advanceBubbleStateMachine( final BubbleStateMachine bsm, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex node, final DeBruijnEdge e ) {
|
||||
if( isReferenceNode( graph, node ) ) {
|
||||
private void advanceBubbleStateMachine( final BubbleStateMachine bsm, final DeBruijnVertex node, final DeBruijnEdge e ) {
|
||||
if( graph.isReferenceNode( node ) ) {
|
||||
if( !bsm.inBubble ) { // just add the ref bases as M's in the Cigar string, and don't do anything else
|
||||
if( e !=null && !e.isRef() ) {
|
||||
if( referencePathExists( graph, graph.getEdgeSource(e), node) ) {
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, graph.getEdgeSource(e), node).getCigarElements() ) {
|
||||
if( graph.referencePathExists( graph.getEdgeSource(e), node) ) {
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(null, graph.getEdgeSource(e), node).getCigarElements() ) {
|
||||
bsm.cigar.add(ce);
|
||||
}
|
||||
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
|
||||
bsm.cigar.add( new CigarElement( graph.getAdditionalSequence(node).length, CigarOperator.M) );
|
||||
} else if ( graph.getEdgeSource(e).equals(graph.getEdgeTarget(e)) ) { // alt edge at ref node points to itself
|
||||
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.I) );
|
||||
bsm.cigar.add( new CigarElement( graph.getAdditionalSequence(node).length, CigarOperator.I) );
|
||||
} else {
|
||||
bsm.inBubble = true;
|
||||
bsm.bubbleBytes = null;
|
||||
bsm.lastSeenReferenceNode = graph.getEdgeSource(e);
|
||||
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
|
||||
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, graph.getAdditionalSequence(node) );
|
||||
}
|
||||
} else {
|
||||
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
|
||||
bsm.cigar.add( new CigarElement( graph.getAdditionalSequence(node).length, CigarOperator.M) );
|
||||
}
|
||||
} else if( bsm.lastSeenReferenceNode != null && !referencePathExists( graph, bsm.lastSeenReferenceNode, node ) ) { // add bases to the bubble string until we get back to the reference path
|
||||
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
|
||||
} else if( bsm.lastSeenReferenceNode != null && !graph.referencePathExists( bsm.lastSeenReferenceNode, node ) ) { // add bases to the bubble string until we get back to the reference path
|
||||
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, graph.getAdditionalSequence(node) );
|
||||
} else { // close the bubble and use a local SW to determine the Cigar string
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, node).getCigarElements() ) {
|
||||
for( final CigarElement ce : calculateCigarForCompleteBubble(bsm.bubbleBytes, bsm.lastSeenReferenceNode, node).getCigarElements() ) {
|
||||
bsm.cigar.add(ce);
|
||||
}
|
||||
bsm.inBubble = false;
|
||||
bsm.bubbleBytes = null;
|
||||
bsm.lastSeenReferenceNode = null;
|
||||
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
|
||||
bsm.cigar.add( new CigarElement( graph.getAdditionalSequence(node).length, CigarOperator.M) );
|
||||
}
|
||||
} else { // non-ref vertex
|
||||
if( bsm.inBubble ) { // just keep accumulating until we get back to the reference path
|
||||
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
|
||||
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, graph.getAdditionalSequence(node) );
|
||||
} else { // open up a bubble
|
||||
bsm.inBubble = true;
|
||||
bsm.bubbleBytes = null;
|
||||
bsm.lastSeenReferenceNode = (e != null ? graph.getEdgeSource(e) : null );
|
||||
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
|
||||
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, graph.getAdditionalSequence(node) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Now that we have a completed bubble run a Smith-Waterman alignment to determine the cigar string for this bubble
|
||||
* @param bubbleBytes the bytes that comprise the alternate allele path in this bubble
|
||||
* @param fromVertex the vertex that marks the beginning of the reference path in this bubble (null indicates ref source vertex)
|
||||
* @param toVertex the vertex that marks the end of the reference path in this bubble (null indicates ref sink vertex)
|
||||
* @return the cigar string generated by running a SW alignment between the reference and alternate paths in this bubble
|
||||
*/
|
||||
@Requires({"graph != null"})
|
||||
@Ensures({"result != null", "result.getReadLength() == bubbleBytes.length"})
|
||||
private Cigar calculateCigarForCompleteBubble( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final byte[] bubbleBytes, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
|
||||
final byte[] refBytes = getReferenceBytes(this, graph, fromVertex, toVertex);
|
||||
@Ensures({"result != null"})
|
||||
private Cigar calculateCigarForCompleteBubble( final byte[] bubbleBytes, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
|
||||
final byte[] refBytes = graph.getReferenceBytes(fromVertex == null ? graph.getReferenceSourceVertex() : fromVertex, toVertex == null ? graph.getReferenceSinkVertex() : toVertex, fromVertex == null, toVertex == null);
|
||||
|
||||
final Cigar cigar = new Cigar();
|
||||
final Cigar returnCigar = new Cigar();
|
||||
|
||||
// add padding to anchor ref/alt bases in the SW matrix
|
||||
byte[] padding = "XXXXXX".getBytes();
|
||||
byte[] padding = STARTING_SW_ANCHOR_BYTES;
|
||||
boolean goodAlignment = false;
|
||||
SWPairwiseAlignment swConsensus = null;
|
||||
while( !goodAlignment && padding.length < 1000 ) {
|
||||
|
|
@ -280,27 +301,48 @@ public class KBestPaths {
|
|||
goodAlignment = true;
|
||||
}
|
||||
}
|
||||
if( !goodAlignment && swConsensus != null ) {
|
||||
throw new ReviewedStingException("SmithWaterman offset failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
|
||||
if( !goodAlignment ) {
|
||||
returnCigar.add(new CigarElement(1, CigarOperator.N));
|
||||
return returnCigar;
|
||||
}
|
||||
|
||||
if( swConsensus != null ) {
|
||||
final Cigar swCigar = swConsensus.getCigar();
|
||||
final Cigar swCigar = swConsensus.getCigar();
|
||||
if( swCigar.numCigarElements() > 6 ) { // this bubble is too divergent from the reference
|
||||
returnCigar.add(new CigarElement(1, CigarOperator.N));
|
||||
} else {
|
||||
int skipElement = -1;
|
||||
if( fromVertex == null ) {
|
||||
for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
|
||||
final CigarElement ce = swCigar.getCigarElement(iii);
|
||||
if( ce.getOperator().equals(CigarOperator.D) ) {
|
||||
skipElement = iii;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (toVertex == null ) {
|
||||
for( int iii = swCigar.numCigarElements() - 1; iii >= 0; iii-- ) {
|
||||
final CigarElement ce = swCigar.getCigarElement(iii);
|
||||
if( ce.getOperator().equals(CigarOperator.D) ) {
|
||||
skipElement = iii;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
|
||||
// now we need to remove the padding from the cigar string
|
||||
int length = swCigar.getCigarElement(iii).getLength();
|
||||
if( iii == 0 ) { length -= padding.length; }
|
||||
if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; }
|
||||
if( length > 0 ) {
|
||||
cigar.add( new CigarElement(length, swCigar.getCigarElement(iii).getOperator()) );
|
||||
returnCigar.add(new CigarElement(length, (skipElement == iii ? CigarOperator.X : swCigar.getCigarElement(iii).getOperator())));
|
||||
}
|
||||
}
|
||||
if( (refBytes == null && cigar.getReferenceLength() != 0) || ( refBytes != null && cigar.getReferenceLength() != refBytes.length ) ) {
|
||||
throw new ReviewedStingException("SmithWaterman cigar failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
|
||||
if( (refBytes == null && returnCigar.getReferenceLength() != 0) || ( refBytes != null && returnCigar.getReferenceLength() != refBytes.length ) ) {
|
||||
throw new IllegalStateException("SmithWaterman cigar failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
|
||||
}
|
||||
}
|
||||
|
||||
return cigar;
|
||||
return returnCigar;
|
||||
}
|
||||
|
||||
// class to keep track of the bubble state machine
|
||||
|
|
@ -326,8 +368,18 @@ public class KBestPaths {
|
|||
}
|
||||
}
|
||||
|
||||
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
|
||||
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
|
||||
/**
|
||||
* Traverse the graph and pull out the best k paths.
|
||||
* Paths are scored via their comparator function. The default being PathComparatorTotalScore()
|
||||
* @param graph the graph from which to pull paths
|
||||
* @param k the number of paths to find
|
||||
* @return a list with at most k top-scoring paths from the graph
|
||||
*/
|
||||
@Ensures({"result != null", "result.size() <= k"})
|
||||
public static List<Path> getKBestPaths( final DeBruijnAssemblyGraph graph, final int k ) {
|
||||
if( graph == null ) { throw new IllegalArgumentException("Attempting to traverse a null graph."); }
|
||||
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new IllegalArgumentException("Asked for more paths than internal parameters allow for."); }
|
||||
|
||||
final ArrayList<Path> bestPaths = new ArrayList<Path>();
|
||||
|
||||
// run a DFS for best paths
|
||||
|
|
@ -350,12 +402,14 @@ public class KBestPaths {
|
|||
|
||||
// did we hit the end of a path?
|
||||
if ( allOutgoingEdgesHaveBeenVisited(path) ) {
|
||||
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
|
||||
// clean out some low scoring paths
|
||||
Collections.sort(bestPaths, new PathComparatorTotalScore() );
|
||||
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20
|
||||
if( path.containsRefEdge() ) {
|
||||
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
|
||||
// clean out some low scoring paths
|
||||
Collections.sort(bestPaths, new PathComparatorTotalScore() );
|
||||
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20
|
||||
}
|
||||
bestPaths.add(path);
|
||||
}
|
||||
bestPaths.add(path);
|
||||
} else if( n.val > 10000) {
|
||||
// do nothing, just return
|
||||
} else {
|
||||
|
|
@ -376,227 +430,16 @@ public class KBestPaths {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param path the path to test
|
||||
* @return true if all the outgoing edges at the end of this path have already been visited
|
||||
*/
|
||||
private static boolean allOutgoingEdgesHaveBeenVisited( final Path path ) {
|
||||
for( final DeBruijnEdge edge : path.graph.outgoingEdgesOf(path.lastVertex) ) {
|
||||
if( !path.containsEdge(edge) ) {
|
||||
if( !path.containsEdge(edge) ) { // TODO -- investigate allowing numInPath < 2 to allow cycles
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/****************************************************************
|
||||
* Collection of graph functions used by KBestPaths *
|
||||
***************************************************************/
|
||||
|
||||
/**
|
||||
* Test if the vertex is on a reference path in the graph. If so it is referred to as a reference node
|
||||
* @param graph the graph from which the vertex originated
|
||||
* @param v the vertex to test
|
||||
* @return true if the vertex is on the reference path
|
||||
*/
|
||||
public static boolean isReferenceNode( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
|
||||
if( e.isRef() ) { return true; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this edge a source edge (the source vertex of the edge is a source node in the graph)
|
||||
* @param graph the graph from which the edge originated
|
||||
* @param e the edge to test
|
||||
* @return true if the source vertex of the edge is a source node in the graph
|
||||
*/
|
||||
public static boolean isSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||
return graph.inDegreeOf(graph.getEdgeSource(e)) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this vertex a source vertex
|
||||
* @param graph the graph from which the vertex originated
|
||||
* @param v the vertex to test
|
||||
* @return true if the vertex is a source vertex
|
||||
*/
|
||||
public static boolean isSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||
return graph.inDegreeOf(v) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this edge both a reference edge and a source edge for the reference path
|
||||
* @param graph the graph from which the edge originated
|
||||
* @param e the edge to test
|
||||
* @return true if the edge is both a reference edge and a reference path source edge
|
||||
*/
|
||||
public static boolean isRefSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
|
||||
if( edgeToTest.isRef() ) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this vertex both a reference node and a source node for the reference path
|
||||
* @param graph the graph from which the vertex originated
|
||||
* @param v the vertex to test
|
||||
* @return true if the vertex is both a reference node and a reference path source node
|
||||
*/
|
||||
public static boolean isRefSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
|
||||
if( edgeToTest.isRef() ) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this edge both a reference edge and a sink edge for the reference path
|
||||
* @param graph the graph from which the edge originated
|
||||
* @param e the edge to test
|
||||
* @return true if the edge is both a reference edge and a reference path sink edge
|
||||
*/
|
||||
public static boolean isRefSink( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
|
||||
if( edgeToTest.isRef() ) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this vertex both a reference node and a sink node for the reference path
|
||||
* @param graph the graph from which the node originated
|
||||
* @param v the node to test
|
||||
* @return true if the vertex is both a reference node and a reference path sink node
|
||||
*/
|
||||
public static boolean isRefSink( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
|
||||
if( edgeToTest.isRef() ) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public static DeBruijnEdge getReferenceSourceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||
for( final DeBruijnEdge e : graph.edgeSet() ) {
|
||||
if( e.isRef() && isRefSource(graph, e) ) {
|
||||
return e;
|
||||
}
|
||||
}
|
||||
throw new ReviewedStingException("All reference graphs should have a source node");
|
||||
}
|
||||
|
||||
public static DeBruijnVertex getReferenceSourceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||
if( isReferenceNode(graph, v) && isRefSource(graph, v) ) {
|
||||
return v;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static DeBruijnEdge getReferenceSinkEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||
for( final DeBruijnEdge e : graph.edgeSet() ) {
|
||||
if( e.isRef() && isRefSink(graph, e) ) {
|
||||
return e;
|
||||
}
|
||||
}
|
||||
throw new ReviewedStingException("All reference graphs should have a sink node");
|
||||
}
|
||||
|
||||
public static DeBruijnVertex getReferenceSinkVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||
if( isReferenceNode(graph, v) && isRefSink(graph, v) ) {
|
||||
return v;
|
||||
}
|
||||
}
|
||||
throw new ReviewedStingException("All reference graphs should have a sink node");
|
||||
}
|
||||
|
||||
public static DeBruijnEdge getNextReferenceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||
if( e == null ) { return null; }
|
||||
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
|
||||
if( edgeToTest.isRef() ) {
|
||||
return edgeToTest;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static DeBruijnVertex getNextReferenceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||
if( v == null ) { return null; }
|
||||
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
|
||||
if( edgeToTest.isRef() ) {
|
||||
return graph.getEdgeTarget(edgeToTest);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static DeBruijnEdge getPrevReferenceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
|
||||
if( edgeToTest.isRef() ) {
|
||||
return edgeToTest;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static DeBruijnVertex getPrevReferenceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
|
||||
if( isReferenceNode(graph, graph.getEdgeSource(edgeToTest)) ) {
|
||||
return graph.getEdgeSource(edgeToTest);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static boolean referencePathExists(final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge fromEdge, final DeBruijnEdge toEdge) {
|
||||
DeBruijnEdge e = fromEdge;
|
||||
if( e == null ) {
|
||||
return false;
|
||||
}
|
||||
while( !e.equals(graph, toEdge) ) {
|
||||
e = getNextReferenceEdge(graph, e);
|
||||
if( e == null ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public static boolean referencePathExists(final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex) {
|
||||
DeBruijnVertex v = fromVertex;
|
||||
if( v == null ) {
|
||||
return false;
|
||||
}
|
||||
v = getNextReferenceVertex(graph, v);
|
||||
if( v == null ) {
|
||||
return false;
|
||||
}
|
||||
while( !v.equals(toVertex) ) {
|
||||
v = getNextReferenceVertex(graph, v);
|
||||
if( v == null ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// fromVertex (exclusive) -> toVertex (exclusive)
|
||||
public static byte[] getReferenceBytes( final Path path, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
|
||||
byte[] bytes = null;
|
||||
if( fromVertex != null && toVertex != null && !referencePathExists(graph, fromVertex, toVertex) ) {
|
||||
throw new ReviewedStingException("Asked for a reference path which doesn't exist. " + fromVertex + " --> " + toVertex);
|
||||
}
|
||||
DeBruijnVertex v = fromVertex;
|
||||
if( v == null ) {
|
||||
v = getReferenceSourceVertex(graph);
|
||||
bytes = ArrayUtils.addAll( bytes, path.getAdditionalSequence(graph, v) );
|
||||
}
|
||||
v = getNextReferenceVertex(graph, v);
|
||||
while( (toVertex != null && !v.equals(toVertex)) || (toVertex == null && v != null) ) {
|
||||
bytes = ArrayUtils.addAll( bytes, path.getAdditionalSequence(graph, v) );
|
||||
// advance along the reference path
|
||||
v = getNextReferenceVertex(graph, v);
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,13 +72,10 @@ public class LikelihoodCalculationEngine {
|
|||
|
||||
switch (hmmType) {
|
||||
case EXACT:
|
||||
pairHMM = new ExactPairHMM();
|
||||
pairHMM = new Log10PairHMM(true);
|
||||
break;
|
||||
case ORIGINAL:
|
||||
pairHMM = new OriginalPairHMM();
|
||||
break;
|
||||
case CACHING:
|
||||
pairHMM = new CachingPairHMM();
|
||||
pairHMM = new Log10PairHMM(false);
|
||||
break;
|
||||
case LOGLESS_CACHING:
|
||||
pairHMM = new LoglessCachingPairHMM();
|
||||
|
|
@ -112,7 +109,7 @@ public class LikelihoodCalculationEngine {
|
|||
Y_METRIC_LENGTH += 2;
|
||||
|
||||
// initialize arrays to hold the probabilities of being in the match, insertion and deletion cases
|
||||
pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);
|
||||
pairHMM.initialize(Y_METRIC_LENGTH, X_METRIC_LENGTH);
|
||||
|
||||
// for each sample's reads
|
||||
for( final Map.Entry<String, List<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
|
||||
|
|
@ -128,7 +125,7 @@ public class LikelihoodCalculationEngine {
|
|||
final int numHaplotypes = haplotypes.size();
|
||||
final Map<Haplotype, Allele> alleleVersions = new HashMap<Haplotype, Allele>(numHaplotypes);
|
||||
for ( final Haplotype haplotype : haplotypes ) {
|
||||
alleleVersions.put(haplotype, Allele.create(haplotype.getBases()));
|
||||
alleleVersions.put(haplotype, Allele.create(haplotype, true));
|
||||
}
|
||||
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
|
|
@ -136,11 +133,12 @@ public class LikelihoodCalculationEngine {
|
|||
final byte[] overallGCP = new byte[read.getReadLength()];
|
||||
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
|
||||
Haplotype previousHaplotypeSeen = null;
|
||||
final byte[] readQuals = read.getBaseQualities();
|
||||
// NOTE -- must clone anything that gets modified here so we don't screw up future uses of the read
|
||||
final byte[] readQuals = read.getBaseQualities().clone();
|
||||
final byte[] readInsQuals = read.getBaseInsertionQualities();
|
||||
final byte[] readDelQuals = read.getBaseDeletionQualities();
|
||||
for( int kkk = 0; kkk < readQuals.length; kkk++ ) {
|
||||
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
|
||||
readQuals[kkk] = (byte) Math.min( 0xff & readQuals[kkk], read.getMappingQuality()); // cap base quality by mapping quality, as in UG
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
|
||||
// TODO -- why is Q18 hard-coded here???
|
||||
|
|
@ -150,7 +148,7 @@ public class LikelihoodCalculationEngine {
|
|||
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
|
||||
final Haplotype haplotype = haplotypes.get(jjj);
|
||||
|
||||
final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) );
|
||||
final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : PairHMM.findFirstPositionWhereHaplotypesDiffer(haplotype.getBases(), previousHaplotypeSeen.getBases()) );
|
||||
previousHaplotypeSeen = haplotype;
|
||||
|
||||
perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype),
|
||||
|
|
@ -161,15 +159,6 @@ public class LikelihoodCalculationEngine {
|
|||
return perReadAlleleLikelihoodMap;
|
||||
}
|
||||
|
||||
private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
|
||||
for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ) {
|
||||
if( b1[iii] != b2[iii] ) {
|
||||
return iii;
|
||||
}
|
||||
}
|
||||
return Math.min(b1.length, b2.length);
|
||||
}
|
||||
|
||||
@Requires({"alleleOrdering.size() > 0"})
|
||||
@Ensures({"result.length == result[0].length", "result.length == alleleOrdering.size()"})
|
||||
public static double[][] computeDiploidHaplotypeLikelihoods( final String sample,
|
||||
|
|
@ -244,7 +233,7 @@ public class LikelihoodCalculationEngine {
|
|||
final List<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
|
||||
bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype
|
||||
final List<Allele> haplotypesAsAlleles = new ArrayList<Allele>();
|
||||
for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h.getBases())); }
|
||||
for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h, true)); }
|
||||
|
||||
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, stratifiedReadMap, haplotypesAsAlleles ); // all samples pooled together
|
||||
|
||||
|
|
|
|||
|
|
@ -153,6 +153,7 @@ public class ConstrainedMateFixingManager {
|
|||
* are assumes to not be allowed to move in the incoming read stream.
|
||||
*/
|
||||
final int maxInsertSizeForMovingReadPairs;
|
||||
final int initialCapacity = 5000;
|
||||
|
||||
final GenomeLocParser genomeLocParser;
|
||||
private GenomeLoc lastLocFlushed = null;
|
||||
|
|
@ -161,12 +162,12 @@ public class ConstrainedMateFixingManager {
|
|||
|
||||
/** read.name -> records */
|
||||
HashMap<String, SAMRecordHashObject> forMateMatching = new HashMap<String, SAMRecordHashObject>();
|
||||
TreeSet<SAMRecord> waitingReads = new TreeSet<SAMRecord>(comparer);
|
||||
PriorityQueue<SAMRecord> waitingReads = new PriorityQueue<SAMRecord>(initialCapacity, comparer);
|
||||
|
||||
private SAMRecord remove(TreeSet<SAMRecord> treeSet) {
|
||||
final SAMRecord first = treeSet.first();
|
||||
if ( !treeSet.remove(first) )
|
||||
throw new UserException("Error caching SAM record " + first.getReadName() + ", which is usually caused by malformed SAM/BAM files in which multiple identical copies of a read are present.");
|
||||
private SAMRecord remove(PriorityQueue<SAMRecord> queue) {
|
||||
SAMRecord first = queue.poll();
|
||||
if (first == null)
|
||||
throw new UserException("Error caching SAM record -- priority queue is empty, and yet there was an attempt to poll it -- which is usually caused by malformed SAM/BAM files in which multiple identical copies of a read are present.");
|
||||
return first;
|
||||
}
|
||||
|
||||
|
|
@ -243,8 +244,8 @@ public class ConstrainedMateFixingManager {
|
|||
|
||||
// if the new read is on a different contig or we have too many reads, then we need to flush the queue and clear the map
|
||||
boolean tooManyReads = getNReadsInQueue() >= MAX_RECORDS_IN_MEMORY;
|
||||
if ( (canFlush && tooManyReads) || (getNReadsInQueue() > 0 && !waitingReads.first().getReferenceIndex().equals(newRead.getReferenceIndex())) ) {
|
||||
if ( DEBUG ) logger.warn("Flushing queue on " + (tooManyReads ? "too many reads" : ("move to new contig: " + newRead.getReferenceName() + " from " + waitingReads.first().getReferenceName())) + " at " + newRead.getAlignmentStart());
|
||||
if ( (canFlush && tooManyReads) || (getNReadsInQueue() > 0 && !waitingReads.peek().getReferenceIndex().equals(newRead.getReferenceIndex())) ) {
|
||||
if ( DEBUG ) logger.warn("Flushing queue on " + (tooManyReads ? "too many reads" : ("move to new contig: " + newRead.getReferenceName() + " from " + waitingReads.peek().getReferenceName())) + " at " + newRead.getAlignmentStart());
|
||||
|
||||
while ( getNReadsInQueue() > 1 ) {
|
||||
// emit to disk
|
||||
|
|
@ -307,7 +308,7 @@ public class ConstrainedMateFixingManager {
|
|||
|
||||
if ( ++counter % EMIT_FREQUENCY == 0 ) {
|
||||
while ( ! waitingReads.isEmpty() ) { // there's something in the queue
|
||||
SAMRecord read = waitingReads.first();
|
||||
SAMRecord read = waitingReads.peek();
|
||||
|
||||
if ( noReadCanMoveBefore(read.getAlignmentStart(), newRead) &&
|
||||
(!pairedReadIsMovable(read) // we won't try to move such a read
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
|
|
@ -61,7 +60,10 @@ import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -69,13 +71,13 @@ import org.broadinstitute.sting.utils.exceptions.StingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -133,7 +135,7 @@ import java.util.*;
|
|||
*
|
||||
* @author ebanks
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_OUTPUT)
|
||||
public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||
|
||||
|
|
@ -308,7 +310,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
private boolean sawReadInCurrentInterval = false;
|
||||
|
||||
// the reads and known indels that fall into the current interval
|
||||
private final ReadBin readsToClean = new ReadBin();
|
||||
private ReadBin readsToClean;
|
||||
private final ArrayList<GATKSAMRecord> readsNotToClean = new ArrayList<GATKSAMRecord>();
|
||||
private final ArrayList<VariantContext> knownIndelsToTry = new ArrayList<VariantContext>();
|
||||
private final HashSet<Object> indelRodsSeen = new HashSet<Object>();
|
||||
|
|
@ -371,6 +373,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public void initialize() {
|
||||
readsToClean = new ReadBin(getToolkit().getGenomeLocParser(), REFERENCE_PADDING);
|
||||
|
||||
if ( N_WAY_OUT == null && writer == null ) {
|
||||
throw new UserException.CommandLineException("Either -o or -nWayOut must be specified");
|
||||
|
|
@ -468,12 +471,14 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
try {
|
||||
final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
|
||||
programRecord.setProgramVersion(version);
|
||||
} catch (MissingResourceException e) {}
|
||||
} catch (MissingResourceException e) {
|
||||
// this is left empty on purpose (perhaps Andrey knows why?)
|
||||
}
|
||||
programRecord.setCommandLine(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
|
||||
return programRecord;
|
||||
}
|
||||
|
||||
private void emit(final SAMRecord read) {
|
||||
private void emit(final GATKSAMRecord read) {
|
||||
|
||||
// check to see whether the read was modified by looking at the temporary tag
|
||||
boolean wasModified = readsActuallyCleaned.contains(read);
|
||||
|
|
@ -529,7 +534,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
readsToClean.add(read);
|
||||
|
||||
// add the rods to the list of known variants
|
||||
populateKnownIndels(metaDataTracker, ref);
|
||||
populateKnownIndels(metaDataTracker);
|
||||
}
|
||||
|
||||
if ( readsToClean.size() + readsNotToClean.size() >= MAX_READS ) {
|
||||
|
|
@ -538,6 +543,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
else { // the read is past the current interval
|
||||
logger.debug(currentInterval.toString() + "\t" + read.getAlignmentStart() );
|
||||
cleanAndCallMap(ref, read, metaDataTracker, readLoc);
|
||||
}
|
||||
|
||||
|
|
@ -641,7 +647,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
private void populateKnownIndels(RefMetaDataTracker metaDataTracker, ReferenceContext ref) {
|
||||
private void populateKnownIndels(RefMetaDataTracker metaDataTracker) {
|
||||
for ( final VariantContext vc : metaDataTracker.getValues(known) ) {
|
||||
if ( indelRodsSeen.contains(vc) )
|
||||
continue;
|
||||
|
|
@ -704,10 +710,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n");
|
||||
|
||||
Consensus bestConsensus = null;
|
||||
Iterator<Consensus> iter = altConsenses.iterator();
|
||||
|
||||
while ( iter.hasNext() ) {
|
||||
Consensus consensus = iter.next();
|
||||
for (Consensus consensus : altConsenses) {
|
||||
//logger.debug("Trying new consensus: " + consensus.cigar + " " + new String(consensus.str));
|
||||
|
||||
// if ( DEBUG ) {
|
||||
|
|
@ -722,34 +726,34 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
// if ( debugOn ) System.out.println("Consensus: "+consensus.str);
|
||||
|
||||
for ( int j = 0; j < altReads.size(); j++ ) {
|
||||
for (int j = 0; j < altReads.size(); j++) {
|
||||
AlignedRead toTest = altReads.get(j);
|
||||
Pair<Integer, Integer> altAlignment = findBestOffset(consensus.str, toTest, leftmostIndex);
|
||||
|
||||
// the mismatch score is the min of its alignment vs. the reference and vs. the alternate
|
||||
int myScore = altAlignment.second;
|
||||
|
||||
if ( myScore > toTest.getAlignerMismatchScore() || myScore >= toTest.getMismatchScoreToReference() )
|
||||
if (myScore > toTest.getAlignerMismatchScore() || myScore >= toTest.getMismatchScoreToReference())
|
||||
myScore = toTest.getMismatchScoreToReference();
|
||||
// keep track of reads that align better to the alternate consensus.
|
||||
// By pushing alignments with equal scores to the alternate, it means we'll over-call (het -> hom non ref) but are less likely to under-call (het -> ref, het non ref -> het)
|
||||
// keep track of reads that align better to the alternate consensus.
|
||||
// By pushing alignments with equal scores to the alternate, it means we'll over-call (het -> hom non ref) but are less likely to under-call (het -> ref, het non ref -> het)
|
||||
else
|
||||
consensus.readIndexes.add(new Pair<Integer, Integer>(j, altAlignment.first));
|
||||
|
||||
//logger.debug(consensus.cigar + " vs. " + toTest.getRead().getReadName() + "-" + toTest.getRead().getReadString() + " => " + myScore + " vs. " + toTest.getMismatchScoreToReference());
|
||||
if ( !toTest.getRead().getDuplicateReadFlag() )
|
||||
if (!toTest.getRead().getDuplicateReadFlag())
|
||||
consensus.mismatchSum += myScore;
|
||||
|
||||
// optimization: once the mismatch sum is higher than the best consensus, quit since this one can't win
|
||||
// THIS MUST BE DISABLED IF WE DECIDE TO ALLOW MORE THAN ONE ALTERNATE CONSENSUS!
|
||||
if ( bestConsensus != null && consensus.mismatchSum > bestConsensus.mismatchSum )
|
||||
if (bestConsensus != null && consensus.mismatchSum > bestConsensus.mismatchSum)
|
||||
break;
|
||||
}
|
||||
|
||||
//logger.debug("Mismatch sum of new consensus: " + consensus.mismatchSum);
|
||||
if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
|
||||
if (bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
|
||||
// we do not need this alt consensus, release memory right away!!
|
||||
if ( bestConsensus != null )
|
||||
if (bestConsensus != null)
|
||||
bestConsensus.readIndexes.clear();
|
||||
bestConsensus = consensus;
|
||||
//logger.debug("New consensus " + bestConsensus.cigar + " is now best consensus");
|
||||
|
|
@ -767,7 +771,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
final double improvement = (bestConsensus == null ? -1 : ((double)(totalRawMismatchSum - bestConsensus.mismatchSum))/10.0);
|
||||
if ( improvement >= LOD_THRESHOLD ) {
|
||||
|
||||
bestConsensus.cigar = AlignmentUtils.leftAlignIndel(bestConsensus.cigar, reference, bestConsensus.str, bestConsensus.positionOnReference, bestConsensus.positionOnReference);
|
||||
bestConsensus.cigar = AlignmentUtils.leftAlignIndel(bestConsensus.cigar, reference, bestConsensus.str, bestConsensus.positionOnReference, bestConsensus.positionOnReference, true);
|
||||
|
||||
// start cleaning the appropriate reads
|
||||
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) {
|
||||
|
|
@ -795,9 +799,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
StringBuilder str = new StringBuilder();
|
||||
str.append(reads.get(0).getReferenceName());
|
||||
int position = bestConsensus.positionOnReference + bestConsensus.cigar.getCigarElement(0).getLength();
|
||||
str.append("\t" + (leftmostIndex + position - 1));
|
||||
str.append("\t").append(leftmostIndex + position - 1);
|
||||
CigarElement ce = bestConsensus.cigar.getCigarElement(1);
|
||||
str.append("\t" + ce.getLength() + "\t" + ce.getOperator() + "\t");
|
||||
str.append("\t").append(ce.getLength()).append("\t").append(ce.getOperator()).append("\t");
|
||||
int length = ce.getLength();
|
||||
if ( ce.getOperator() == CigarOperator.D ) {
|
||||
for ( int i = 0; i < length; i++)
|
||||
|
|
@ -806,7 +810,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
for ( int i = 0; i < length; i++)
|
||||
str.append((char)bestConsensus.str[position+i]);
|
||||
}
|
||||
str.append("\t" + (((double)(totalRawMismatchSum - bestConsensus.mismatchSum))/10.0) + "\n");
|
||||
str.append("\t").append((((double) (totalRawMismatchSum - bestConsensus.mismatchSum)) / 10.0)).append("\n");
|
||||
try {
|
||||
indelOutput.write(str.toString());
|
||||
indelOutput.flush();
|
||||
|
|
@ -912,7 +916,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
final byte[] reference) {
|
||||
|
||||
long totalRawMismatchSum = 0L;
|
||||
|
||||
for ( final GATKSAMRecord read : reads ) {
|
||||
|
||||
// we can not deal with screwy records
|
||||
|
|
@ -926,7 +929,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
||||
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
||||
if ( numBlocks == 2 ) {
|
||||
Cigar newCigar = AlignmentUtils.leftAlignIndel(unclipCigar(read.getCigar()), reference, read.getReadBases(), read.getAlignmentStart()-leftmostIndex, 0);
|
||||
Cigar newCigar = AlignmentUtils.leftAlignIndel(unclipCigar(read.getCigar()), reference, read.getReadBases(), read.getAlignmentStart()-leftmostIndex, 0, true);
|
||||
aRead.setCigar(newCigar, false);
|
||||
}
|
||||
|
||||
|
|
@ -1277,23 +1280,22 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
for ( int i=0; i < reference.length; i++ )
|
||||
originalMismatchBases[i] = totalOriginalBases[i] = cleanedMismatchBases[i] = totalCleanedBases[i] = 0;
|
||||
|
||||
for (int i=0; i < reads.size(); i++) {
|
||||
final AlignedRead read = reads.get(i);
|
||||
if ( read.getRead().getAlignmentBlocks().size() > 1 )
|
||||
continue;
|
||||
for (final AlignedRead read : reads) {
|
||||
if (read.getRead().getAlignmentBlocks().size() > 1)
|
||||
continue;
|
||||
|
||||
int refIdx = read.getOriginalAlignmentStart() - leftmostIndex;
|
||||
final byte[] readStr = read.getReadBases();
|
||||
final byte[] quals = read.getBaseQualities();
|
||||
|
||||
for (int j=0; j < readStr.length; j++, refIdx++ ) {
|
||||
if ( refIdx < 0 || refIdx >= reference.length ) {
|
||||
for (int j = 0; j < readStr.length; j++, refIdx++) {
|
||||
if (refIdx < 0 || refIdx >= reference.length) {
|
||||
//System.out.println( "Read: "+read.getRead().getReadName() + "; length = " + readStr.length() );
|
||||
//System.out.println( "Ref left: "+ leftmostIndex +"; ref length=" + reference.length() + "; read alignment start: "+read.getOriginalAlignmentStart() );
|
||||
break;
|
||||
}
|
||||
totalOriginalBases[refIdx] += quals[j];
|
||||
if ( readStr[j] != reference[refIdx] )
|
||||
if (readStr[j] != reference[refIdx])
|
||||
originalMismatchBases[refIdx] += quals[j];
|
||||
}
|
||||
|
||||
|
|
@ -1301,18 +1303,18 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
refIdx = read.getAlignmentStart() - leftmostIndex;
|
||||
int altIdx = 0;
|
||||
Cigar c = read.getCigar();
|
||||
for (int j = 0 ; j < c.numCigarElements() ; j++) {
|
||||
for (int j = 0; j < c.numCigarElements(); j++) {
|
||||
CigarElement ce = c.getCigarElement(j);
|
||||
int elementLength = ce.getLength();
|
||||
switch ( ce.getOperator() ) {
|
||||
switch (ce.getOperator()) {
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
for (int k = 0 ; k < elementLength ; k++, refIdx++, altIdx++ ) {
|
||||
if ( refIdx >= reference.length )
|
||||
for (int k = 0; k < elementLength; k++, refIdx++, altIdx++) {
|
||||
if (refIdx >= reference.length)
|
||||
break;
|
||||
totalCleanedBases[refIdx] += quals[altIdx];
|
||||
if ( readStr[altIdx] != reference[refIdx] )
|
||||
if (readStr[altIdx] != reference[refIdx])
|
||||
cleanedMismatchBases[refIdx] += quals[altIdx];
|
||||
}
|
||||
break;
|
||||
|
|
@ -1347,8 +1349,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
if ( snpsOutput != null ) {
|
||||
if ( didMismatch ) {
|
||||
sb.append(reads.get(0).getRead().getReferenceName() + ":");
|
||||
sb.append((leftmostIndex + i));
|
||||
sb.append(reads.get(0).getRead().getReferenceName()).append(":").append(leftmostIndex + i);
|
||||
if ( stillMismatches )
|
||||
sb.append(" SAME_SNP\n");
|
||||
else
|
||||
|
|
@ -1602,52 +1603,4 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
private class ReadBin implements HasGenomeLocation {
|
||||
|
||||
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
|
||||
private byte[] reference = null;
|
||||
private GenomeLoc loc = null;
|
||||
|
||||
public ReadBin() { }
|
||||
|
||||
// Return false if we can't process this read bin because the reads are not correctly overlapping.
|
||||
// This can happen if e.g. there's a large known indel with no overlapping reads.
|
||||
public void add(GATKSAMRecord read) {
|
||||
|
||||
GenomeLoc locForRead = getToolkit().getGenomeLocParser().createGenomeLoc(read);
|
||||
if ( loc == null )
|
||||
loc = locForRead;
|
||||
else if ( locForRead.getStop() > loc.getStop() )
|
||||
loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), loc.getStart(), locForRead.getStop());
|
||||
|
||||
reads.add(read);
|
||||
}
|
||||
|
||||
public List<GATKSAMRecord> getReads() { return reads; }
|
||||
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getReference(CachingIndexedFastaSequenceFile referenceReader) {
|
||||
// set up the reference if we haven't done so yet
|
||||
if ( reference == null ) {
|
||||
// first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read)
|
||||
int padLeft = Math.max(loc.getStart()-REFERENCE_PADDING, 1);
|
||||
int padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength());
|
||||
loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), padLeft, padRight);
|
||||
reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases();
|
||||
}
|
||||
|
||||
return reference;
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
public int size() { return reads.size(); }
|
||||
|
||||
public void clear() {
|
||||
reads.clear();
|
||||
reference = null;
|
||||
loc = null;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -87,7 +88,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} )
|
||||
public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
|
||||
|
||||
@Output(required=false, doc="Output bam")
|
||||
|
|
@ -110,7 +111,7 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
|
|||
// move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
||||
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
||||
if ( numBlocks == 2 ) {
|
||||
Cigar newCigar = AlignmentUtils.leftAlignIndel(IndelRealigner.unclipCigar(read.getCigar()), ref.getBases(), read.getReadBases(), 0, 0);
|
||||
Cigar newCigar = AlignmentUtils.leftAlignIndel(IndelRealigner.unclipCigar(read.getCigar()), ref.getBases(), read.getReadBases(), 0, 0, true);
|
||||
newCigar = IndelRealigner.reclipCigar(newCigar, read);
|
||||
read.setCigar(newCigar);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,14 +48,12 @@ package org.broadinstitute.sting.gatk.walkers.indels;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pairhmm.ExactPairHMM;
|
||||
//import org.broadinstitute.sting.utils.pairhmm.LoglessCachingPairHMM;
|
||||
import org.broadinstitute.sting.utils.pairhmm.OriginalPairHMM;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.pairhmm.Log10PairHMM;
|
||||
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -68,6 +66,8 @@ import java.util.Arrays;
|
|||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
//import org.broadinstitute.sting.utils.pairhmm.LoglessCachingPairHMM;
|
||||
|
||||
|
||||
public class PairHMMIndelErrorModel {
|
||||
public static final int BASE_QUAL_THRESHOLD = 20;
|
||||
|
|
@ -116,12 +116,11 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
switch (hmmType) {
|
||||
case EXACT:
|
||||
pairHMM = new ExactPairHMM();
|
||||
pairHMM = new Log10PairHMM(true);
|
||||
break;
|
||||
case ORIGINAL:
|
||||
pairHMM = new OriginalPairHMM();
|
||||
pairHMM = new Log10PairHMM(false);
|
||||
break;
|
||||
case CACHING:
|
||||
case LOGLESS_CACHING: //TODO: still not tested so please do not use yet
|
||||
//pairHMM = new LoglessCachingPairHMM(); //TODO - add it back when the figure out how to use the protected LoglessCachingPairHMM class
|
||||
throw new UserException.BadArgumentValue("pairHMM"," this option (LOGLESS_CACHING in UG) is still under development");
|
||||
|
|
@ -344,8 +343,10 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
}
|
||||
else {
|
||||
final byte[] readBases = Arrays.copyOfRange(unclippedReadBases,numStartSoftClippedBases, unclippedReadBases.length-numEndSoftClippedBases);
|
||||
final byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartSoftClippedBases, unclippedReadBases.length-numEndSoftClippedBases);
|
||||
final int endOfCopy = unclippedReadBases.length - numEndSoftClippedBases;
|
||||
final byte[] readBases = Arrays.copyOfRange(unclippedReadBases, numStartSoftClippedBases, endOfCopy);
|
||||
final byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals, numStartSoftClippedBases, endOfCopy);
|
||||
|
||||
int j=0;
|
||||
|
||||
byte[] previousHaplotypeSeen = null;
|
||||
|
|
@ -357,6 +358,16 @@ public class PairHMMIndelErrorModel {
|
|||
getContextHomopolymerLength(readBases,hrunProfile);
|
||||
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
|
||||
|
||||
// get the base insertion and deletion qualities to use
|
||||
final byte[] baseInsertionQualities, baseDeletionQualities;
|
||||
if ( read.hasBaseIndelQualities() ) {
|
||||
baseInsertionQualities = Arrays.copyOfRange(read.getBaseInsertionQualities(), numStartSoftClippedBases, endOfCopy);
|
||||
baseDeletionQualities = Arrays.copyOfRange(read.getBaseDeletionQualities(), numStartSoftClippedBases, endOfCopy);
|
||||
} else {
|
||||
baseInsertionQualities = contextLogGapOpenProbabilities;
|
||||
baseDeletionQualities = contextLogGapOpenProbabilities;
|
||||
}
|
||||
|
||||
boolean firstHap = true;
|
||||
for (Allele a: haplotypeMap.keySet()) {
|
||||
|
||||
|
|
@ -386,7 +397,7 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
if (previousHaplotypeSeen == null) {
|
||||
//no need to reallocate arrays for each new haplotype, as length won't change
|
||||
pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);
|
||||
pairHMM.initialize(Y_METRIC_LENGTH, X_METRIC_LENGTH);
|
||||
}
|
||||
|
||||
int startIndexInHaplotype = 0;
|
||||
|
|
@ -395,8 +406,7 @@ public class PairHMMIndelErrorModel {
|
|||
previousHaplotypeSeen = haplotypeBases.clone();
|
||||
|
||||
readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals,
|
||||
(read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities),
|
||||
(read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities),
|
||||
baseInsertionQualities, baseDeletionQualities,
|
||||
contextLogGapContinuationProbabilities, startIndexInHaplotype, firstHap);
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: carneiro
|
||||
* Date: 2/16/13
|
||||
* Time: 11:15 PM
|
||||
*/
|
||||
class ReadBin implements HasGenomeLocation {
|
||||
|
||||
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
|
||||
private byte[] reference = null;
|
||||
private GenomeLoc loc = null;
|
||||
private final GenomeLocParser parser;
|
||||
private final int referencePadding;
|
||||
|
||||
public ReadBin(final GenomeLocParser parser, final int referencePadding) {
|
||||
this.parser = parser;
|
||||
this.referencePadding = referencePadding;
|
||||
}
|
||||
|
||||
// Return false if we can't process this read bin because the reads are not correctly overlapping.
|
||||
// This can happen if e.g. there's a large known indel with no overlapping reads.
|
||||
public void add(GATKSAMRecord read) {
|
||||
|
||||
final int readStart = read.getSoftStart();
|
||||
final int readStop = read.getSoftEnd();
|
||||
if ( loc == null )
|
||||
loc = parser.createGenomeLoc(read.getReferenceName(), readStart, readStop);
|
||||
else if ( readStop > loc.getStop() )
|
||||
loc = parser.createGenomeLoc(loc.getContig(), loc.getStart(), readStop);
|
||||
|
||||
reads.add(read);
|
||||
}
|
||||
|
||||
public List<GATKSAMRecord> getReads() {
|
||||
return reads;
|
||||
}
|
||||
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getReference(CachingIndexedFastaSequenceFile referenceReader) {
|
||||
// set up the reference if we haven't done so yet
|
||||
if ( reference == null ) {
|
||||
// first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read)
|
||||
int padLeft = Math.max(loc.getStart()- referencePadding, 1);
|
||||
int padRight = Math.min(loc.getStop()+ referencePadding, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength());
|
||||
loc = parser.createGenomeLoc(loc.getContig(), loc.getContigIndex(), padLeft, padRight);
|
||||
reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases();
|
||||
}
|
||||
|
||||
return reference;
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return reads.size();
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
reads.clear();
|
||||
reference = null;
|
||||
loc = null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -60,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
|
@ -90,9 +91,12 @@ import java.util.TreeSet;
|
|||
* <li>Running the realigner over those intervals (see the IndelRealigner tool)</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* An important note: the input BAM(s), reference, and known indel file(s) should be the same ones to be used for the IndelRealigner step.
|
||||
* Important note 1: the input BAM(s), reference, and known indel file(s) should be the same ones to be used for the IndelRealigner step.
|
||||
* <p>
|
||||
* Another important note: because reads produced from the 454 technology inherently contain false indels, the realigner will not currently work with them
|
||||
* Important note 2: when multiple potential indels are found by the tool in the same general region, the tool will choose the most likely
|
||||
* one for realignment to the exclusion of the others. This is a known limitation of the tool.
|
||||
* <p>
|
||||
* Important note 3: because reads produced from the 454 technology inherently contain false indels, the realigner will not currently work with them
|
||||
* (or with reads from similar technologies). This tool also ignores MQ0 reads and reads with consecutive indel operators in the CIGAR string.
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
|
|
@ -117,7 +121,7 @@ import java.util.TreeSet;
|
|||
*
|
||||
* @author ebanks
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} )
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, BadMateFilter.class, Platform454Filter.class, BadCigarFilter.class})
|
||||
@Reference(window=@Window(start=-1,stop=50))
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
|
|
|
|||
|
|
@ -57,7 +57,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
|
@ -122,7 +124,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
|
||||
public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMap<Byte,Integer>> {
|
||||
|
||||
@ArgumentCollection
|
||||
|
|
@ -394,7 +396,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
int phredScoreTransmission = -1;
|
||||
if(transmissionProb != NO_TRANSMISSION_PROB){
|
||||
double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb)));
|
||||
double dphredScoreTransmission = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1 - (transmissionProb)));
|
||||
phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
|
||||
}
|
||||
//Handle null, missing and unavailable genotypes
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
@ -118,7 +119,7 @@ import static org.broadinstitute.sting.utils.variant.GATKVCFUtils.getVCFHeadersF
|
|||
// Filter out all reads with zero mapping quality
|
||||
@ReadFilters({MappingQualityZeroFilter.class})
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
|
||||
public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
|
||||
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information (if -l DEBUG is also specified)", required = false)
|
||||
protected boolean DEBUG = false;
|
||||
|
|
@ -1063,7 +1064,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
// Determine the phase at this position:
|
||||
this.maxEntry = hapTable.maxEntry();
|
||||
|
||||
// convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.probToQual(posteriorProb):
|
||||
// convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.trueProbToQual(posteriorProb):
|
||||
PreciseNonNegativeDouble sumErrorProbs = new PreciseNonNegativeDouble(ZERO);
|
||||
for (PhasingTable.PhasingTableEntry pte : hapTable) {
|
||||
if (pte != maxEntry)
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
|
@ -212,7 +213,7 @@ import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel;
|
|||
* @since ${DATE}
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Validation Utilities", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} )
|
||||
@Requires(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@By(DataSource.REFERENCE)
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
|
|
@ -121,7 +122,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Validation Utilities", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} )
|
||||
public class ValidationSiteSelector extends RodWalker<Integer, Integer> {
|
||||
|
||||
public enum AF_COMPUTATION_MODE {
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -107,7 +108,7 @@ import java.util.*;
|
|||
*
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
public class ApplyRecalibration extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
|
|
@ -125,7 +126,7 @@ import java.util.*;
|
|||
*
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.NONE)
|
||||
public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDatum>, ExpandingArrayList<VariantDatum>> implements TreeReducible<ExpandingArrayList<VariantDatum>> {
|
||||
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
@ -98,7 +99,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
|
||||
public class RegenotypeVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
|
||||
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
|
|
|||
|
|
@ -46,20 +46,25 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.pairhmm;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin, carneiro
|
||||
* Date: 10/16/12
|
||||
*/
|
||||
|
||||
public class LoglessCachingPairHMM extends CachingPairHMM {
|
||||
|
||||
public class LoglessCachingPairHMM extends PairHMM {
|
||||
protected static final double SCALE_FACTOR_LOG10 = 300.0;
|
||||
|
||||
double[][] constantMatrix = null; // The cache
|
||||
double[][] distanceMatrix = null; // The cache
|
||||
boolean constantsAreInitialized = false;
|
||||
|
||||
/**
|
||||
* Cached data structure that describes the first row's edge condition in the HMM
|
||||
*/
|
||||
protected static final double [] firstRowConstantMatrix = {
|
||||
QualityUtils.qualToProb((byte) (DEFAULT_GOP + DEFAULT_GOP)),
|
||||
QualityUtils.qualToProb(DEFAULT_GCP),
|
||||
|
|
@ -69,63 +74,48 @@ public class LoglessCachingPairHMM extends CachingPairHMM {
|
|||
1.0
|
||||
};
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void initialize( final int READ_MAX_LENGTH, final int HAPLOTYPE_MAX_LENGTH ) {
|
||||
public void initialize( final int haplotypeMaxLength, final int readMaxLength) {
|
||||
super.initialize(haplotypeMaxLength, readMaxLength);
|
||||
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||
final int X_METRIC_LENGTH = READ_MAX_LENGTH + 2;
|
||||
final int Y_METRIC_LENGTH = HAPLOTYPE_MAX_LENGTH + 2;
|
||||
|
||||
matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
|
||||
for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) {
|
||||
Arrays.fill(matchMetricArray[iii], 0.0);
|
||||
Arrays.fill(XMetricArray[iii], 0.0);
|
||||
Arrays.fill(YMetricArray[iii], 0.0);
|
||||
}
|
||||
|
||||
// the initial condition
|
||||
matchMetricArray[1][1] = Math.pow(10.0, SCALE_FACTOR_LOG10); // Math.log10(1.0);
|
||||
|
||||
constantMatrix = new double[X_METRIC_LENGTH][6];
|
||||
distanceMatrix = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
|
||||
// fill in the first row
|
||||
for( int jjj = 2; jjj < Y_METRIC_LENGTH; jjj++ ) {
|
||||
updateCell(1, jjj, 1.0, firstRowConstantMatrix, matchMetricArray, XMetricArray, YMetricArray);
|
||||
}
|
||||
constantMatrix = new double[X_METRIC_MAX_LENGTH][6];
|
||||
distanceMatrix = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH];
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public double computeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
|
||||
final byte[] readBases,
|
||||
final byte[] readQuals,
|
||||
final byte[] insertionGOP,
|
||||
final byte[] deletionGOP,
|
||||
final byte[] overallGCP,
|
||||
final int hapStartIndex,
|
||||
final boolean recacheReadValues ) {
|
||||
|
||||
if( recacheReadValues ) {
|
||||
initializeConstants( insertionGOP, deletionGOP, overallGCP );
|
||||
}
|
||||
public double subComputeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
|
||||
final byte[] readBases,
|
||||
final byte[] readQuals,
|
||||
final byte[] insertionGOP,
|
||||
final byte[] deletionGOP,
|
||||
final byte[] overallGCP,
|
||||
final int hapStartIndex,
|
||||
final boolean recacheReadValues ) {
|
||||
if ( ! constantsAreInitialized || recacheReadValues )
|
||||
initializeConstants( haplotypeBases.length, readBases.length, insertionGOP, deletionGOP, overallGCP );
|
||||
initializeDistanceMatrix( haplotypeBases, readBases, readQuals, hapStartIndex );
|
||||
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||
final int X_METRIC_LENGTH = readBases.length + 2;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
|
||||
// NOTE NOTE NOTE -- because of caching we need to only operate over X and Y according to this
|
||||
// read and haplotype lengths, not the max lengths
|
||||
final int readXMetricLength = readBases.length + 2;
|
||||
final int hapYMetricLength = haplotypeBases.length + 2;
|
||||
|
||||
for (int i = 2; i < X_METRIC_LENGTH; i++) {
|
||||
for (int j = hapStartIndex+1; j < Y_METRIC_LENGTH; j++) {
|
||||
for (int i = 2; i < readXMetricLength; i++) {
|
||||
// +1 here is because hapStartIndex is 0-based, but our matrices are 1 based
|
||||
for (int j = hapStartIndex+1; j < hapYMetricLength; j++) {
|
||||
updateCell(i, j, distanceMatrix[i][j], constantMatrix[i], matchMetricArray, XMetricArray, YMetricArray);
|
||||
}
|
||||
}
|
||||
|
||||
// final probability is the log10 sum of the last element in all three state arrays
|
||||
final int endI = X_METRIC_LENGTH - 1;
|
||||
final int endJ = Y_METRIC_LENGTH - 1;
|
||||
final int endI = readXMetricLength - 1;
|
||||
final int endJ = hapYMetricLength - 1;
|
||||
return Math.log10( matchMetricArray[endI][endJ] + XMetricArray[endI][endJ] + YMetricArray[endI][endJ] ) - SCALE_FACTOR_LOG10;
|
||||
}
|
||||
|
||||
|
|
@ -160,13 +150,32 @@ public class LoglessCachingPairHMM extends CachingPairHMM {
|
|||
/**
|
||||
* Initializes the matrix that holds all the constants related to quality scores.
|
||||
*
|
||||
* @param haplotypeSize the number of bases in the haplotype we are testing
|
||||
* @param readSize the number of bases in the read we are testing
|
||||
* @param insertionGOP insertion quality scores of the read
|
||||
* @param deletionGOP deletion quality scores of the read
|
||||
* @param overallGCP overall gap continuation penalty
|
||||
*/
|
||||
public void initializeConstants( final byte[] insertionGOP,
|
||||
final byte[] deletionGOP,
|
||||
final byte[] overallGCP ) {
|
||||
@Requires({
|
||||
"haplotypeSize > 0",
|
||||
"readSize > 0",
|
||||
"insertionGOP != null && insertionGOP.length == readSize",
|
||||
"deletionGOP != null && deletionGOP.length == readSize",
|
||||
"overallGCP != null && overallGCP.length == readSize"
|
||||
})
|
||||
@Ensures("constantsAreInitialized")
|
||||
private void initializeConstants( final int haplotypeSize,
|
||||
final int readSize,
|
||||
final byte[] insertionGOP,
|
||||
final byte[] deletionGOP,
|
||||
final byte[] overallGCP ) {
|
||||
// the initial condition -- must be here because it needs that actual read and haplotypes, not the maximum in init
|
||||
matchMetricArray[1][1] = Math.pow(10.0, SCALE_FACTOR_LOG10) / getNPotentialXStarts(haplotypeSize, readSize);
|
||||
|
||||
// fill in the first row
|
||||
for( int jjj = 2; jjj < Y_METRIC_MAX_LENGTH; jjj++ ) {
|
||||
updateCell(1, jjj, 1.0, firstRowConstantMatrix, matchMetricArray, XMetricArray, YMetricArray);
|
||||
}
|
||||
|
||||
final int l = insertionGOP.length;
|
||||
constantMatrix[1] = firstRowConstantMatrix;
|
||||
|
|
@ -181,6 +190,9 @@ public class LoglessCachingPairHMM extends CachingPairHMM {
|
|||
}
|
||||
constantMatrix[l+1][4] = 1.0;
|
||||
constantMatrix[l+1][5] = 1.0;
|
||||
|
||||
// note that we initialized the constants
|
||||
constantsAreInitialized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -66,6 +66,13 @@ public class BQSRReadTransformer extends ReadTransformer {
|
|||
public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
this.enabled = engine.hasBQSRArgumentSet();
|
||||
if ( enabled ) {
|
||||
// TODO -- See important note below about applying BQSR to a reduced BAM file:
|
||||
// If it is important to make sure that BQSR is not applied (as opposed to having the covariates computed) against a reduced bam file,
|
||||
// we need to figure out how to make this work. The problem is that the ReadTransformers are initialized before the ReadDataSource
|
||||
// inside the GenomeAnalysisEngine, so we generate a NPE when trying to retrieve the SAMFileHeaders. Ultimately, I don't think this is
|
||||
// a necessary check anyways since we disallow running BaseRecalibrator on reduced bams (so we can't generate the recal tables to use here).
|
||||
// Although we could add this check to the apply() method below, it's kind of ugly and inefficient.
|
||||
// The call here would be: RecalUtils.checkForInvalidRecalBams(engine.getSAMFileHeaders(), engine.getArguments().ALLOW_BQSR_ON_REDUCED_BAMS);
|
||||
final BQSRArgumentSet args = engine.getBQSRArgumentSet();
|
||||
this.bqsr = new BaseRecalibration(args.getRecalFile(), args.getQuantizationLevels(), args.shouldDisableIndelQuals(), args.getPreserveQscoresLessThan(), args.shouldEmitOriginalQuals(), args.getGlobalQScorePrior());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,7 +52,6 @@ import net.sf.samtools.SAMUtils;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -174,7 +173,7 @@ public class BaseRecalibration {
|
|||
double recalibratedQualDouble = hierarchicalBayesianQualityEstimate( epsilon, empiricalQualRG, empiricalQualQS, empiricalQualCovs );
|
||||
|
||||
// recalibrated quality is bound between 1 and MAX_QUAL
|
||||
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), RecalDatum.MAX_RECALIBRATED_Q_SCORE);
|
||||
|
||||
// return the quantized version of the recalibrated quality
|
||||
final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual);
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ public class QualQuantizer {
|
|||
"nObservations >= 0",
|
||||
"nErrors >= 0",
|
||||
"nErrors <= nObservations",
|
||||
"fixedQual >= -1 && fixedQual <= QualityUtils.MAX_QUAL_SCORE",
|
||||
"fixedQual >= -1 && fixedQual <= QualityUtils.MAX_SAM_QUAL_SCORE",
|
||||
"mergeOrder >= 0"})
|
||||
protected final class QualInterval implements Comparable<QualInterval> {
|
||||
final int qStart, qEnd, fixedQual, level;
|
||||
|
|
@ -224,10 +224,10 @@ public class QualQuantizer {
|
|||
/**
|
||||
* @return the QUAL of the error rate of this interval, or the fixed qual if this interval was created with a fixed qual.
|
||||
*/
|
||||
@Ensures("result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE")
|
||||
@Ensures("result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE")
|
||||
public byte getQual() {
|
||||
if ( ! hasFixedQual() )
|
||||
return QualityUtils.probToQual(1-getErrorRate(), 0);
|
||||
return QualityUtils.errorProbToQual(getErrorRate());
|
||||
else
|
||||
return (byte)fixedQual;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ public class QuantizationInfo {
|
|||
}
|
||||
|
||||
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_SAM_QUAL_SCORE +1]; // create a histogram with the empirical quality distribution
|
||||
for (int i = 0; i < qualHistogram.length; i++)
|
||||
qualHistogram[i] = 0L;
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ public class QuantizationInfo {
|
|||
}
|
||||
|
||||
public void noQuantization() {
|
||||
this.quantizationLevels = QualityUtils.MAX_QUAL_SCORE;
|
||||
this.quantizationLevels = QualityUtils.MAX_SAM_QUAL_SCORE;
|
||||
for (int i = 0; i < this.quantizationLevels; i++)
|
||||
quantizedQuals.set(i, (byte) i);
|
||||
}
|
||||
|
|
@ -124,7 +124,7 @@ public class QuantizationInfo {
|
|||
quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
|
||||
for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
|
||||
for (int qual = 0; qual <= QualityUtils.MAX_SAM_QUAL_SCORE; qual++) {
|
||||
quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual);
|
||||
quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
|
||||
quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
|
||||
|
|
|
|||
|
|
@ -74,10 +74,10 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.apache.commons.math.optimization.fitting.GaussianFunction;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -100,6 +100,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
"numMismatches <= numObservations"
|
||||
})
|
||||
public class RecalDatum {
|
||||
public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
private static final double UNINITIALIZED = -1.0;
|
||||
|
||||
/**
|
||||
|
|
@ -337,7 +338,7 @@ public class RecalDatum {
|
|||
// This is the old and busted point estimate approach:
|
||||
//final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
|
||||
|
||||
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
empiricalQuality = Math.min(empiricalQual, (double) MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
//static final boolean DEBUG = false;
|
||||
|
|
@ -369,7 +370,12 @@ public class RecalDatum {
|
|||
return Qemp;
|
||||
}
|
||||
|
||||
static private final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
|
||||
/**
|
||||
* Quals above this value should be capped down to this value (because they are too high)
|
||||
* in the base quality score recalibrator
|
||||
*/
|
||||
public final static byte MAX_GATK_USABLE_Q_SCORE = 40;
|
||||
static private final double[] log10QempPriorCache = new double[MAX_GATK_USABLE_Q_SCORE + 1];
|
||||
static {
|
||||
// f(x) = a + b*exp(-((x - c)^2 / (2*d^2)))
|
||||
// Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell".
|
||||
|
|
@ -379,7 +385,7 @@ public class RecalDatum {
|
|||
final double GF_d = 0.5; // with these parameters, deltas can shift at most ~20 Q points
|
||||
|
||||
final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d);
|
||||
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ ) {
|
||||
for ( int i = 0; i <= MAX_GATK_USABLE_Q_SCORE; i++ ) {
|
||||
double log10Prior = Math.log10(gaussian.value((double) i));
|
||||
if ( Double.isInfinite(log10Prior) )
|
||||
log10Prior = -Double.MAX_VALUE;
|
||||
|
|
@ -388,7 +394,7 @@ public class RecalDatum {
|
|||
}
|
||||
|
||||
static protected double log10QempPrior(final double Qempirical, final double Qreported) {
|
||||
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), QualityUtils.MAX_GATK_USABLE_Q_SCORE);
|
||||
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), MAX_GATK_USABLE_Q_SCORE);
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
|
||||
return log10QempPriorCache[difference];
|
||||
|
|
|
|||
|
|
@ -46,10 +46,14 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.compression.reducereads.ReduceReads;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
@ -57,7 +61,6 @@ import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -421,7 +424,7 @@ public class RecalUtils {
|
|||
|
||||
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
|
||||
|
||||
final NestedHashMap deltaTable = new NestedHashMap();
|
||||
final NestedIntegerArray<RecalDatum> deltaTable = createDeltaTable(recalibrationTables, requestedCovariates.length);
|
||||
|
||||
// add the quality score table to the delta table
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable();
|
||||
|
|
@ -468,24 +471,57 @@ public class RecalUtils {
|
|||
covariateNameMap.put(covariate, parseCovariateName(covariate));
|
||||
|
||||
// print each data line
|
||||
for (final NestedHashMap.Leaf leaf : deltaTable.getAllLeaves()) {
|
||||
for (final NestedIntegerArray.Leaf<RecalDatum> leaf : deltaTable.getAllLeaves()) {
|
||||
final List<Object> deltaKeys = generateValuesFromKeys(leaf.keys, requestedCovariates, covariateNameMap);
|
||||
final RecalDatum deltaDatum = (RecalDatum)leaf.value;
|
||||
final RecalDatum deltaDatum = leaf.value;
|
||||
deltaTableFile.print(Utils.join(",", deltaKeys));
|
||||
deltaTableFile.print("," + deltaDatum.stringForCSV());
|
||||
deltaTableFile.println("," + recalibrationMode);
|
||||
}
|
||||
}
|
||||
|
||||
protected static List<Object> generateValuesFromKeys(final List<Object> keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
|
||||
/*
|
||||
* Return an initialized nested integer array with appropriate dimensions for use with the delta tables
|
||||
*
|
||||
* @param recalibrationTables the recal tables
|
||||
* @param numCovariates the total number of covariates being used
|
||||
* @return a non-null nested integer array
|
||||
*/
|
||||
@Requires("recalibrationTables != null && numCovariates > 0")
|
||||
@Ensures("result != null")
|
||||
private static NestedIntegerArray<RecalDatum> createDeltaTable(final RecalibrationTables recalibrationTables, final int numCovariates) {
|
||||
|
||||
final int[] dimensionsForDeltaTable = new int[4];
|
||||
|
||||
// initialize the dimensions with those of the qual table to start with
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable();
|
||||
final int[] dimensionsOfQualTable = qualTable.getDimensions();
|
||||
dimensionsForDeltaTable[0] = dimensionsOfQualTable[0]; // num read groups
|
||||
dimensionsForDeltaTable[1] = numCovariates + 1; // num covariates
|
||||
dimensionsForDeltaTable[2] = dimensionsOfQualTable[1];
|
||||
dimensionsForDeltaTable[3] = dimensionsOfQualTable[2];
|
||||
|
||||
// now, update the dimensions based on the optional covariate tables as needed
|
||||
for ( int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < numCovariates; i++ ) {
|
||||
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(i);
|
||||
final int[] dimensionsOfCovTable = covTable.getDimensions();
|
||||
dimensionsForDeltaTable[2] = Math.max(dimensionsForDeltaTable[2], dimensionsOfCovTable[2]);
|
||||
dimensionsForDeltaTable[3] = Math.max(dimensionsForDeltaTable[3], dimensionsOfCovTable[3]);
|
||||
}
|
||||
|
||||
return new NestedIntegerArray<RecalDatum>(dimensionsForDeltaTable);
|
||||
}
|
||||
|
||||
protected static List<Object> generateValuesFromKeys(final int[] keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
|
||||
final List<Object> values = new ArrayList<Object>(4);
|
||||
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()].formatKey((Integer)keys.get(0)));
|
||||
final int covariateIndex = (Integer)keys.get(1);
|
||||
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()].formatKey(keys[0]));
|
||||
|
||||
final int covariateIndex = keys[1];
|
||||
final int covariateKey = keys[2];
|
||||
final Covariate covariate = covariateIndex == covariates.length ? covariates[RecalibrationTables.TableType.QUALITY_SCORE_TABLE.ordinal()] : covariates[covariateIndex];
|
||||
final int covariateKey = (Integer)keys.get(2);
|
||||
values.add(covariate.formatKey(covariateKey));
|
||||
values.add(covariateNameMap.get(covariate));
|
||||
values.add(EventType.eventFrom((Integer)keys.get(3)).prettyPrint());
|
||||
values.add(EventType.eventFrom(keys[3]).prettyPrint());
|
||||
|
||||
return values;
|
||||
}
|
||||
|
|
@ -499,20 +535,14 @@ public class RecalUtils {
|
|||
* @param deltaKey the key to the table
|
||||
* @param recalDatum the recal datum to combine with the accuracyDatum element in the table
|
||||
*/
|
||||
private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) {
|
||||
Object[] wrappedKey = wrapKeys(deltaKey);
|
||||
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
|
||||
private static void addToDeltaTable(final NestedIntegerArray<RecalDatum> deltaTable, final int[] deltaKey, final RecalDatum recalDatum) {
|
||||
final RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
if (deltaDatum == null)
|
||||
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
// if we don't have a key yet, create a new one with the same values as the current datum
|
||||
deltaTable.put(new RecalDatum(recalDatum), deltaKey);
|
||||
else
|
||||
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
||||
}
|
||||
|
||||
private static Object[] wrapKeys(final int[] keys) {
|
||||
final Object[] wrappedKeys = new Object[keys.length];
|
||||
for (int i = 0; i < keys.length; i++)
|
||||
wrappedKeys[i] = keys[i];
|
||||
return wrappedKeys;
|
||||
// if we do have a datum, combine it with this one
|
||||
deltaDatum.combine(recalDatum);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -847,7 +877,6 @@ public class RecalUtils {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* creates a datum object with one observation and one or zero error
|
||||
*
|
||||
|
|
@ -858,4 +887,20 @@ public class RecalUtils {
|
|||
private static RecalDatum createDatumObject(final byte reportedQual, final double isError) {
|
||||
return new RecalDatum(1, isError, reportedQual);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for invalid BAMs that are being used with BQSR and fails with a UserException if it finds one
|
||||
*
|
||||
* @param headers sam file headers being passed into the GATK engine
|
||||
* @param allowBqsrOnReducedBams should we allow BQSR on reduced bams?
|
||||
*/
|
||||
public static void checkForInvalidRecalBams(final List<SAMFileHeader> headers, final boolean allowBqsrOnReducedBams) {
|
||||
// for now, the only check we make is against reduced bams
|
||||
if ( !allowBqsrOnReducedBams ) {
|
||||
for ( final SAMFileHeader header : headers ) {
|
||||
if ( header.getProgramRecord(ReduceReads.PROGRAM_RECORD_NAME) != null )
|
||||
throw new UserException.BadInput("base quality score recalibration should absolutely not be run on reduced BAM files! Please run ReduceReads only after BQSR has been performed");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -263,11 +263,11 @@ public class RecalibrationReport {
|
|||
* Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
|
||||
*
|
||||
* @param table the GATKReportTable containing the quantization mappings
|
||||
* @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
|
||||
* @return an ArrayList with the quantization mappings from 0 to MAX_SAM_QUAL_SCORE
|
||||
*/
|
||||
private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
|
||||
final Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
final Byte[] quals = new Byte[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
|
||||
final Long[] counts = new Long[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
final byte originalQual = (byte)i;
|
||||
final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
|
|
@ -322,6 +322,9 @@ public class RecalibrationReport {
|
|||
else if (argument.equals("deletions_default_quality"))
|
||||
RAC.DELETIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
||||
|
||||
else if (argument.equals("maximum_cycle_value"))
|
||||
RAC.MAXIMUM_CYCLE_VALUE = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("low_quality_tail"))
|
||||
RAC.LOW_QUAL_TAIL = Byte.parseByte((String) value);
|
||||
|
||||
|
|
|
|||
|
|
@ -136,9 +136,6 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
final int MAX_CYCLE_FOR_INDELS = readLength - CUSHION_FOR_INDELS - 1;
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
if ( cycle > MAXIMUM_CYCLE_VALUE )
|
||||
throw new UserException("The maximum allowed value for the cycle is " + MAXIMUM_CYCLE_VALUE + ", but a larger cycle was detected in read " + read.getReadName() + ". Please use the --maximum_cycle_value argument to increase this value (at the expense of requiring more memory to run)");
|
||||
|
||||
final int substitutionKey = keyFromCycle(cycle);
|
||||
final int indelKey = (i < CUSHION_FOR_INDELS || i > MAX_CYCLE_FOR_INDELS) ? -1 : substitutionKey;
|
||||
values.addCovariate(substitutionKey, indelKey, indelKey, i);
|
||||
|
|
@ -268,9 +265,12 @@ public class CycleCovariate implements StandardCovariate {
|
|||
return (MAXIMUM_CYCLE_VALUE << 1) + 1;
|
||||
}
|
||||
|
||||
private static int keyFromCycle(final int cycle) {
|
||||
private int keyFromCycle(final int cycle) {
|
||||
// no negative values because values must fit into the first few bits of the long
|
||||
int result = Math.abs(cycle);
|
||||
if ( result > MAXIMUM_CYCLE_VALUE )
|
||||
throw new UserException("The maximum allowed value for the cycle is " + MAXIMUM_CYCLE_VALUE + ", but a larger cycle (" + result + ") was detected. Please use the --maximum_cycle_value argument to increase this value (at the expense of requiring more memory to run)");
|
||||
|
||||
result = result << 1; // shift so we can add the "sign" bit
|
||||
if ( cycle < 0 )
|
||||
result++; // negative cycles get the lower-most bit set
|
||||
|
|
|
|||
|
|
@ -119,6 +119,6 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
return QualityUtils.MAX_QUAL_SCORE;
|
||||
return QualityUtils.MAX_SAM_QUAL_SCORE;
|
||||
}
|
||||
}
|
||||
|
|
@ -46,10 +46,18 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
|
||||
* Basic unit test for AlleleBiasedDownsamplingUtils
|
||||
|
|
@ -126,4 +134,75 @@ public class AlleleBiasedDownsamplingUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testLoadContaminationFile1(){
|
||||
Logger logger=org.apache.log4j.Logger.getRootLogger();
|
||||
|
||||
final String ArtificalBAMLocation = privateTestDir + "ArtificallyContaminatedBams/";
|
||||
final File ContamFile1=new File(ArtificalBAMLocation+"contamination.case.1.txt");
|
||||
|
||||
Map<String,Double> Contam1=new HashMap<String,Double>();
|
||||
Set<String> Samples1=new HashSet<String>();
|
||||
|
||||
Contam1.put("NA11918",0.15);
|
||||
Samples1.addAll(Contam1.keySet());
|
||||
testLoadFile(ContamFile1,Samples1,Contam1,logger);
|
||||
|
||||
Contam1.put("NA12842",0.13);
|
||||
Samples1.addAll(Contam1.keySet());
|
||||
testLoadFile(ContamFile1,Samples1,Contam1,logger);
|
||||
|
||||
Samples1.add("DUMMY");
|
||||
testLoadFile(ContamFile1,Samples1,Contam1,logger);
|
||||
}
|
||||
|
||||
private static void testLoadFile(final File file, final Set<String> Samples, final Map<String,Double> map, Logger logger){
|
||||
Map<String,Double> loadedMap = AlleleBiasedDownsamplingUtils.loadContaminationFile(file,0.0,Samples,logger);
|
||||
Assert.assertTrue(loadedMap.equals(map));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoadContaminationFiles(){
|
||||
Logger logger=org.apache.log4j.Logger.getRootLogger();
|
||||
final String ArtificalBAMLocation = privateTestDir + "ArtificallyContaminatedBams/";
|
||||
|
||||
for(int i=1; i<=5; i++){
|
||||
File ContamFile=new File(ArtificalBAMLocation+String.format("contamination.case.%d.txt",i));
|
||||
Assert.assertTrue(AlleleBiasedDownsamplingUtils.loadContaminationFile(ContamFile,0.0,null,logger).size()==2);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.MalformedFile.class)
|
||||
public void testLoadBrokenContaminationFile1(){
|
||||
testLoadBrokenContaminationFile(1);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.MalformedFile.class)
|
||||
public void testLoadBrokenContaminationFile2(){
|
||||
testLoadBrokenContaminationFile(2);
|
||||
}
|
||||
@Test(expectedExceptions = UserException.MalformedFile.class)
|
||||
public void testLoadBrokenContaminationFile3(){
|
||||
testLoadBrokenContaminationFile(3);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.MalformedFile.class)
|
||||
public void testLoadBrokenContaminationFile4(){
|
||||
testLoadBrokenContaminationFile(4);
|
||||
}
|
||||
|
||||
|
||||
public void testLoadBrokenContaminationFile(final int i){
|
||||
Logger logger=org.apache.log4j.Logger.getRootLogger();
|
||||
final String ArtificalBAMLocation = privateTestDir + "ArtificallyContaminatedBams/";
|
||||
|
||||
File ContaminationFile=new File(ArtificalBAMLocation+String.format("contamination.case.broken.%d.txt",i));
|
||||
AlleleBiasedDownsamplingUtils.loadContaminationFile(ContaminationFile,0.0,null,logger);
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,26 +94,27 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
private static final String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||
private static final String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||
|
||||
@DataProvider(name = "BQSRTest")
|
||||
public Object[][] createBQSRTestData() {
|
||||
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||
return new Object[][]{
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "6b3f252718f59cf9fd3f7612f73a35bf")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "863576ac9ff0b0e02f2e84aef15923a7")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "03e28f48201a35c70d1cf48e9f45364f")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "6e3c5635d387a1c428a7c9c88ad26488")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "6507adcb94bacde4cdee9caa9f14f24b")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "399bbb4bf80764dfc644b2f95d824615")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "34d70899253c2b3343ca9ae944291c30")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "e61fa47bfc08433f0cd55558e2081548")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "5c2622c63225b8b04990baf0ae4de07c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "ee7191d83d7d5bb957dc4595883c32f1")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "da92f4730356f479c2c2b71497cfac6d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "8075595113b48c0c7ead08ce41bef9fe")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "be05834841c5690c66910270521d5c32")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "e61fa47bfc08433f0cd55558e2081548")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "8ee0b498dbbc95ce76393a0f089fec92")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "61fd466b5e94d2d67e116f6f67c9f939")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "e08b5bcdb64f4beea03730e5631a14ca")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "448a45dc154c95d1387cb5cdddb67071")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "c1e7999e445d51bbe2e775dac5325643")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a57c16918cdfe12d55a89c21bf195279")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "836dccacf48ccda6b2843d07e8f1ef4d")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "0fb2aedc2f8d66b5821cb570f15a8c4d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c9953f020a65c1603a6d71aeeb1b95f3")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "85a120b7d86b61597b86b9e93decbdfc")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "5248dc49aec0323c74b496bb4928c73c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "cb52f267e0010f849f50b0bf1de474a1")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "1425a5063ee757dbfc013df24e65a67a")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "c1c3cda8caceed619d3d439c3990cd26")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c9953f020a65c1603a6d71aeeb1b95f3")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5bfff0c699345cca12a9b33acf95588f")},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -150,7 +151,7 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
" -sortAllCols" +
|
||||
" --plot_pdf_file /dev/null" +
|
||||
" --intermediate_csv_file %s",
|
||||
Arrays.asList("dd6e0e1e3f53f8ae0c8f5de21ded6ee9"));
|
||||
Arrays.asList("90ad19143024684e3c4410dc8fd2bd9d"));
|
||||
executeTest("testBQSR-CSVfile", spec);
|
||||
}
|
||||
|
||||
|
|
@ -169,6 +170,19 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
executeTest("testBQSRFailWithSolidNoCall", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBQSRFailWithReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
" -T BaseRecalibrator" +
|
||||
" -R " + b37KGReference +
|
||||
" -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam" +
|
||||
" -L 1:67,225,396-67,288,518" +
|
||||
" -o /dev/null",
|
||||
0,
|
||||
UserException.class);
|
||||
executeTest("testBQSRFailWithReducedBam", spec);
|
||||
}
|
||||
|
||||
private static class PRTest {
|
||||
final String args;
|
||||
final String md5;
|
||||
|
|
@ -206,10 +220,41 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
" -R " + hg18Reference +
|
||||
" -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
|
||||
" -nct " + nct +
|
||||
" --no_pg_tag" +
|
||||
" -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" +
|
||||
params.args +
|
||||
" -o %s",
|
||||
Arrays.asList(params.md5));
|
||||
executeTest("testPrintReads-"+params.args, spec).getFirst();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPRNoFailWithHighMaxCycle() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
" -T PrintReads" +
|
||||
" -R " + hg18Reference +
|
||||
" -I " + HiSeqBam +
|
||||
" -L " + HiSeqInterval +
|
||||
" --no_pg_tag" +
|
||||
" -BQSR " + privateTestDir + "HiSeq.1mb.1RG.highMaxCycle.table" +
|
||||
" -o /dev/null",
|
||||
0,
|
||||
Arrays.<String>asList());
|
||||
executeTest("testPRNoFailWithHighMaxCycle", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPRFailWithLowMaxCycle() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
" -T PrintReads" +
|
||||
" -R " + hg18Reference +
|
||||
" -I " + HiSeqBam +
|
||||
" -L " + HiSeqInterval +
|
||||
" --no_pg_tag" +
|
||||
" -BQSR " + privateTestDir + "HiSeq.1mb.1RG.lowMaxCycle.table" +
|
||||
" -o /dev/null",
|
||||
0,
|
||||
UserException.class);
|
||||
executeTest("testPRFailWithLowMaxCycle", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,60 +47,56 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Basic unit test for BaseCounts in reduced reads
|
||||
*/
|
||||
public class BaseCountsUnitTest extends BaseTest {
|
||||
private class SingleTest {
|
||||
|
||||
private class BaseCountsTest {
|
||||
public String bases;
|
||||
public byte mostCountBase;
|
||||
public int mostCommonCount;
|
||||
|
||||
private SingleTest(String bases, char mostCountBase, int mostCommonCount) {
|
||||
private BaseCountsTest(String bases, char mostCountBase, int mostCommonCount) {
|
||||
this.mostCommonCount = mostCommonCount;
|
||||
this.mostCountBase = (byte)mostCountBase;
|
||||
this.bases = bases;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "counting")
|
||||
public Object[][] createCountingData() {
|
||||
List<BaseCountsTest> params = new ArrayList<BaseCountsTest>();
|
||||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createData1() {
|
||||
List<SingleTest> params = new ArrayList<SingleTest>();
|
||||
|
||||
params.add(new SingleTest("A", 'A', 1 ));
|
||||
params.add(new SingleTest("AA", 'A', 2 ));
|
||||
params.add(new SingleTest("AC", 'A', 1 ));
|
||||
params.add(new SingleTest("AAC", 'A', 2 ));
|
||||
params.add(new SingleTest("AAA", 'A', 3 ));
|
||||
params.add(new SingleTest("AAAN", 'A', 3 ));
|
||||
params.add(new SingleTest("AAANNNN", 'N', 4 ));
|
||||
params.add(new SingleTest("AACTG", 'A', 2 ));
|
||||
params.add(new SingleTest("D", 'D', 1 ));
|
||||
params.add(new SingleTest("DDAAD", 'D', 3));
|
||||
params.add(new SingleTest("", (char)BaseCounts.MAX_BASE_WITH_NO_COUNTS, 0 ));
|
||||
params.add(new SingleTest("AAIIIAI", 'I', 4 ));
|
||||
params.add(new BaseCountsTest("A", 'A', 1 ));
|
||||
params.add(new BaseCountsTest("AA", 'A', 2 ));
|
||||
params.add(new BaseCountsTest("AC", 'A', 1 ));
|
||||
params.add(new BaseCountsTest("AAC", 'A', 2 ));
|
||||
params.add(new BaseCountsTest("AAA", 'A', 3 ));
|
||||
params.add(new BaseCountsTest("AAAN", 'A', 3 ));
|
||||
params.add(new BaseCountsTest("AAANNNN", 'N', 4 ));
|
||||
params.add(new BaseCountsTest("AACTG", 'A', 2 ));
|
||||
params.add(new BaseCountsTest("D", 'D', 1 ));
|
||||
params.add(new BaseCountsTest("DDAAD", 'D', 3));
|
||||
params.add(new BaseCountsTest("", (char)BaseCounts.MAX_BASE_WITH_NO_COUNTS, 0 ));
|
||||
params.add(new BaseCountsTest("AAIIIAI", 'I', 4 ));
|
||||
|
||||
List<Object[]> params2 = new ArrayList<Object[]>();
|
||||
for ( SingleTest x : params ) params2.add(new Object[]{x});
|
||||
for ( BaseCountsTest x : params ) params2.add(new Object[]{x});
|
||||
return params2.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test(dataProvider = "data", enabled = true)
|
||||
public void testCounting(SingleTest params) {
|
||||
@Test(dataProvider = "counting", enabled = true)
|
||||
public void testCounting(BaseCountsTest params) {
|
||||
BaseCounts counts = new BaseCounts();
|
||||
|
||||
for ( byte base : params.bases.getBytes() )
|
||||
|
|
@ -110,5 +106,96 @@ public class BaseCountsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(counts.totalCount(), params.bases.length(), name);
|
||||
Assert.assertEquals(counts.countOfBase(counts.baseIndexWithMostCounts()), params.mostCommonCount, name);
|
||||
Assert.assertEquals((char)counts.baseWithMostCounts(), (char)params.mostCountBase, name);
|
||||
|
||||
// test the static creation
|
||||
final int[] countsArray = new int[] { counts.countOfBase(BaseIndex.A), counts.countOfBase(BaseIndex.C),
|
||||
counts.countOfBase(BaseIndex.G), counts.countOfBase(BaseIndex.T)};
|
||||
final BaseCounts countsFromArray = BaseCounts.createWithCounts(countsArray);
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.C), countsFromArray.countOfBase(BaseIndex.C));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.G), countsFromArray.countOfBase(BaseIndex.G));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.T), countsFromArray.countOfBase(BaseIndex.T));
|
||||
Assert.assertEquals(ACGTcounts(counts), countsFromArray.totalCount());
|
||||
|
||||
// test addition
|
||||
counts.add(countsFromArray);
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.A), 2 * countsFromArray.countOfBase(BaseIndex.A));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.C), 2 * countsFromArray.countOfBase(BaseIndex.C));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.G), 2 * countsFromArray.countOfBase(BaseIndex.G));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.T), 2 * countsFromArray.countOfBase(BaseIndex.T));
|
||||
Assert.assertEquals(ACGTcounts(counts), 2 * countsFromArray.totalCount());
|
||||
|
||||
// test subtraction
|
||||
counts.sub(countsFromArray);
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.C), countsFromArray.countOfBase(BaseIndex.C));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.G), countsFromArray.countOfBase(BaseIndex.G));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.T), countsFromArray.countOfBase(BaseIndex.T));
|
||||
Assert.assertEquals(ACGTcounts(counts), countsFromArray.totalCount());
|
||||
|
||||
// test decrementing
|
||||
if ( counts.countOfBase(BaseIndex.A) > 0 ) {
|
||||
counts.decr((byte)'A');
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A) - 1);
|
||||
}
|
||||
}
|
||||
|
||||
private static int ACGTcounts(final BaseCounts baseCounts) {
|
||||
return baseCounts.totalCountWithoutIndels() - baseCounts.countOfBase(BaseIndex.N);
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////
|
||||
// TEST FOR QUALS IN BASECOUNTS //
|
||||
//////////////////////////////////
|
||||
|
||||
private class BaseCountsQualsTest {
|
||||
public final List<Integer> quals;
|
||||
|
||||
private BaseCountsQualsTest(final List<Integer> quals) {
|
||||
this.quals = quals;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "quals")
|
||||
public Object[][] createQualsData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final int[] quals = new int[]{ 0, 5, 10, 15, 20, 30, 40, 50 };
|
||||
|
||||
for ( final int qual1 : quals ) {
|
||||
for ( final int qual2 : quals ) {
|
||||
for ( final int qual3 : quals ) {
|
||||
tests.add(new Object[]{new BaseCountsQualsTest(Arrays.asList(qual1, qual2, qual3))});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "quals", enabled = true)
|
||||
public void testQuals(BaseCountsQualsTest test) {
|
||||
BaseCounts counts = new BaseCounts();
|
||||
|
||||
for ( int qual : test.quals )
|
||||
counts.incr(BaseIndex.A, (byte)qual);
|
||||
|
||||
final int actualSum = (int)counts.getSumQuals((byte)'A');
|
||||
final int expectedSum = qualSum(test.quals);
|
||||
Assert.assertEquals(actualSum, expectedSum);
|
||||
|
||||
final int actualAverage = (int)counts.averageQuals((byte)'A');
|
||||
Assert.assertEquals(actualAverage, expectedSum / test.quals.size());
|
||||
|
||||
// test both proportion methods
|
||||
Assert.assertEquals(counts.baseCountProportion(BaseIndex.A), counts.baseCountProportion((byte)'A'));
|
||||
}
|
||||
|
||||
private static int qualSum(final List<Integer> quals) {
|
||||
int sum = 0;
|
||||
for ( final int qual : quals )
|
||||
sum += qual;
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
|
@ -131,4 +131,69 @@ public class HeaderElementUnitTest extends BaseTest {
|
|||
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05));
|
||||
Assert.assertEquals(headerElement.isVariant(0.05, 0.05), test.isClip);
|
||||
}
|
||||
|
||||
|
||||
private class AllelesTest {
|
||||
public final int[] counts;
|
||||
public final double proportion;
|
||||
|
||||
private AllelesTest(final int[] counts, final double proportion) {
|
||||
this.counts = counts;
|
||||
this.proportion = proportion;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "alleles")
|
||||
public Object[][] createAllelesData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final int[] counts = new int[]{ 0, 5, 10, 15, 20 };
|
||||
final double [] proportions = new double[]{ 0.0, 0.05, 0.10, 0.50, 1.0 };
|
||||
|
||||
for ( final int count1 : counts ) {
|
||||
for ( final int count2 : counts ) {
|
||||
for ( final int count3 : counts ) {
|
||||
for ( final int count4 : counts ) {
|
||||
for ( final double proportion : proportions ) {
|
||||
tests.add(new Object[]{new AllelesTest(new int[]{count1, count2, count3, count4}, proportion)});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "alleles", enabled = true)
|
||||
public void testAlleles(AllelesTest test) {
|
||||
|
||||
HeaderElement headerElement = new HeaderElement(1000, 0);
|
||||
for ( int i = 0; i < test.counts.length; i++ ) {
|
||||
BaseIndex base = BaseIndex.values()[i];
|
||||
for ( int j = 0; j < test.counts[i]; j++ )
|
||||
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false);
|
||||
}
|
||||
|
||||
final int nAllelesSeen = headerElement.getNumberOfAlleles(test.proportion);
|
||||
final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.proportion);
|
||||
|
||||
Assert.assertEquals(nAllelesSeen, nAllelesExpected);
|
||||
}
|
||||
|
||||
private static int calculateExpectedAlleles(final int[] counts, final double proportion) {
|
||||
double total = 0.0;
|
||||
for ( final int count : counts ) {
|
||||
total += count;
|
||||
}
|
||||
|
||||
final int minCount = (int)(proportion * total);
|
||||
|
||||
int result = 0;
|
||||
for ( final int count : counts ) {
|
||||
if ( count > 0 && count >= minCount )
|
||||
result++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,40 +74,40 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testDefaultCompression() {
|
||||
RRTest("testDefaultCompression ", L, "98080d3c53f441564796fc143cf510da");
|
||||
RRTest("testDefaultCompression ", L, "17908e8515217c4693d303ed68108ccc");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testInsertionsAtEdgeOfConsensus() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
|
||||
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("2a6e08a0206bd8ec7671224c4a55dae0")));
|
||||
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("3103667fc68c3136a8cfa8e22429f94e")));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMultipleIntervals() {
|
||||
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
||||
RRTest("testMultipleIntervals ", intervals, "c5dcdf4edf368b5b897d66f76034d9f0");
|
||||
RRTest("testMultipleIntervals ", intervals, "497c5e36c2beaad2fcdbd02a0b9c121b");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHighCompression() {
|
||||
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "27cb99e87eda5e46187e56f50dd37f26");
|
||||
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "0ff4142e4d7b6a9a9c76012246ad9e2d");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLowCompression() {
|
||||
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "4e7f111688d49973c35669855b7a2eaf");
|
||||
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "7890a37444a0e05b902f63a83238ce37");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testIndelCompression() {
|
||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f6c9ea83608f35f113cf1f62a77ee6d0");
|
||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f58ae2154e0e5716be0e850b7605856e");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testFilteredDeletionCompression() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("122e4e60c4412a31d0aeb3cce879e841")));
|
||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bfe0693aea74634f1035a9bd11302517")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -121,7 +121,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testAddingReadAfterTailingTheStash() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("647b0f0f95730de8e6bc4f74186ad4df")));
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("f118e83c394d21d901a24230379864fc")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -131,13 +131,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testDivideByZero() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||
executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("2c87985972dd43ee9dd50b463d93a511")));
|
||||
executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bd5198a3e21034887b741faaaa3964bf")));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testCoReduction() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("5c30fde961a1357bf72c15144c01981b")));
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("13c44a9afa92ae728bf55b7075cc5de3")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -147,7 +147,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testReadOffContig() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
|
||||
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("2f17c1a78e9d0138217fdb83cede8f68")));
|
||||
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("922be8b1151dd0d92602af93b77f7a51")));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
|
||||
public class ReduceReadsUnitTest extends BaseTest {
|
||||
|
||||
Random random = new Random(987743);
|
||||
Object2LongOpenHashMap<String> hash = new Object2LongOpenHashMap<String>();
|
||||
long nextNumber = 0L;
|
||||
|
||||
/**
|
||||
* Combinatorial unit test data provider example.
|
||||
*
|
||||
* Creates data for testMyData test function, containing two arguments, start and size at each value
|
||||
*
|
||||
* @return Object[][] for testng DataProvider
|
||||
*/
|
||||
@DataProvider(name = "ReadNameProvider")
|
||||
public Object[][] readNameProvider() {
|
||||
final int readNameLength = 4;
|
||||
final int nReads = 100000;
|
||||
final int charVariety = 20;
|
||||
ObjectArrayList<Object[]> tests = new ObjectArrayList<Object[]>();
|
||||
ObjectOpenHashSet<String> truthSet = new ObjectOpenHashSet<String>();
|
||||
byte[] bytes = new byte[readNameLength];
|
||||
for ( int i = 0; i<nReads; i++) {
|
||||
random.nextBytes(bytes);
|
||||
StringBuilder readNameBuilder = new StringBuilder(readNameLength);
|
||||
for (byte b : bytes) {
|
||||
readNameBuilder.append((char) ('a' + Math.abs(b) % charVariety));
|
||||
}
|
||||
String readName = readNameBuilder.toString();
|
||||
tests.add(new Object[]{readName, truthSet.contains(readName)});
|
||||
truthSet.add(readName);
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the read name compression functionality
|
||||
*/
|
||||
@Test(dataProvider = "ReadNameProvider")
|
||||
public void testReadNameCompression(final String name, final boolean alreadySeen) {
|
||||
GATKSAMRecord read = GATKSAMRecord.createRandomRead(1);
|
||||
read.setReadName(name);
|
||||
final int previousHashSize = hash.keySet().size();
|
||||
final long previousNumber = nextNumber;
|
||||
nextNumber = ReduceReads.compressReadName(hash, read, nextNumber);
|
||||
Assert.assertEquals(hash.keySet().size(), alreadySeen ? previousHashSize : previousHashSize + 1);
|
||||
Assert.assertEquals(nextNumber, alreadySeen ? previousNumber : previousNumber + 1);
|
||||
Assert.assertTrue(hash.containsKey(name));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -46,9 +46,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectList;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectSet;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
||||
|
|
@ -225,7 +230,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
|
||||
private static final int readLength = 100;
|
||||
private static final int testRegionSize = 1000;
|
||||
private final List<GATKSAMRecord> basicReads = new ArrayList<GATKSAMRecord>(20);
|
||||
private final ObjectList<GATKSAMRecord> basicReads = new ObjectArrayList<GATKSAMRecord>(20);
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private SAMFileHeader header;
|
||||
|
||||
|
|
@ -242,40 +247,67 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||
read.setMappingQuality(30);
|
||||
read.setReadNegativeStrandFlag(i % 40 == 20);
|
||||
basicReads.add(read);
|
||||
}
|
||||
}
|
||||
|
||||
private class ConsensusCreationTest {
|
||||
public final int expectedNumberOfReads;
|
||||
public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression;
|
||||
public final List<GATKSAMRecord> myReads = new ArrayList<GATKSAMRecord>(20);
|
||||
|
||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads) {
|
||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
|
||||
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
||||
|
||||
// first, add the basic reads to the collection
|
||||
myReads.addAll(basicReads);
|
||||
|
||||
// then add the permuted reads
|
||||
for ( final GenomeLoc loc : locs )
|
||||
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality));
|
||||
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M));
|
||||
}
|
||||
|
||||
private GATKSAMRecord createVariantRead(final GenomeLoc loc, final boolean readShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality) {
|
||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
|
||||
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
||||
|
||||
// first, add the basic reads to the collection
|
||||
myReads.addAll(basicReads);
|
||||
|
||||
// then add the permuted reads
|
||||
for ( final GenomeLoc loc : locs )
|
||||
myReads.add(createVariantRead(loc, false, false, operator));
|
||||
}
|
||||
|
||||
private GATKSAMRecord createVariantRead(final GenomeLoc loc, final boolean readShouldBeLowQuality,
|
||||
final boolean variantBaseShouldBeLowQuality, final CigarOperator operator) {
|
||||
|
||||
final int startPos = loc.getStart() - 50;
|
||||
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead" + startPos, 0, startPos, readLength);
|
||||
|
||||
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||
// create a mismatch
|
||||
bases[50] = 'C';
|
||||
// create a mismatch if requested
|
||||
if ( operator == CigarOperator.M )
|
||||
bases[50] = 'C';
|
||||
read.setReadBases(bases);
|
||||
|
||||
final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength);
|
||||
if ( variantBaseShouldBeLowQuality )
|
||||
baseQuals[50] = (byte)10;
|
||||
read.setBaseQualities(baseQuals);
|
||||
final byte mappingQual = readShouldBeLowQuality ? (byte)10 : (byte)30;
|
||||
read.setMappingQuality(mappingQual);
|
||||
|
||||
if ( operator != CigarOperator.M ) {
|
||||
final List<CigarElement> elements = new ArrayList<CigarElement>(3);
|
||||
elements.add(new CigarElement(operator == CigarOperator.D ? 50 : 51, CigarOperator.M));
|
||||
elements.add(new CigarElement(1, operator));
|
||||
elements.add(new CigarElement(operator == CigarOperator.D ? 50 : 48, CigarOperator.M));
|
||||
read.setCigar(new Cigar(elements));
|
||||
}
|
||||
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
@ -291,41 +323,61 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
// test high quality reads and bases
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 5)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11)});
|
||||
|
||||
// test low quality reads
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 1, 1)});
|
||||
|
||||
// test low quality bases
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1)});
|
||||
|
||||
// test mixture
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 3)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 3, 3)});
|
||||
|
||||
// test I/D operators
|
||||
// TODO -- uncomment this test when the deletion bug is fixed!
|
||||
// tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 5)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11)});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ConsensusCreation", enabled = true)
|
||||
public void testConsensusCreationTest(ConsensusCreationTest test) {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false, 1, false);
|
||||
// test WITHOUT het compression allowed
|
||||
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false, false);
|
||||
for ( final GATKSAMRecord read : test.myReads )
|
||||
slidingWindow.addRead(read);
|
||||
final Pair<Set<GATKSAMRecord>, CompressionStash> result = slidingWindow.close();
|
||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close();
|
||||
|
||||
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads);
|
||||
|
||||
// test WITH het compression allowed
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false, true);
|
||||
for ( final GATKSAMRecord read : test.myReads )
|
||||
slidingWindow.addRead(read);
|
||||
result = slidingWindow.close();
|
||||
|
||||
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -353,8 +405,8 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "Downsampling", enabled = true)
|
||||
public void testDownsamplingTest(DSTest test) {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, test.dcov, ReduceReads.DownsampleStrategy.Normal, false, 1, false);
|
||||
final List<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads);
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, test.dcov, ReduceReads.DownsampleStrategy.Normal, false, false);
|
||||
final ObjectList<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads);
|
||||
|
||||
Assert.assertEquals(result.size(), Math.min(test.dcov, basicReads.size()));
|
||||
}
|
||||
|
|
@ -401,10 +453,10 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "ConsensusQuals", enabled = true)
|
||||
public void testConsensusQualsTest(QualsTest test) {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false, 1, false);
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false, false);
|
||||
for ( final GATKSAMRecord read : test.myReads )
|
||||
slidingWindow.addRead(read);
|
||||
final Pair<Set<GATKSAMRecord>, CompressionStash> result = slidingWindow.close();
|
||||
final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close();
|
||||
|
||||
Assert.assertEquals(result.getFirst().size(), 1);
|
||||
final GATKSAMRecord read = result.getFirst().iterator().next();
|
||||
|
|
|
|||
|
|
@ -46,6 +46,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import it.unimi.dsi.fastutil.bytes.ByteArrayList;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
|
@ -54,9 +56,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
public class SyntheticReadUnitTest extends BaseTest {
|
||||
final SAMFileHeader artificialSAMHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1);
|
||||
final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
|
||||
|
|
@ -66,35 +65,32 @@ public class SyntheticReadUnitTest extends BaseTest {
|
|||
final int artificialRefStart = 1;
|
||||
final double artificialMappingQuality = 60;
|
||||
|
||||
final Random random = new Random(8854875);
|
||||
|
||||
|
||||
@Test
|
||||
public void testBaseCounts() {
|
||||
BaseIndex [] bases = new BaseIndex[] {BaseIndex.A,BaseIndex.A,BaseIndex.A,BaseIndex.A};
|
||||
Byte[] quals = new Byte[] {20, 20, 20, 20 };
|
||||
byte[] quals = new byte[] {20, 20, 20, 20 };
|
||||
|
||||
TestRead [] testReads = new TestRead [] {
|
||||
new TestRead(bases, quals, new Byte[] {100, 100, 100, 101}, new byte [] {100, 0, 0, 1}),
|
||||
new TestRead(bases, quals, new Byte[] {1, 100, 100, 0}, new byte [] {1, 99, 99, -1}),
|
||||
new TestRead(bases, quals, new Byte[] {127, 100, 0, 1}, new byte [] {127, -27, -127, -126}),
|
||||
new TestRead(bases, quals, new Byte[] {1, 127, 51, 126}, new byte [] {1, 126, 50, 125})};
|
||||
new TestRead(bases, quals, new byte[] {100, 100, 100, 101}, new byte [] {100, 0, 0, 1}),
|
||||
new TestRead(bases, quals, new byte[] {1, 100, 100, 0}, new byte [] {1, 99, 99, -1}),
|
||||
new TestRead(bases, quals, new byte[] {127, 100, 0, 1}, new byte [] {127, -27, -127, -126}),
|
||||
new TestRead(bases, quals, new byte[] {1, 127, 51, 126}, new byte [] {1, 126, 50, 125})};
|
||||
|
||||
for (TestRead testRead : testReads) {
|
||||
SyntheticRead syntheticRead = new SyntheticRead(Arrays.asList(testRead.getBases()), Arrays.asList(testRead.getCounts()), Arrays.asList(testRead.getQuals()), Arrays.asList(testRead.getInsQuals()), Arrays.asList(testRead.getDelQuals()), artificialMappingQuality, GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false, false);
|
||||
SyntheticRead syntheticRead = new SyntheticRead(new ObjectArrayList<BaseIndex>(testRead.getBases()), new ByteArrayList(testRead.getCounts()), new ByteArrayList(testRead.getQuals()), new ByteArrayList(testRead.getInsQuals()), new ByteArrayList(testRead.getDelQuals()), artificialMappingQuality, GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false, false);
|
||||
Assert.assertEquals(syntheticRead.convertBaseCounts(), testRead.getExpectedCounts());
|
||||
}
|
||||
}
|
||||
|
||||
private class TestRead {
|
||||
BaseIndex[] bases;
|
||||
Byte[] quals;
|
||||
Byte[] insQuals;
|
||||
Byte[] delQuals;
|
||||
Byte[] counts;
|
||||
byte [] expectedCounts;
|
||||
byte[] quals;
|
||||
byte[] insQuals;
|
||||
byte[] delQuals;
|
||||
byte[] counts;
|
||||
byte[] expectedCounts;
|
||||
|
||||
private TestRead(BaseIndex[] bases, Byte[] quals, Byte[] counts, byte[] expectedCounts) {
|
||||
private TestRead(BaseIndex[] bases, byte[] quals, byte[] counts, byte[] expectedCounts) {
|
||||
this.bases = bases;
|
||||
this.quals = quals;
|
||||
this.insQuals = quals;
|
||||
|
|
@ -107,19 +103,19 @@ private class TestRead {
|
|||
return bases;
|
||||
}
|
||||
|
||||
public Byte[] getQuals() {
|
||||
public byte[] getQuals() {
|
||||
return quals;
|
||||
}
|
||||
|
||||
public Byte[] getInsQuals() {
|
||||
public byte[] getInsQuals() {
|
||||
return insQuals;
|
||||
}
|
||||
|
||||
public Byte[] getDelQuals() {
|
||||
public byte[] getDelQuals() {
|
||||
return delQuals;
|
||||
}
|
||||
|
||||
public Byte[] getCounts() {
|
||||
public byte[] getCounts() {
|
||||
return counts;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/6/13
|
||||
*/
|
||||
public class BaseCoverageDistributionIntegrationTest extends WalkerTest {
|
||||
final static String REF = hg18Reference;
|
||||
final String bam = validationDataLocation + "small_bam_for_countloci.withRG.bam";
|
||||
|
||||
@DataProvider(name = "BasicArguments")
|
||||
public Object[][] basicArgumentsDataProvider() {
|
||||
return new Object[][] {
|
||||
// Tests simple counting on one interval with everything in the same contig including tallying of uncovered bases.
|
||||
{"testSingleInterval ", "-L chr1:90000-100000", "45368696dc008d1a07fb2b05fbafd1f4"},
|
||||
// Tests specially the tallying of uncovered bases across multiple intervals. Makes sure it's only adding the bases present in the intervals requested.
|
||||
{"testMultipleIntervals ", "-L chr1:10-20 -L chr1:40-100 -L chr1:10,000-11,000 -L chr1:40,000-60,000 -L chr1:76,000-99,000 ", "45dafe59e5e54451b88c914d6ecbddc6"},
|
||||
// Tests adding the entire genome around every covered base as uncovered. Especially tests the tally in the beginning and end of the run, adding up all chromosomes not visited (this test file only has reads on chr1).
|
||||
{"testNoIntervals ", "", "c399f780f0b7da6be2614d837c368d1c"},
|
||||
|
||||
// the following three tests are equivalent but now include the filtered distribution option. These tests are aimed at the filtered distribution output.
|
||||
{"testFilteredSingleInterval ", "-fd -L chr1:90000-100000", "7017cf191bf54e85111972a882e1d5fa"},
|
||||
{"testFilteredMultipleIntervals ", "-fd -L chr1:10-20 -L chr1:40-100 -L chr1:10,000-11,000 -L chr1:40,000-60,000 -L chr1:76,000-99,000 ", "75d11cc02210676d6c19939fb0b9ab2e"},
|
||||
{"testFilteredNoIntervals ", "-fd ", "e7abfa6c7be493de4557a64f66688148"},
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BasicArguments", enabled = true)
|
||||
private void BaseCoverageDistributionTest(String testName, String args, String md5) {
|
||||
String base = String.format("-T BaseCoverageDistribution -R %s -I %s ", REF, bam) + " -o %s ";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(base + args, Arrays.asList(md5));
|
||||
executeTest(testName, spec);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,299 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
public class BiasedDownsamplingIntegrationTest extends WalkerTest {
|
||||
|
||||
private final static String baseCommand1 = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommand2 = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:1,000,000-5,000,000";
|
||||
private final static String baseCommand3 = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:4,000,000-5,000,000";
|
||||
private final String ArtificalBAMLocation = privateTestDir + "ArtificallyContaminatedBams/";
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing UnifiedGenotyper contamination down-sampling
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testContaminationDownsamplingFlat() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contamination 0.20", 1,
|
||||
Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
|
||||
executeTest("test contamination_percentage_to_filter 0.20", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testContaminationDownsamplingFlatAndPerSample() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_per_sample_file " + ArtificalBAMLocation + "NA12878.NA19240.contam.txt --contamination_fraction_to_filter 0.10", 1,
|
||||
Arrays.asList("53395814dd6990448a01a294ccd69bd2"));
|
||||
executeTest("test contamination_percentage_to_filter per-sample and .20 overall", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testContaminationDownsamplingPerSampleOnly() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contaminationFile " + ArtificalBAMLocation + "NA19240.contam.txt", 1,
|
||||
Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
|
||||
executeTest("test contamination_percentage_to_filter per-sample", spec);
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing UnifiedGenotyper contamination down-sampling on BAMs with artificially created contaminated.
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test(enabled = false)
|
||||
private void testDefaultContamination() {
|
||||
final String bam1 = "NA11918.with.1.NA12842.reduced.bam";
|
||||
final String bam2 = "NA12842.with.1.NA11918.reduced.bam";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s ", 1,
|
||||
Arrays.asList("e2e5a8dd313f8d7e382e7d49dfac59a2"));
|
||||
executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " with default downsampling.", spec);
|
||||
}
|
||||
|
||||
private void testFlatContamination(final String bam1, final String bam2, final Double downsampling, final String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination " + downsampling.toString(), 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " downsampling " + downsampling.toString(), spec);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testFlatContaminationCase1() {
|
||||
testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "e2e5a8dd313f8d7e382e7d49dfac59a2");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testFlatContaminationCase2() {
|
||||
testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "549737002f98775fea8f46e7ea174dde");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testFlatContaminationCase3() {
|
||||
testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "529d82c2a33fcc303a5dc55de2d56979");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testFlatContaminationCase4() {
|
||||
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.1, "b5689972fbb7d230a372ee5f0da1c6d7");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testFlatContaminationCase5() {
|
||||
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.2, "9dceee2e921b53fbc1ce137a7e0b7b74");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testFlatContaminationCase6() {
|
||||
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.3, "d6a74061033503af80dcaea065bfa075");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testFlatContaminationCase7() {
|
||||
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "7d1b5efab58a1b8f9d99fcf5af82f15a");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testFlatContaminationCase8() {
|
||||
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "a7f8d5c79626aff59d7f426f79d8816e");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testFlatContaminationCase9() {
|
||||
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.3, "fcf482398b7c908e3e2d1e4d5da6377b");
|
||||
}
|
||||
|
||||
private void testPerSampleContamination(String bam1, String bam2, String persampleFile, final String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contaminationFile " + persampleFile, 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test contamination on Artificial Contamination (per-sample) on " + bam1 + " and " + bam2 + " with " + persampleFile, spec);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleContaminationCase1() {
|
||||
testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.1.txt", "e00278527a294833259e9e411728e395");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleContaminationCase2() {
|
||||
testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.2.txt", "a443e793f0b0e2ffce1b751634d706e2");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleContaminationCase3() {
|
||||
testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.3.txt", "e11d83a7815ce757afbcf7689568cb25");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleContaminationCase4() {
|
||||
testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.4.txt", "615042eeeffe042bd1c86279d34f80b6");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleContaminationCase5() {
|
||||
testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.1.txt", "9bc99fc79ca34744bf26cb19ee4ef44d");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleContaminationCase6() {
|
||||
testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.2.txt", "143626fe5fce765d6c997a64f058a813");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleContaminationCase7() {
|
||||
testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.3.txt", "f2593674cef894eda4e0be9cf3158f57");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleContaminationCase8() {
|
||||
testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.4.txt", "fb7ce0740767ae3896b3e552026da1e4");
|
||||
}
|
||||
|
||||
|
||||
private void testPerSampleEqualsFlat(final String bam1, final String bam2, final String persampleFile, final Double downsampling, final String md5) {
|
||||
final String command = baseCommand3 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s ";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, Arrays.asList(md5));
|
||||
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
|
||||
|
||||
rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result
|
||||
executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec);
|
||||
|
||||
spec = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList(md5));
|
||||
|
||||
rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result
|
||||
executeTest("test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec);
|
||||
|
||||
}
|
||||
|
||||
// verify that inputting a file with an effectively flat contamination level is equivalent to handing in a flat contamination level
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleEqualsFlatContaminationCase1() {
|
||||
testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.6.txt", 0.0, "");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleEqualsFlatContaminationCase2() {
|
||||
testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.7.txt", 0.15, "");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testPerSampleEqualsFlatContaminationCase3() {
|
||||
testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.8.txt", 0.3, "");
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing HaplotypeCaller Contamination Removal
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testHCContaminationDownsamplingFlat() {
|
||||
final String baseCommand = "-T HaplotypeCaller -R " + b36KGReference + " --no_cmdline_in_header --dbsnp " + b36dbSNP129;
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contamination 0.20", 1,
|
||||
Arrays.asList("c3a253467ead7b1cfe9fd9dd310828b1"));
|
||||
executeTest("HC calling with contamination_percentage_to_filter 0.20", spec);
|
||||
}
|
||||
|
||||
// HaplotypeCaller can only (currently) use flat contamination reduction, not per-sample. Until that is implemented, this test verifies that a per-sample contamination file is rejected with a UserException.
|
||||
@Test(enabled = false)
|
||||
public void testHCCannotProcessPerSampleContamination() {
|
||||
final String baseCommand = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:3,000,000-5,000,000";
|
||||
final String bam1 = "NA11918.with.1.NA12842.reduced.bam";
|
||||
final String perSampleFile = ArtificalBAMLocation + "contamination.case.1.txt";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand + " -I " + ArtificalBAMLocation + bam1 + " -o %s -contaminationFile " + perSampleFile, 1,
|
||||
UserException.class);
|
||||
executeTest("HC should fail on per-Sample contamination removal.", spec);
|
||||
}
|
||||
|
||||
|
||||
private void testHCFlatContamination(final String bam1, final String bam2, final Double downsampling, final String md5) {
|
||||
final String baseCommand = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:3,000,000-5,000,000";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination " + downsampling.toString(), 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("HC test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " downsampling " + downsampling.toString(), spec);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testHCFlatContaminationCase1() {
|
||||
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "c3e695381d8627e3922d8c642b66c3ce");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testHCFlatContaminationCase2() {
|
||||
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "002d2b45336d88d7c04e19f9f26e29d9");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testHCFlatContaminationCase3() {
|
||||
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "1809a33ac112d1a3bd7a071c566794dd");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class UnifiedGenotyperEngineUnitTest extends BaseTest {
|
||||
private final static double TOLERANCE = 1e-5;
|
||||
private UnifiedGenotyperEngine ugEngine;
|
||||
|
||||
@BeforeClass
|
||||
public void setUp() throws Exception {
|
||||
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setArguments(new GATKArgumentCollection());
|
||||
final UnifiedArgumentCollection args = new UnifiedArgumentCollection();
|
||||
final Set<String> fakeSamples = Collections.singleton("fake");
|
||||
ugEngine = new UnifiedGenotyperEngine(engine, fakeSamples, args);
|
||||
}
|
||||
|
||||
private UnifiedGenotyperEngine getEngine() {
|
||||
return ugEngine;
|
||||
}
|
||||
|
||||
@DataProvider(name = "ReferenceQualityCalculation")
|
||||
public Object[][] makeReferenceQualityCalculation() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
final double p = Math.log10(0.5);
|
||||
for ( final double theta : Arrays.asList(0.1, 0.01, 0.001) ) {
|
||||
for ( final int depth : Arrays.asList(0, 1, 2, 10, 100, 1000, 10000) ) {
|
||||
final double log10PofNonRef = Math.log10(theta / 2.0) + MathUtils.log10BinomialProbability(depth, 0, p);
|
||||
final double log10POfRef = MathUtils.log10OneMinusX(Math.pow(10.0, log10PofNonRef));
|
||||
tests.add(new Object[]{depth, theta, log10POfRef});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ReferenceQualityCalculation")
|
||||
public void testReferenceQualityCalculation(final int depth, final double theta, final double expected) {
|
||||
final double ref = getEngine().estimateLog10ReferenceConfidenceForOneSample(depth, theta);
|
||||
Assert.assertTrue(MathUtils.goodLog10Probability(ref), "Reference calculation wasn't a well formed log10 prob " + ref);
|
||||
Assert.assertEquals(ref, expected, TOLERANCE, "Failed reference confidence for single sample");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.*;
|
||||
|
||||
/**
 * Integration tests for the general-ploidy (pooled-sample) UnifiedGenotyper, suite 1.
 *
 * User: delangel
 * Date: 4/5/12
 */
|
||||
public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTest {
|
||||
|
||||
private final UnifiedGenotyperGeneralPloidyTestExecutor executor = new UnifiedGenotyperGeneralPloidyTestExecutor();
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testSNP_ACS_Pools() {
|
||||
executor.PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES", "LSV_SNP_ACS", "SNP", "df0e67c975ef74d593f1c704daab1705");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "71f16e19b7d52e8edee46f4121e59f54");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "3f7d763c654f1d708323f369ea4a099b");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "5812da66811887d834d0379a33e655c0");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.*;
|
||||
|
||||
public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTest {
|
||||
|
||||
private final UnifiedGenotyperGeneralPloidyTestExecutor executor = new UnifiedGenotyperGeneralPloidyTestExecutor();
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","3a321896c4b8b6457973c76c486da4d4");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
|
||||
}
|
||||
}
|
||||
|
|
@ -47,90 +47,47 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: delangel
|
||||
* Date: 4/5/12
|
||||
* Time: 11:28 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
||||
public class UnifiedGenotyperGeneralPloidyTestExecutor extends WalkerTest {
|
||||
final static String REF = b37KGReference;
|
||||
final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
|
||||
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
|
||||
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
|
||||
final String REFSAMPLE_NAME = "NA12878";
|
||||
final String MTINTERVALS = "MT:1-1000";
|
||||
final String LSVINTERVALS = "20:40,500,000-41,000,000";
|
||||
final String LSVINTERVALS_SHORT = "20:40,500,000-40,501,000";
|
||||
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
|
||||
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
|
||||
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
|
||||
final static String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
|
||||
final static String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
|
||||
final static String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
|
||||
final static String REFSAMPLE_NAME = "NA12878";
|
||||
final static String MTINTERVALS = "MT:1-1000";
|
||||
final static String LSVINTERVALS = "20:40,500,000-41,000,000";
|
||||
final static String LSVINTERVALS_SHORT = "20:40,500,000-40,501,000";
|
||||
final static String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
|
||||
final static String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
|
||||
final static String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
|
||||
|
||||
private void PC_MT_Test(String bam, String args, String name, String md5) {
|
||||
public void PC_MT_Test(String bam, String args, String name, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
|
||||
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
private void PC_LSV_Test(String args, String name, String model, String md5) {
|
||||
public void PC_LSV_Test(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
|
||||
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
private void PC_LSV_Test_short(String args, String name, String model, String md5) {
|
||||
public void PC_LSV_Test_short(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
|
||||
REF, LSV_BAM, LSVINTERVALS_SHORT, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
|
||||
public void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
|
||||
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testSNP_ACS_Pools() {
|
||||
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d1c113a17e36762d27eb27fd12528e52");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ab043eed87fadbe5761a55a4912b19ac");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","95d48e0680019d5406ff9adb8f2ff3ca");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","8a4ddd64c4e9c42b4a8622582fcfa9c9");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
||||
|
||||
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing indel caller
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
// Basic indel testing with SLX data
|
||||
@Test
|
||||
public void testSimpleIndels() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam" +
|
||||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("1cb469b9cc8e6c70430021540bf1af8b"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
||||
// Basic indel testing with SLX data
|
||||
@Test
|
||||
public void testIndelsWithLowMinAlleleCnt() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam" +
|
||||
" -o %s" +
|
||||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("c7e59f9ab718df4c604626a0f51af606"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiTechnologyIndels() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" +
|
||||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("4bebbe4ed4a7554285a3b4bb7311101c"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithIndelAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("86880ec78755ae91cb5bb34a0631a32c"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithIndelAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("2584d5e3ade1b548f1fe9cdcafbe1b28"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
@Test(timeOut = 20*1000*60) // this guy can take a long time because it's two steps, so give it 12 minutes
|
||||
public void testMultiSampleIndels1() {
|
||||
// since we're going to test the MD5s with GGA only do one here
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList(""));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
|
||||
Arrays.asList("08b3a85be00c8f6a4fefd3c671463ecf"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGGAwithNoEvidenceInReads() {
|
||||
final String vcf = "small.indel.test.vcf";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBaseIndelQualityScores() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 +
|
||||
" -I " + privateTestDir + "NA12878.100kb.BQSRv2.example.bam" +
|
||||
" -o %s" +
|
||||
" -L 20:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("8a7966e4b67334bca6083670c5a16b67"));
|
||||
|
||||
executeTest(String.format("test UG with base indel quality scores"), spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing MinIndelFraction
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
|
||||
+ "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
|
||||
|
||||
@Test
|
||||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("556c214366e82e4682e753ce93307a4e"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("1df02b805d9dfbd532fa3632875a989d"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMinIndelFraction100() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 1", 1,
|
||||
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
|
||||
executeTest("test minIndelFraction 1.0", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -51,10 +51,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
// ********************************************************************************** //
|
||||
// Note that this class also serves as an integration test for the VariantAnnotator! //
|
||||
|
|
@ -63,128 +61,8 @@ import java.util.List;
|
|||
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||
|
||||
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
|
||||
private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam";
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing normal calling
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("847605f4efafef89529fe0e496315edd"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("5b31b811072a4df04524e13604015f9b"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("dff4412a074940d26994f9552476b209"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("35479a79e1ce7c15493bd77e58cadcaa"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("d915535c1458733f09f82670092fcab6"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("1e61de694b51d7c0f26da5179ee6bb0c"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("935ee705ffe8cc6bf1d9efcceea271c8"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing compressed output
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "e6e33f0ebabab027eabed51fe9a08da9";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("gz"), Arrays.asList(COMPRESSED_OUTPUT_MD5));
|
||||
executeTest("test compressed output", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing parallelization
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void testParallelization() {
|
||||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "d408b4661b820ed86272415b8ea08780";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test parallelization (single thread)", spec1);
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test parallelization (2 threads)", spec2);
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test parallelization (4 threads)", spec3);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing parameters
|
||||
|
|
@ -268,12 +146,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "bdc8760d7ae1e01c0510b12c1e6fcfa3" );
|
||||
testHeterozosity( 0.01, "ffc1f83a045dc09360e11de7a8efd159" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "f508f06a47305e11e62776615cb14fe3" );
|
||||
testHeterozosity( 1.0 / 1850, "5426a98df9f5fd70aef295d889c4e4f1" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -283,6 +161,54 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest(String.format("test heterozyosity[%s]", arg), spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing compressed output
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "d5a7326fdcf6d441b73c381912ad3a2a";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("gz"), Arrays.asList(COMPRESSED_OUTPUT_MD5));
|
||||
executeTest("test compressed output", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing parallelization
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void testParallelization() {
|
||||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "d408b4661b820ed86272415b8ea08780";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test parallelization (single thread)", spec1);
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test parallelization (2 threads)", spec2);
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test parallelization (4 threads)", spec3);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -297,7 +223,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("13d91059f58fb50a07a6a34b9438a45b"));
|
||||
Arrays.asList("68961b19a29ae224059c33ef41cdcb58"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -316,114 +242,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("07d8b77a5f6697f3a47a4f1efb0dcf50"));
|
||||
Arrays.asList("9fcb234f7573209dec4dae86db091efd"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing indel caller
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
// Basic indel testing with SLX data
|
||||
@Test
|
||||
public void testSimpleIndels() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam" +
|
||||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("0f026d2e568172cf32813cc54ea7ba23"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
||||
// Basic indel testing with SLX data
|
||||
@Test
|
||||
public void testIndelsWithLowMinAlleleCnt() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam" +
|
||||
" -o %s" +
|
||||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("e7ad858e9d6617534761918561f3ed4c"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiTechnologyIndels() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" +
|
||||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("8231ae37b52b927db9fc1e5c221b0ba0"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithIndelAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("9430fe36789a791fcff6162f768ae563"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithIndelAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("8d8dbf483526b0b309f5728619a74a86"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("a47810de2f6ef8087f4644064a0814bc"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("53b8d2b0fa63c5d1019855e8e0db28f0"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGGAwithNoEvidenceInReads() {
|
||||
final String vcf = "small.indel.test.vcf";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBaseIndelQualityScores() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 +
|
||||
" -I " + privateTestDir + "NA12878.100kb.BQSRv2.example.bam" +
|
||||
" -o %s" +
|
||||
" -L 20:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("1e0d2c15546c3b0959b00ffb75488b56"));
|
||||
|
||||
executeTest(String.format("test UG with base indel quality scores"), spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing SnpEff
|
||||
|
|
@ -440,39 +263,6 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing MinIndelFraction
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
|
||||
+ "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
|
||||
|
||||
@Test
|
||||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("db3026c49a3de7a5cb9a3d77635d0706"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("7ab8e5ee15ab98d6756b0eea0f4d3798"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMinIndelFraction100() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 1", 1,
|
||||
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
|
||||
executeTest("test minIndelFraction 1.0", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing Ns in CIGAR
|
||||
|
|
@ -486,52 +276,4 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
|
||||
executeTest("test calling on reads with Ns in CIGAR", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing reduced reads
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void testReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("8b9a9fc2e7150acbe2dac91b4620f304"));
|
||||
executeTest("test calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamSNPs() {
|
||||
testReducedCalling("SNP", "b5991dddbfb59366614ff8819062649f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "a85c110fcac9574a54c7daccb1e2d5ae");
|
||||
}
|
||||
|
||||
|
||||
private void testReducedCalling(final String model, final String md5) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test calling on a ReducedRead BAM with " + model, spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing contamination down-sampling
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void testContaminationDownsampling() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
|
||||
Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
|
||||
executeTest("test contamination_percentage_to_filter 0.20", spec);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
||||
|
||||
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing normal calling
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("2f15ef1ead56d875a3f1d53772f52b3a"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("5b31b811072a4df04524e13604015f9b"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("33ab66c2f062cfa1f7fcc077165f778c"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("9fac00485419878749b03706ae6b852f"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("d915535c1458733f09f82670092fcab6"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("eb9604b77a7d6baab60c81ac3db5e47b"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("de2c5707c1805d17d70acaecd36b7372"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest {
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing reduced reads
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void testReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("8b9a9fc2e7150acbe2dac91b4620f304"));
|
||||
executeTest("test calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamSNPs() {
|
||||
testReducedCalling("SNP", "b5991dddbfb59366614ff8819062649f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "acde5694a74f867256a54a26cbebbf21");
|
||||
}
|
||||
|
||||
|
||||
private void testReducedCalling(final String model, final String md5) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test calling on a ReducedRead BAM with " + model, spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -52,19 +52,21 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
* Date: 3/27/12
|
||||
*/
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
|
||||
public class DeBruijnAssemblerUnitTest extends BaseTest {
|
||||
|
||||
|
||||
private class MergeNodesWithNoVariationTestProvider extends TestDataProvider {
|
||||
|
|
@ -77,16 +79,16 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
|
|||
KMER_LENGTH = kmer;
|
||||
}
|
||||
|
||||
public DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> expectedGraph() {
|
||||
public DeBruijnAssemblyGraph expectedGraph() {
|
||||
DeBruijnVertex v = new DeBruijnVertex(sequence, KMER_LENGTH);
|
||||
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
DeBruijnAssemblyGraph graph = new DeBruijnAssemblyGraph();
|
||||
graph.addVertex(v);
|
||||
return graph;
|
||||
}
|
||||
|
||||
public DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> calcGraph() {
|
||||
public DeBruijnAssemblyGraph calcGraph() {
|
||||
|
||||
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
DeBruijnAssemblyGraph graph = new DeBruijnAssemblyGraph();
|
||||
final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
|
||||
for (int i = 0; i < kmersInSequence - 1; i++) {
|
||||
// get the kmers
|
||||
|
|
@ -95,9 +97,9 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
|
|||
final byte[] kmer2 = new byte[KMER_LENGTH];
|
||||
System.arraycopy(sequence, i+1, kmer2, 0, KMER_LENGTH);
|
||||
|
||||
SimpleDeBruijnAssembler.addKmersToGraph(graph, kmer1, kmer2, false);
|
||||
graph.addKmersToGraph(kmer1, kmer2, false);
|
||||
}
|
||||
SimpleDeBruijnAssembler.mergeNodes(graph);
|
||||
DeBruijnAssembler.mergeNodes(graph);
|
||||
return graph;
|
||||
}
|
||||
}
|
||||
|
|
@ -124,8 +126,8 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testPruneGraph() {
|
||||
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
DeBruijnAssemblyGraph graph = new DeBruijnAssemblyGraph();
|
||||
DeBruijnAssemblyGraph expectedGraph = new DeBruijnAssemblyGraph();
|
||||
|
||||
DeBruijnVertex v = new DeBruijnVertex("ATGG".getBytes(), 1);
|
||||
DeBruijnVertex v2 = new DeBruijnVertex("ATGGA".getBytes(), 1);
|
||||
|
|
@ -154,12 +156,12 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
|
|||
expectedGraph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
|
||||
expectedGraph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
|
||||
|
||||
SimpleDeBruijnAssembler.pruneGraph(graph, 2);
|
||||
DeBruijnAssembler.pruneGraph(graph, 2);
|
||||
|
||||
Assert.assertTrue(graphEquals(graph, expectedGraph));
|
||||
|
||||
graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
graph = new DeBruijnAssemblyGraph();
|
||||
expectedGraph = new DeBruijnAssemblyGraph();
|
||||
|
||||
graph.addVertex(v);
|
||||
graph.addVertex(v2);
|
||||
|
|
@ -182,103 +184,12 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
|
|||
expectedGraph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
|
||||
expectedGraph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
|
||||
|
||||
SimpleDeBruijnAssembler.pruneGraph(graph, 2);
|
||||
DeBruijnAssembler.pruneGraph(graph, 2);
|
||||
|
||||
Assert.assertTrue(graphEquals(graph, expectedGraph));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testEliminateNonRefPaths() {
|
||||
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
|
||||
DeBruijnVertex v = new DeBruijnVertex("ATGG".getBytes(), 1);
|
||||
DeBruijnVertex v2 = new DeBruijnVertex("ATGGA".getBytes(), 1);
|
||||
DeBruijnVertex v3 = new DeBruijnVertex("ATGGT".getBytes(), 1);
|
||||
DeBruijnVertex v4 = new DeBruijnVertex("ATGGG".getBytes(), 1);
|
||||
DeBruijnVertex v5 = new DeBruijnVertex("ATGGC".getBytes(), 1);
|
||||
DeBruijnVertex v6 = new DeBruijnVertex("ATGGCCCCCC".getBytes(), 1);
|
||||
|
||||
graph.addVertex(v);
|
||||
graph.addVertex(v2);
|
||||
graph.addVertex(v3);
|
||||
graph.addVertex(v4);
|
||||
graph.addVertex(v5);
|
||||
graph.addVertex(v6);
|
||||
graph.addEdge(v, v2, new DeBruijnEdge(false));
|
||||
graph.addEdge(v2, v3, new DeBruijnEdge(true));
|
||||
graph.addEdge(v3, v4, new DeBruijnEdge(true));
|
||||
graph.addEdge(v4, v5, new DeBruijnEdge(true));
|
||||
graph.addEdge(v5, v6, new DeBruijnEdge(false));
|
||||
|
||||
expectedGraph.addVertex(v2);
|
||||
expectedGraph.addVertex(v3);
|
||||
expectedGraph.addVertex(v4);
|
||||
expectedGraph.addVertex(v5);
|
||||
expectedGraph.addEdge(v2, v3, new DeBruijnEdge());
|
||||
expectedGraph.addEdge(v3, v4, new DeBruijnEdge());
|
||||
expectedGraph.addEdge(v4, v5, new DeBruijnEdge());
|
||||
|
||||
SimpleDeBruijnAssembler.eliminateNonRefPaths(graph);
|
||||
|
||||
Assert.assertTrue(graphEquals(graph, expectedGraph));
|
||||
|
||||
|
||||
|
||||
|
||||
graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
|
||||
graph.addVertex(v);
|
||||
graph.addVertex(v2);
|
||||
graph.addVertex(v3);
|
||||
graph.addVertex(v4);
|
||||
graph.addVertex(v5);
|
||||
graph.addVertex(v6);
|
||||
graph.addEdge(v, v2, new DeBruijnEdge(true));
|
||||
graph.addEdge(v2, v3, new DeBruijnEdge(true));
|
||||
graph.addEdge(v4, v5, new DeBruijnEdge(false));
|
||||
graph.addEdge(v5, v6, new DeBruijnEdge(false));
|
||||
|
||||
expectedGraph.addVertex(v);
|
||||
expectedGraph.addVertex(v2);
|
||||
expectedGraph.addVertex(v3);
|
||||
expectedGraph.addEdge(v, v2, new DeBruijnEdge());
|
||||
expectedGraph.addEdge(v2, v3, new DeBruijnEdge());
|
||||
|
||||
SimpleDeBruijnAssembler.eliminateNonRefPaths(graph);
|
||||
|
||||
Assert.assertTrue(graphEquals(graph, expectedGraph));
|
||||
|
||||
|
||||
|
||||
graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
|
||||
graph.addVertex(v);
|
||||
graph.addVertex(v2);
|
||||
graph.addVertex(v3);
|
||||
graph.addVertex(v4);
|
||||
graph.addVertex(v5);
|
||||
graph.addVertex(v6);
|
||||
graph.addEdge(v, v2, new DeBruijnEdge(true));
|
||||
graph.addEdge(v2, v3, new DeBruijnEdge(true));
|
||||
graph.addEdge(v4, v5, new DeBruijnEdge(false));
|
||||
graph.addEdge(v5, v6, new DeBruijnEdge(false));
|
||||
graph.addEdge(v4, v2, new DeBruijnEdge(false));
|
||||
|
||||
expectedGraph.addVertex(v);
|
||||
expectedGraph.addVertex(v2);
|
||||
expectedGraph.addVertex(v3);
|
||||
expectedGraph.addEdge(v, v2, new DeBruijnEdge());
|
||||
expectedGraph.addEdge(v2, v3, new DeBruijnEdge());
|
||||
|
||||
SimpleDeBruijnAssembler.eliminateNonRefPaths(graph);
|
||||
|
||||
Assert.assertTrue(graphEquals(graph, expectedGraph));
|
||||
}
|
||||
|
||||
private boolean graphEquals(DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> g1, DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> g2) {
|
||||
private boolean graphEquals(DeBruijnAssemblyGraph g1, DeBruijnAssemblyGraph g2) {
|
||||
if( !(g1.vertexSet().containsAll(g2.vertexSet()) && g2.vertexSet().containsAll(g1.vertexSet())) ) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -298,4 +209,58 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testReferenceCycleGraph() {
|
||||
String refCycle = "ATCGAGGAGAGCGCCCCGAGATATATATATATATATTTGCGAGCGCGAGCGTTTTAAAAATTTTAGACGGAGAGATATATATATATGGGAGAGGGGATATATATATATCCCCCC";
|
||||
String noCycle = "ATCGAGGAGAGCGCCCCGAGATATTATTTGCGAGCGCGAGCGTTTTAAAAATTTTAGACGGAGAGATGGGAGAGGGGATATATAATATCCCCCC";
|
||||
final DeBruijnAssemblyGraph g1 = DeBruijnAssembler.createGraphFromSequences(new ArrayList<GATKSAMRecord>(), 10, new Haplotype(refCycle.getBytes(), true), false);
|
||||
final DeBruijnAssemblyGraph g2 = DeBruijnAssembler.createGraphFromSequences(new ArrayList<GATKSAMRecord>(), 10, new Haplotype(noCycle.getBytes(), true), false);
|
||||
|
||||
Assert.assertTrue(g1 == null, "Reference cycle graph should return null during creation.");
|
||||
Assert.assertTrue(g2 != null, "Reference non-cycle graph should not return null during creation.");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLeftAlignCigarSequentially() {
|
||||
String preRefString = "GATCGATCGATC";
|
||||
String postRefString = "TTT";
|
||||
String refString = "ATCGAGGAGAGCGCCCCG";
|
||||
String indelString1 = "X";
|
||||
String indelString2 = "YZ";
|
||||
int refIndel1 = 10;
|
||||
int refIndel2 = 12;
|
||||
|
||||
for ( final int indelSize1 : Arrays.asList(1, 2, 3, 4) ) {
|
||||
for ( final int indelOp1 : Arrays.asList(1, -1) ) {
|
||||
for ( final int indelSize2 : Arrays.asList(1, 2, 3, 4) ) {
|
||||
for ( final int indelOp2 : Arrays.asList(1, -1) ) {
|
||||
|
||||
Cigar expectedCigar = new Cigar();
|
||||
expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
|
||||
expectedCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
|
||||
expectedCigar.add(new CigarElement((indelOp1 < 0 ? refIndel1 - indelSize1 : refIndel1), CigarOperator.M));
|
||||
expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
|
||||
expectedCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
|
||||
expectedCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 - indelSize2) * 2 : refIndel2 * 2), CigarOperator.M));
|
||||
expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
|
||||
|
||||
Cigar givenCigar = new Cigar();
|
||||
givenCigar.add(new CigarElement(refString.length() + refIndel1/2, CigarOperator.M));
|
||||
givenCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
|
||||
givenCigar.add(new CigarElement((indelOp1 < 0 ? (refIndel1/2 - indelSize1) : refIndel1/2) + refString.length() + refIndel2/2 * 2, CigarOperator.M));
|
||||
givenCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
|
||||
givenCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2/2 - indelSize2) * 2 : refIndel2/2 * 2) + refString.length(), CigarOperator.M));
|
||||
|
||||
String theRef = preRefString + refString + Utils.dupString(indelString1, refIndel1) + refString + Utils.dupString(indelString2, refIndel2) + refString + postRefString;
|
||||
String theRead = refString + Utils.dupString(indelString1, refIndel1 + indelOp1 * indelSize1) + refString + Utils.dupString(indelString2, refIndel2 + indelOp2 * indelSize2) + refString;
|
||||
|
||||
Cigar calculatedCigar = DeBruijnAssembler.leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(givenCigar), theRef.getBytes(), theRead.getBytes(), preRefString.length(), 0);
|
||||
Assert.assertEquals(AlignmentUtils.consolidateCigar(calculatedCigar).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar strings do not match!");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -44,160 +44,80 @@
|
|||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pairhmm;
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin, carneiro
|
||||
* Date: 10/16/12
|
||||
* User: rpoplin
|
||||
* Date: 2/8/13
|
||||
*/
|
||||
|
||||
public class CachingPairHMM extends OriginalPairHMM {
|
||||
public class DeBruijnAssemblyGraphUnitTest {
|
||||
private class GetReferenceBytesTestProvider extends BaseTest.TestDataProvider {
|
||||
public byte[] refSequence;
|
||||
public byte[] altSequence;
|
||||
public int KMER_LENGTH;
|
||||
|
||||
double[][] constantMatrix = null; // The cache in the CachingPairHMM
|
||||
double[][] distanceMatrix = null; // The cache in the CachingPairHMM
|
||||
|
||||
protected static final double [] firstRowConstantMatrix = {
|
||||
QualityUtils.qualToProbLog10((byte) (DEFAULT_GOP + DEFAULT_GOP)),
|
||||
QualityUtils.qualToProbLog10(DEFAULT_GCP),
|
||||
QualityUtils.qualToErrorProbLog10(DEFAULT_GOP),
|
||||
QualityUtils.qualToErrorProbLog10(DEFAULT_GCP),
|
||||
0.0,
|
||||
0.0
|
||||
};
|
||||
|
||||
@Override
|
||||
public void initialize( final int READ_MAX_LENGTH, final int HAPLOTYPE_MAX_LENGTH ) {
|
||||
|
||||
super.initialize(READ_MAX_LENGTH, HAPLOTYPE_MAX_LENGTH);
|
||||
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||
final int X_METRIC_LENGTH = READ_MAX_LENGTH + 2;
|
||||
final int Y_METRIC_LENGTH = HAPLOTYPE_MAX_LENGTH + 2;
|
||||
|
||||
constantMatrix = new double[X_METRIC_LENGTH][6];
|
||||
distanceMatrix = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
|
||||
// fill in the first row
|
||||
for( int jjj = 2; jjj < Y_METRIC_LENGTH; jjj++ ) {
|
||||
updateCell(1, jjj, 0.0, firstRowConstantMatrix, matchMetricArray, XMetricArray, YMetricArray);
|
||||
public GetReferenceBytesTestProvider(String ref, String alt, int kmer) {
|
||||
super(GetReferenceBytesTestProvider.class, String.format("Testing reference bytes. kmer = %d, ref = %s, alt = %s", kmer, ref, alt));
|
||||
refSequence = ref.getBytes();
|
||||
altSequence = alt.getBytes();
|
||||
KMER_LENGTH = kmer;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public double computeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
|
||||
final byte[] readBases,
|
||||
final byte[] readQuals,
|
||||
final byte[] insertionGOP,
|
||||
final byte[] deletionGOP,
|
||||
final byte[] overallGCP,
|
||||
final int hapStartIndex,
|
||||
final boolean recacheReadValues ) {
|
||||
|
||||
if( recacheReadValues ) {
|
||||
initializeConstants( insertionGOP, deletionGOP, overallGCP );
|
||||
public byte[] expectedReferenceBytes() {
|
||||
return refSequence;
|
||||
}
|
||||
initializeDistanceMatrix( haplotypeBases, readBases, readQuals, hapStartIndex );
|
||||
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||
final int X_METRIC_LENGTH = readBases.length + 2;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
|
||||
|
||||
for (int i = 2; i < X_METRIC_LENGTH; i++) {
|
||||
for (int j = hapStartIndex+1; j < Y_METRIC_LENGTH; j++) {
|
||||
updateCell(i, j, distanceMatrix[i][j], constantMatrix[i], matchMetricArray, XMetricArray, YMetricArray);
|
||||
}
|
||||
}
|
||||
|
||||
// final probability is the log10 sum of the last element in all three state arrays
|
||||
final int endI = X_METRIC_LENGTH - 1;
|
||||
final int endJ = Y_METRIC_LENGTH - 1;
|
||||
return MathUtils.approximateLog10SumLog10(matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the matrix that holds all the constants related to the editing
|
||||
* distance between the read and the haplotype.
|
||||
*
|
||||
* @param haplotypeBases the bases of the haplotype
|
||||
* @param readBases the bases of the read
|
||||
* @param readQuals the base quality scores of the read
|
||||
* @param startIndex where to start updating the distanceMatrix (in case this read is similar to the previous read)
|
||||
*/
|
||||
public void initializeDistanceMatrix( final byte[] haplotypeBases,
|
||||
final byte[] readBases,
|
||||
final byte[] readQuals,
|
||||
final int startIndex ) {
|
||||
|
||||
// initialize the pBaseReadLog10 matrix for all combinations of read x haplotype bases
|
||||
// Abusing the fact that java initializes arrays with 0.0, so no need to fill in rows and columns below 2.
|
||||
|
||||
for (int i = 0; i < readBases.length; i++) {
|
||||
final byte x = readBases[i];
|
||||
final byte qual = readQuals[i];
|
||||
for (int j = startIndex; j < haplotypeBases.length; j++) {
|
||||
final byte y = haplotypeBases[j];
|
||||
distanceMatrix[i+2][j+2] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ?
|
||||
QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) );
|
||||
public byte[] calculatedReferenceBytes() {
|
||||
DeBruijnAssemblyGraph graph = new DeBruijnAssemblyGraph();
|
||||
graph.addSequenceToGraph(refSequence, KMER_LENGTH, true);
|
||||
if( altSequence.length > 0 ) {
|
||||
graph.addSequenceToGraph(altSequence, KMER_LENGTH, false);
|
||||
}
|
||||
return graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the matrix that holds all the constants related to quality scores.
|
||||
*
|
||||
* @param insertionGOP insertion quality scores of the read
|
||||
* @param deletionGOP deletion quality scores of the read
|
||||
* @param overallGCP overall gap continuation penalty
|
||||
*/
|
||||
public void initializeConstants( final byte[] insertionGOP,
|
||||
final byte[] deletionGOP,
|
||||
final byte[] overallGCP ) {
|
||||
@DataProvider(name = "GetReferenceBytesTestProvider")
|
||||
public Object[][] GetReferenceBytesTests() {
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "", 3);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "", 4);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "", 5);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "", 6);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "", 7);
|
||||
new GetReferenceBytesTestProvider("GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", "", 6);
|
||||
new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "", 66);
|
||||
new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "", 76);
|
||||
|
||||
final int l = insertionGOP.length;
|
||||
constantMatrix[1] = firstRowConstantMatrix;
|
||||
for (int i = 0; i < l; i++) {
|
||||
final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE);
|
||||
constantMatrix[i+2][0] = QualityUtils.qualToProbLog10((byte) qualIndexGOP);
|
||||
constantMatrix[i+2][1] = QualityUtils.qualToProbLog10(overallGCP[i]);
|
||||
constantMatrix[i+2][2] = QualityUtils.qualToErrorProbLog10(insertionGOP[i]);
|
||||
constantMatrix[i+2][3] = QualityUtils.qualToErrorProbLog10(overallGCP[i]);
|
||||
constantMatrix[i+2][4] = QualityUtils.qualToErrorProbLog10(deletionGOP[i]);
|
||||
constantMatrix[i+2][5] = QualityUtils.qualToErrorProbLog10(overallGCP[i]);
|
||||
}
|
||||
constantMatrix[l+1][4] = 0.0;
|
||||
constantMatrix[l+1][5] = 0.0;
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 3);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 4);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 5);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 6);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 7);
|
||||
new GetReferenceBytesTestProvider("GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", "GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", 6);
|
||||
new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 66);
|
||||
new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 76);
|
||||
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 3);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 4);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 5);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 6);
|
||||
new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 7);
|
||||
new GetReferenceBytesTestProvider("GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", "AAAAAAAAAAAAA", 6);
|
||||
new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 66);
|
||||
new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 76);
|
||||
|
||||
return GetReferenceBytesTestProvider.getTests(GetReferenceBytesTestProvider.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates a cell in the HMM matrix
|
||||
*
|
||||
* The read and haplotype indices are offset by one because the state arrays have an extra column to hold the
|
||||
* initial conditions
|
||||
|
||||
* @param indI row index in the matrices to update
|
||||
* @param indJ column index in the matrices to update
|
||||
* @param prior the likelihood editing distance matrix for the read x haplotype
|
||||
* @param constants an array with the six constants relevant to this location
|
||||
* @param matchMetricArray the matches likelihood matrix
|
||||
* @param XMetricArray the insertions likelihood matrix
|
||||
* @param YMetricArray the deletions likelihood matrix
|
||||
*/
|
||||
private void updateCell( final int indI, final int indJ, final double prior, final double[] constants,
|
||||
final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
|
||||
|
||||
matchMetricArray[indI][indJ] = prior +
|
||||
MathUtils.approximateLog10SumLog10( matchMetricArray[indI - 1][indJ - 1] + constants[0],
|
||||
XMetricArray[indI - 1][indJ - 1] + constants[1],
|
||||
YMetricArray[indI - 1][indJ - 1] + constants[1] );
|
||||
XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10( matchMetricArray[indI - 1][indJ] + constants[2],
|
||||
XMetricArray[indI - 1][indJ] + constants[3]);
|
||||
YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10( matchMetricArray[indI][indJ - 1] + constants[4],
|
||||
YMetricArray[indI][indJ - 1] + constants[5]);
|
||||
@Test(dataProvider = "GetReferenceBytesTestProvider", enabled = true)
|
||||
public void testGetReferenceBytes(GetReferenceBytesTestProvider cfg) {
|
||||
Assert.assertEquals(cfg.calculatedReferenceBytes(), cfg.expectedReferenceBytes(), "Reference sequences do not match");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCallerIntegrationTest.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends WalkerTest {
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4";
|
||||
final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a960722c1ae2b6f774d3443a7e5ac27d");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1";
|
||||
final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec);
|
||||
}
|
||||
|
||||
// TODO -- need a better symbolic allele test
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "56f2ef9acc6c0d267cf2b7a447d87fb7");
|
||||
}
|
||||
|
||||
private void HCTestComplexGGA(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerComplexGGA: args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"417174e043dbb8b86cc3871da9b50536");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"f2df7a8f53ce449e4a8e8f8496e7c745");
|
||||
}
|
||||
}
|
||||
|
|
@ -54,11 +54,11 @@ import java.util.Collections;
|
|||
|
||||
public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||
final static String REF = b37KGReference;
|
||||
final String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||
final String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||
final String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
||||
final static String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
||||
|
||||
private void HCTest(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
|
|
@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "72ce6a5e46644dfd73aeffba9d6131ea");
|
||||
HCTest(CEUTRIO_BAM, "", "aac5517a0a64ad291b6b00825d982f7f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "f9d696391f1f337092d70e3abcd32bfb");
|
||||
HCTest(NA12878_BAM, "", "3bfab723fb0f3a65998d82152b67ed15");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
|
|
@ -84,47 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"4e8beb2cdc3d77427f14acf37cea2bd0");
|
||||
}
|
||||
|
||||
private void HCTestComplexGGA(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerComplexGGA: args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"75e1df0dcf3728fd2b6e4735c4cc88ce");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"1d244f2adbc72a0062eb673d56cbb5a8");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a1bc844f62a9cb60dbb70d00ad36b85d");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "23956e572f19ff26d25bbdfaa307675b");
|
||||
"283524b3e3397634d4cf0dc2b8723002");
|
||||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
|
|
@ -135,25 +95,24 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "1255f466aa2d288f015cd55d8fece1ac");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f1f867dbbe3747f16a0d9e5f11e6ed64");
|
||||
}
|
||||
|
||||
// That problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||
// was modifying the GATKSamRecord and that was screwing up the traversal engine from map call to
|
||||
// map call. So the test is there for consistency but not for correctness. I'm not sure we can trust
|
||||
// any of the calls in that region because it is so messy. The only thing I would maybe be worried about is
|
||||
// that the three calls that are missing happen to all be the left most calls in the region
|
||||
// any of the calls in that region because it is so messy.
|
||||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("103c91c4a78164949e166d3d27eb459b"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ccd30e226f097a40cdeebaa035a290a7"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("87fe31a4bbd68a9eb5d5910db5011c82"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a17e95c1191e3aef7892586fe38ca050"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -175,7 +134,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("0fa19ec5cf737a3445544b59ecc995e9"));
|
||||
Arrays.asList("adb08cb25e902cfe0129404a682b2169"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -183,7 +142,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||
Arrays.asList("5f4cbdcc9bffee6bba258dfac89492ed"));
|
||||
Arrays.asList("6debe567cd5ed7eb5756b6605a151f56"));
|
||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
public class HaplotypeCallerModesIntegrationTest extends WalkerTest {
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing that writing a BAM works
|
||||
//
|
||||
// I don't really care about the MD5s, so I'm just not providing them here, so they don't have to be
|
||||
// updated. These tests are basically ensuring that the code doesn't just randomly blow up.
|
||||
//
|
||||
// TODO -- what i'd really like to ensure here isn't the MD5 but that the BAMs can be read by the GATK or IGV
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void HCTestBamWriterCalledHaplotypes() {
|
||||
HCTestBamWriter(HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES, ""); // current MD5 = 9a2b6157f14b44b872a77f4e75c56023
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestBamWriterAllHaplotypes() {
|
||||
HCTestBamWriter(HaplotypeBAMWriter.Type.ALL_POSSIBLE_HAPLOTYPES, ""); // current MD5 = 06d885d82be81b8eef13bbfcd8041189
|
||||
}
|
||||
|
||||
public void HCTestBamWriter(final HaplotypeBAMWriter.Type type, final String md5) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam -o /dev/null " +
|
||||
"-bamout %s -L 20:10,000,000-10,010,000 -bamWriterType " + type, 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("HC writing bams with mode " + type, spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -82,7 +82,7 @@ public class KBestPathsUnitTest {
|
|||
@Test(dataProvider = "BasicBubbleDataProvider")
|
||||
public void testBasicBubbleData(final int refBubbleLength, final int altBubbleLength) {
|
||||
// Construct the assembly graph
|
||||
DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
DeBruijnAssemblyGraph graph = new DeBruijnAssemblyGraph();
|
||||
final int KMER_LENGTH = 3;
|
||||
final String preRef = "ATGG";
|
||||
final String postRef = new String(Utils.dupBytes((byte) 'A', KMER_LENGTH-1)) + "GGGGC";
|
||||
|
|
@ -142,7 +142,7 @@ public class KBestPathsUnitTest {
|
|||
@Test(dataProvider = "TripleBubbleDataProvider")
|
||||
public void testTripleBubbleData(final int refBubbleLength, final int altBubbleLength, final boolean offRefBeginning, final boolean offRefEnding) {
|
||||
// Construct the assembly graph
|
||||
DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
DeBruijnAssemblyGraph graph = new DeBruijnAssemblyGraph();
|
||||
final int KMER_LENGTH = 3;
|
||||
final String preAltOption = "ATCGATCGATCGATCGATCG";
|
||||
final String postAltOption = "CCCC";
|
||||
|
|
@ -211,7 +211,7 @@ public class KBestPathsUnitTest {
|
|||
if( offRefBeginning ) {
|
||||
expectedCigar.add(new CigarElement(preAltOption.length(), CigarOperator.I));
|
||||
}
|
||||
expectedCigar.add(new CigarElement(preRef.length() - ( offRefBeginning ? KMER_LENGTH - 1 : 0 ), CigarOperator.M));
|
||||
expectedCigar.add(new CigarElement(preRef.length() - (KMER_LENGTH - 1), CigarOperator.M));
|
||||
// first bubble
|
||||
if( refBubbleLength > altBubbleLength ) {
|
||||
expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
|
||||
|
|
|
|||
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* User: carneiro
|
||||
* Date: 2/16/13
|
||||
* Time: 11:48 PM
|
||||
*/
|
||||
public class ReadBinUnitTest {
|
||||
private GenomeLocParser parser;
|
||||
private ReadBin readBin;
|
||||
|
||||
private final int readLength = 100; // all reads will have the same size
|
||||
private final int referencePadding = 10; // standard reference padding
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
parser = new GenomeLocParser(ArtificialSAMUtils.createArtificialSamHeader().getSequenceDictionary());
|
||||
readBin = new ReadBin(parser, referencePadding);
|
||||
}
|
||||
|
||||
@DataProvider(name = "reads")
|
||||
public Object[][] reads() {
|
||||
|
||||
return new Object[][]{
|
||||
{"20S80M", 80},
|
||||
{"80M20S", 1},
|
||||
{"20S60M20S", 50},
|
||||
{"100M", 500}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the GenomeLoc variable in the ReadBin after adding arbitrary reads
|
||||
*
|
||||
* @param cigarString the read's cigar string
|
||||
* @param alignmentStart the read's alignment start
|
||||
*/
|
||||
@Test(enabled = true, dataProvider = "reads")
|
||||
public void testAddingReads(String cigarString, int alignmentStart) {
|
||||
final GATKSAMRecord read = createReadAndAddToBin(cigarString, alignmentStart);
|
||||
final GenomeLoc readLoc = parser.createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getSoftStart(), read.getSoftEnd());
|
||||
Assert.assertEquals(readBin.getLocation(), readLoc);
|
||||
readBin.clear();
|
||||
}
|
||||
|
||||
public GATKSAMRecord createReadAndAddToBin(String cigarString, int alignmentStart) {
|
||||
final GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
|
||||
read.setCigarString(cigarString);
|
||||
read.setAlignmentStart(alignmentStart);
|
||||
readBin.add(read);
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -62,6 +62,27 @@ import java.util.*;
|
|||
*/
|
||||
public class CombineVariantsUnitTest {
|
||||
|
||||
public static int VCF4headerStringCount = 16;
|
||||
|
||||
public static String VCF4headerStrings =
|
||||
"##fileformat=VCFv4.0\n"+
|
||||
"##filedate=2010-06-21\n"+
|
||||
"##reference=NCBI36\n"+
|
||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
|
||||
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n"+
|
||||
"##INFO=<ID=AF, Number=A, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
|
||||
"##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">\n"+
|
||||
"##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">\n"+
|
||||
"##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">\n"+
|
||||
"##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">\n"+
|
||||
"##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">\n"+
|
||||
"##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">\n"+
|
||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n"+
|
||||
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n"+
|
||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n"+
|
||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">\n"+
|
||||
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
|
||||
|
||||
// this header is a small subset of the header in VCFHeaderUnitTest: VCF4headerStrings
|
||||
public static String VCF4headerStringsSmallSubset =
|
||||
"##fileformat=VCFv4.0\n" +
|
||||
|
|
@ -159,34 +180,34 @@ public class CombineVariantsUnitTest {
|
|||
|
||||
@Test
|
||||
public void testHeadersWhereOneIsAStrictSubsetOfTheOther() {
|
||||
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings);
|
||||
VCFHeader one = createHeader(VCF4headerStrings);
|
||||
VCFHeader two = createHeader(VCF4headerStringsSmallSubset);
|
||||
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
|
||||
headers.add(one);
|
||||
headers.add(two);
|
||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, false);
|
||||
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
|
||||
Assert.assertEquals(lines.size(), VCF4headerStringCount);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions=IllegalStateException.class)
|
||||
public void testHeadersInfoDifferentValues() {
|
||||
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings);
|
||||
VCFHeader one = createHeader(VCF4headerStrings);
|
||||
VCFHeader two = createHeader(VCF4headerStringsBrokenInfo);
|
||||
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
|
||||
headers.add(one);
|
||||
headers.add(two);
|
||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, false);
|
||||
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
|
||||
Assert.assertEquals(lines.size(), VCF4headerStringCount);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeadersFormatDifferentValues() {
|
||||
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings);
|
||||
VCFHeader one = createHeader(VCF4headerStrings);
|
||||
VCFHeader two = createHeader(VCF4headerStringsBrokenFormat);
|
||||
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
|
||||
headers.add(one);
|
||||
headers.add(two);
|
||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, false);
|
||||
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
|
||||
Assert.assertEquals(lines.size(), VCF4headerStringCount);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,124 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
public class ContigComparatorUnitTest extends BaseTest {
|
||||
SAMSequenceDictionary dictForFails;
|
||||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
// sequence
|
||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(seq);
|
||||
dictForFails = genomeLocParser.getContigs();
|
||||
}
|
||||
|
||||
@DataProvider(name = "MyDataProvider")
|
||||
public Object[][] makeMyDataProvider() throws Exception {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final String ref : Arrays.asList(b37KGReference, hg18Reference) ) {
|
||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(ref));
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(seq);
|
||||
final SAMSequenceDictionary dict = genomeLocParser.getContigs();
|
||||
|
||||
for ( final SAMSequenceRecord rec1 : dict.getSequences() ) {
|
||||
for ( final SAMSequenceRecord rec2 : dict.getSequences() ) {
|
||||
final int expected = Integer.valueOf(rec1.getSequenceIndex()).compareTo(rec2.getSequenceIndex());
|
||||
tests.add(new Object[]{dict, rec1.getSequenceName(), rec2.getSequenceName(), expected});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "MyDataProvider")
|
||||
public void testMyData(final SAMSequenceDictionary dict, final String contig1, final String contig2, final int expected) {
|
||||
final ContigComparator comparator = new ContigComparator(dict);
|
||||
final int actual = comparator.compare(contig1, contig2);
|
||||
if ( expected == 0 )
|
||||
Assert.assertEquals(actual, expected, "Failed comparison of equals contigs");
|
||||
else if ( expected < 0 )
|
||||
Assert.assertTrue(actual < 0, "Failed comparison of contigs where expected < 0 ");
|
||||
else
|
||||
Assert.assertTrue(actual > 0, "Failed comparison of contigs where expected > 0 ");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testBadCallWithUnknownContig1() {
|
||||
final ContigComparator comparator = new ContigComparator(dictForFails);
|
||||
final int actual = comparator.compare("1", "chr1");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testBadCallWithUnknownContig2() {
|
||||
final ContigComparator comparator = new ContigComparator(dictForFails);
|
||||
final int actual = comparator.compare("chr1", "1");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testBadCallWithNullContig() {
|
||||
final ContigComparator comparator = new ContigComparator(dictForFails);
|
||||
final int actual = comparator.compare(null, "chr1");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class SWPairwiseAlignmentUnitTest extends BaseTest {
|
||||
@DataProvider(name = "ComplexReadAlignedToRef")
|
||||
public Object[][] makeComplexReadAlignedToRef() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final String ref1 = "ACTGACTGACTG";
|
||||
tests.add(new Object[]{"AAAGGACTGACTG", ref1, 1, "12M"});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ComplexReadAlignedToRef", enabled = true)
|
||||
public void testReadAlignedToRefComplexAlignment(final String reference, final String read, final int expectedStart, final String expectedCigar) {
|
||||
final SWPairwiseAlignment sw = new SWPairwiseAlignment(reference.getBytes(), read.getBytes());
|
||||
Assert.assertEquals(sw.getAlignmentStart2wrt1(), expectedStart);
|
||||
Assert.assertEquals(sw.getCigar().toString(), expectedCigar);
|
||||
}
|
||||
|
||||
// TODO
|
||||
// TODO
|
||||
// TODO this example demonstrates some kind of failure mode of SW that results in the read not being aligned
|
||||
// TODO to the reference at all. It has something to do with the specific parameters provided to the
|
||||
// TODO SW code. With the default parameters the result is the one expected. With the specified parameters
|
||||
// TODO the code fails
|
||||
// TODO
|
||||
// TODO
|
||||
@Test(enabled = false)
|
||||
public void testOddNoAlignment() {
|
||||
final String reference = "AAAGACTACTG";
|
||||
final String read = "AACGGACACTG";
|
||||
final int expectedStart = 0;
|
||||
final String expectedCigar = "11M";
|
||||
final SWPairwiseAlignment sw = new SWPairwiseAlignment(reference.getBytes(), read.getBytes(), 5.0, -10.0, -22.0, -1.2);
|
||||
sw.printAlignment(reference.getBytes(), read.getBytes());
|
||||
Assert.assertEquals(sw.getAlignmentStart2wrt1(), expectedStart);
|
||||
Assert.assertEquals(sw.getCigar().toString(), expectedCigar);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,241 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialBAMBuilder;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
|
||||
|
||||
// example genome loc parser for this test, can be deleted if you don't use the reference
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
// example fasta index file, can be deleted if you don't use the reference
|
||||
private IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
// sequence
|
||||
seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
genomeLocParser = new GenomeLocParser(seq);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testMultiAlleleWithHomLiks() {
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||
final GenomeLoc myLocation = genomeLocParser.createGenomeLoc("1", 10);
|
||||
|
||||
final int pileupSize = 100;
|
||||
final int readLength = 10;
|
||||
final List<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
||||
for ( int i = 0; i < pileupSize; i++ ) {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead" + i, 0, 1, readLength);
|
||||
final byte[] bases = Utils.dupBytes((byte)'A', readLength);
|
||||
bases[0] = (byte)(i % 2 == 0 ? 'A' : 'C'); // every other read the first base is a C
|
||||
|
||||
// set the read's bases and quals
|
||||
read.setReadBases(bases);
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||
reads.add(read);
|
||||
}
|
||||
|
||||
// create a pileup with all reads having offset 0
|
||||
final ReadBackedPileup pileup = new ReadBackedPileupImpl(myLocation, reads, 0);
|
||||
Allele base_A = Allele.create(BaseUtils.Base.A.base);
|
||||
Allele base_C = Allele.create(BaseUtils.Base.C.base);
|
||||
Allele base_T = Allele.create(BaseUtils.Base.T.base);
|
||||
|
||||
PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
for ( final PileupElement e : pileup ) {
|
||||
for ( final Allele allele : Arrays.asList(base_A,base_C,base_T) ) {
|
||||
Double likelihood = allele == base_A ? -0.04 : -3.0;
|
||||
perReadAlleleLikelihoodMap.add(e,allele,likelihood);
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.size(),pileup.depthOfCoverage());
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap().keySet().size(),3);
|
||||
Map<Allele,List<GATKSAMRecord>> shouldBeAllA = perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap();
|
||||
Assert.assertEquals(shouldBeAllA.get(base_A).size(),pileup.depthOfCoverage());
|
||||
Assert.assertEquals(shouldBeAllA.get(base_C).size(),0);
|
||||
Assert.assertEquals(shouldBeAllA.get(base_T).size(),0);
|
||||
}
|
||||
|
||||
|
||||
@Test()
|
||||
public void testMultiAlleleWithHetLiks() {
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||
final GenomeLoc myLocation = genomeLocParser.createGenomeLoc("1", 10);
|
||||
|
||||
final int pileupSize = 100;
|
||||
final int readLength = 10;
|
||||
final List<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
||||
for ( int i = 0; i < pileupSize; i++ ) {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead" + i, 0, 1, readLength);
|
||||
final byte[] bases = Utils.dupBytes((byte)'A', readLength);
|
||||
bases[0] = (byte)(i % 2 == 0 ? 'A' : 'C'); // every other base is a C
|
||||
|
||||
// set the read's bases and quals
|
||||
read.setReadBases(bases);
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||
reads.add(read);
|
||||
}
|
||||
|
||||
// create a pileup with all reads having offset 0
|
||||
final ReadBackedPileup pileup = new ReadBackedPileupImpl(myLocation, reads, 0);
|
||||
Allele base_A = Allele.create(BaseUtils.Base.A.base);
|
||||
Allele base_C = Allele.create(BaseUtils.Base.C.base);
|
||||
Allele base_T = Allele.create(BaseUtils.Base.T.base);
|
||||
|
||||
PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
int idx = 0;
|
||||
for ( final PileupElement e : pileup ) {
|
||||
for ( final Allele allele : Arrays.asList(base_A,base_C,base_T) ) {
|
||||
Double likelihood;
|
||||
if ( idx % 2 == 0 )
|
||||
likelihood = allele == base_A ? -0.04 : -3.0;
|
||||
else
|
||||
likelihood = allele == base_C ? -0.04 : -3.0;
|
||||
perReadAlleleLikelihoodMap.add(e,allele,likelihood);
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.size(),pileup.depthOfCoverage());
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap().keySet().size(),3);
|
||||
Map<Allele,List<GATKSAMRecord>> halfAhalfC = perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap();
|
||||
Assert.assertEquals(halfAhalfC.get(base_A).size(),pileup.depthOfCoverage()/2);
|
||||
Assert.assertEquals(halfAhalfC.get(base_C).size(),pileup.depthOfCoverage()/2);
|
||||
Assert.assertEquals(halfAhalfC.get(base_T).size(),0);
|
||||
|
||||
// make sure the likelihoods are retrievable
|
||||
|
||||
idx = 0;
|
||||
for ( final PileupElement e : pileup ) {
|
||||
Assert.assertTrue(perReadAlleleLikelihoodMap.containsPileupElement(e));
|
||||
Map<Allele,Double> likelihoods = perReadAlleleLikelihoodMap.getLikelihoodsAssociatedWithPileupElement(e);
|
||||
for ( final Allele allele : Arrays.asList(base_A,base_C,base_T) ) {
|
||||
Double expLik;
|
||||
if ( idx % 2 == 0 )
|
||||
expLik = allele == base_A ? -0.04 : -3.0;
|
||||
else
|
||||
expLik = allele == base_C ? -0.04 : -3.0;
|
||||
Assert.assertEquals(likelihoods.get(allele),expLik);
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
|
||||
// and test downsampling for good measure
|
||||
|
||||
final List<GATKSAMRecord> excessReads = new LinkedList<GATKSAMRecord>();
|
||||
int prevSize = perReadAlleleLikelihoodMap.size();
|
||||
for ( int i = 0; i < 10 ; i++ ) {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myExcessRead" + i, 0, 1, readLength);
|
||||
final byte[] bases = Utils.dupBytes((byte)'A', readLength);
|
||||
bases[0] = (byte)(i % 2 == 0 ? 'A' : 'C'); // every other base is a C
|
||||
|
||||
// set the read's bases and quals
|
||||
read.setReadBases(bases);
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||
for ( final Allele allele : Arrays.asList(base_A,base_C,base_T) ) {
|
||||
perReadAlleleLikelihoodMap.add(read,allele,allele==base_A ? -0.04 : -3.0);
|
||||
}
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.size(),1+prevSize);
|
||||
prevSize = perReadAlleleLikelihoodMap.size();
|
||||
}
|
||||
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.size(),pileup.depthOfCoverage()+10);
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap().get(base_A).size(),60);
|
||||
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(0.1,null);
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.size(),(int) (0.9*(pileup.depthOfCoverage()+10)));
|
||||
|
||||
Map<Allele,List<GATKSAMRecord>> downsampledStrat = perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap();
|
||||
Assert.assertEquals(downsampledStrat.get(base_A).size(),(int) (pileup.depthOfCoverage()/2) - 1);
|
||||
Assert.assertEquals(downsampledStrat.get(base_C).size(),(int) (pileup.depthOfCoverage()/2));
|
||||
Assert.assertEquals(downsampledStrat.get(base_T).size(),0);
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
// ********************************************************************************** //
|
||||
// Note that this class also serves as an integration test for the VariantAnnotator! //
|
||||
// ********************************************************************************** //
|
||||
|
||||
public class NanoSchedulerIntegrationTest extends WalkerTest {
|
||||
@DataProvider(name = "NanoSchedulerUGTest")
|
||||
public Object[][] createNanoSchedulerUGTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final int nt : Arrays.asList(1, 2) )
|
||||
for ( final int nct : Arrays.asList(1, 2) ) {
|
||||
// tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct });
|
||||
//// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct });
|
||||
tests.add(new Object[]{ "BOTH", "85fc5d6dfeb60ed89763470f4b4c981e", nt, nct });
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "NanoSchedulerUGTest")
|
||||
private void testNanoSchedulerUGTest(final String glm, final String md5, final int nt, final int nct ) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference,
|
||||
"--no_cmdline_in_header -G",
|
||||
//"--dbsnp " + b37dbSNP132,
|
||||
"-I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam",
|
||||
"-L 20:10,000,000-10,100,000",
|
||||
"-glm " + glm,
|
||||
"--contamination_fraction_to_filter 0.0",
|
||||
"-nt " + nt,
|
||||
"-nct " + nct,
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList(md5)
|
||||
);
|
||||
executeTest(String.format("testUG-glm:%s-nt%d-nct%d", glm, nt, nct), spec);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -50,20 +50,31 @@ package org.broadinstitute.sting.utils.pairhmm;
|
|||
// the imports for unit testing.
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
public class PairHMMUnitTest extends BaseTest {
|
||||
private final static boolean ALLOW_READS_LONGER_THAN_HAPLOTYPE = true;
|
||||
private final static boolean DEBUG = false;
|
||||
final static boolean EXTENSIVE_TESTING = true;
|
||||
PairHMM exactHMM = new ExactPairHMM(); // the log truth implementation
|
||||
PairHMM originalHMM = new OriginalPairHMM(); // the reference implementation
|
||||
PairHMM cachingHMM = new CachingPairHMM();
|
||||
PairHMM loglessHMM = new LoglessCachingPairHMM();
|
||||
final PairHMM exactHMM = new Log10PairHMM(true); // the log truth implementation
|
||||
final PairHMM originalHMM = new Log10PairHMM(false); // the reference implementation
|
||||
final PairHMM loglessHMM = new LoglessCachingPairHMM();
|
||||
|
||||
private List<PairHMM> getHMMs() {
|
||||
return Arrays.asList(exactHMM, originalHMM, loglessHMM);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -71,11 +82,12 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class BasicLikelihoodTestProvider extends TestDataProvider {
|
||||
private class BasicLikelihoodTestProvider {
|
||||
final String ref, read;
|
||||
final byte[] refBasesWithContext, readBasesWithContext;
|
||||
final int baseQual, insQual, delQual, gcp;
|
||||
final int expectedQual;
|
||||
final boolean left, right;
|
||||
final static String CONTEXT = "ACGTAATGACGATTGCA";
|
||||
final static String LEFT_FLANK = "GATTTATCATCGAGTCTGC";
|
||||
final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTTA";
|
||||
|
|
@ -85,7 +97,6 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp, final boolean left, final boolean right) {
|
||||
super(BasicLikelihoodTestProvider.class, String.format("ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual));
|
||||
this.baseQual = baseQual;
|
||||
this.delQual = delQual;
|
||||
this.insQual = insQual;
|
||||
|
|
@ -93,13 +104,30 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
this.read = read;
|
||||
this.ref = ref;
|
||||
this.expectedQual = expectedQual;
|
||||
this.left = left;
|
||||
this.right = right;
|
||||
|
||||
refBasesWithContext = asBytes(ref, left, right);
|
||||
readBasesWithContext = asBytes(read, false, false);
|
||||
}
|
||||
|
||||
public double expectedLogL() {
|
||||
return (expectedQual / -10.0) + 0.03 ;
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual);
|
||||
}
|
||||
|
||||
public double expectedLogL(final PairHMM hmm) {
|
||||
return (expectedQual / -10.0) + 0.03 +
|
||||
hmm.getNPotentialXStartsLikelihoodPenaltyLog10(refBasesWithContext.length, readBasesWithContext.length);
|
||||
}
|
||||
|
||||
public double getTolerance(final PairHMM hmm) {
|
||||
if ( hmm instanceof LoglessCachingPairHMM )
|
||||
return toleranceFromExact();
|
||||
if ( hmm instanceof Log10PairHMM ) {
|
||||
return ((Log10PairHMM)hmm).isDoingExactLog10Calculations() ? toleranceFromExact() : toleranceFromReference();
|
||||
} else
|
||||
return toleranceFromTheoretical();
|
||||
}
|
||||
|
||||
public double toleranceFromTheoretical() {
|
||||
|
|
@ -107,7 +135,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
public double toleranceFromReference() {
|
||||
return 1E-4;
|
||||
return 1E-3; // has to be very tolerant -- this approximation is quite approximate
|
||||
}
|
||||
|
||||
public double toleranceFromExact() {
|
||||
|
|
@ -115,7 +143,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
public double calcLogL( final PairHMM pairHMM, boolean anchorIndel ) {
|
||||
pairHMM.initialize(readBasesWithContext.length, refBasesWithContext.length);
|
||||
pairHMM.initialize(refBasesWithContext.length, readBasesWithContext.length);
|
||||
return pairHMM.computeReadLikelihoodGivenHaplotypeLog10(
|
||||
refBasesWithContext, readBasesWithContext,
|
||||
qualAsBytes(baseQual, false, anchorIndel), qualAsBytes(insQual, true, anchorIndel), qualAsBytes(delQual, true, anchorIndel),
|
||||
|
|
@ -157,6 +185,8 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(8, 10, 20) : Arrays.asList(10);
|
||||
final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20,30,35) : Arrays.asList(2);
|
||||
|
||||
final List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final int baseQual : baseQuals ) {
|
||||
for ( final int indelQual : indelQuals ) {
|
||||
for ( final int gcp : gcps ) {
|
||||
|
|
@ -167,7 +197,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
final String ref = new String(new byte[]{refBase});
|
||||
final String read = new String(new byte[]{readBase});
|
||||
final int expected = refBase == readBase ? 0 : baseQual;
|
||||
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
|
||||
tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp)});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -183,10 +213,10 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
final String ref = insertionP ? small : big;
|
||||
final String read = insertionP ? big : small;
|
||||
|
||||
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
|
||||
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false);
|
||||
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true);
|
||||
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true);
|
||||
tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp)});
|
||||
tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false)});
|
||||
tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true)});
|
||||
tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true)});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -194,19 +224,20 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class);
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
final Random random = new Random(87860573);
|
||||
@DataProvider(name = "OptimizedLikelihoodTestProvider")
|
||||
public Object[][] makeOptimizedLikelihoodTests() {
|
||||
// context on either side is ACGTTGCA REF ACGTTGCA
|
||||
// test all combinations
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
final Random random = GenomeAnalysisEngine.getRandomGenerator();
|
||||
final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(10, 30, 40, 60) : Arrays.asList(30);
|
||||
final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(20, 40, 60) : Arrays.asList(40);
|
||||
final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30) : Arrays.asList(10);
|
||||
final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(3, 20, 50, 90, 160) : Arrays.asList(2);
|
||||
|
||||
final List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final int baseQual : baseQuals ) {
|
||||
for ( final int indelQual : indelQuals ) {
|
||||
for ( final int gcp : gcps ) {
|
||||
|
|
@ -220,45 +251,51 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
for( int iii = 0; iii < readSize; iii++) {
|
||||
read += (char) BaseUtils.BASES[random.nextInt(4)];
|
||||
}
|
||||
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp);
|
||||
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp, true, false);
|
||||
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp, false, true);
|
||||
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp, true, true);
|
||||
|
||||
for ( final boolean leftFlank : Arrays.asList(true, false) )
|
||||
for ( final boolean rightFlank : Arrays.asList(true, false) )
|
||||
tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp, leftFlank, rightFlank)});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class);
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BasicLikelihoodTestProvider", enabled = true)
|
||||
@Test(enabled = !DEBUG, dataProvider = "BasicLikelihoodTestProvider")
|
||||
public void testBasicLikelihoods(BasicLikelihoodTestProvider cfg) {
|
||||
double exactLogL = cfg.calcLogL( exactHMM, true );
|
||||
double calculatedLogL = cfg.calcLogL( originalHMM, true );
|
||||
double optimizedLogL = cfg.calcLogL( cachingHMM, true );
|
||||
double loglessLogL = cfg.calcLogL( loglessHMM, true );
|
||||
double expectedLogL = cfg.expectedLogL();
|
||||
//logger.warn(String.format("Test: logL calc=%.2f optimized=%.2f logless=%.2f expected=%.2f for %s", calculatedLogL, optimizedLogL, loglessLogL, expectedLogL, cfg.toString()));
|
||||
Assert.assertEquals(exactLogL, expectedLogL, cfg.toleranceFromTheoretical());
|
||||
Assert.assertEquals(calculatedLogL, expectedLogL, cfg.toleranceFromTheoretical());
|
||||
Assert.assertEquals(optimizedLogL, calculatedLogL, cfg.toleranceFromReference());
|
||||
Assert.assertEquals(loglessLogL, exactLogL, cfg.toleranceFromExact());
|
||||
if ( ALLOW_READS_LONGER_THAN_HAPLOTYPE || cfg.read.length() <= cfg.ref.length() ) {
|
||||
final double exactLogL = cfg.calcLogL( exactHMM, true );
|
||||
for ( final PairHMM hmm : getHMMs() ) {
|
||||
double actualLogL = cfg.calcLogL( hmm, true );
|
||||
double expectedLogL = cfg.expectedLogL(hmm);
|
||||
|
||||
// compare to our theoretical expectation with appropriate tolerance
|
||||
Assert.assertEquals(actualLogL, expectedLogL, cfg.toleranceFromTheoretical(), "Failed with hmm " + hmm);
|
||||
// compare to the exact reference implementation with appropriate tolerance
|
||||
Assert.assertEquals(actualLogL, exactLogL, cfg.getTolerance(hmm), "Failed with hmm " + hmm);
|
||||
Assert.assertTrue(MathUtils.goodLog10Probability(actualLogL), "Bad log10 likelihood " + actualLogL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "OptimizedLikelihoodTestProvider", enabled = true)
|
||||
@Test(enabled = !DEBUG, dataProvider = "OptimizedLikelihoodTestProvider")
|
||||
public void testOptimizedLikelihoods(BasicLikelihoodTestProvider cfg) {
|
||||
double exactLogL = cfg.calcLogL( exactHMM, false );
|
||||
double calculatedLogL = cfg.calcLogL( originalHMM, false );
|
||||
double optimizedLogL = cfg.calcLogL( cachingHMM, false );
|
||||
double loglessLogL = cfg.calcLogL( loglessHMM, false );
|
||||
//logger.warn(String.format("Test: logL calc=%.2f optimized=%.2f logless=%.2f expected=%.2f for %s", calculatedLogL, optimizedLogL, loglessLogL, expectedLogL, cfg.toString()));
|
||||
Assert.assertEquals(optimizedLogL, calculatedLogL, cfg.toleranceFromReference());
|
||||
Assert.assertEquals(loglessLogL, exactLogL, cfg.toleranceFromExact());
|
||||
if ( ALLOW_READS_LONGER_THAN_HAPLOTYPE || cfg.read.length() <= cfg.ref.length() ) {
|
||||
double exactLogL = cfg.calcLogL( exactHMM, false );
|
||||
|
||||
for ( final PairHMM hmm : getHMMs() ) {
|
||||
double calculatedLogL = cfg.calcLogL( hmm, false );
|
||||
// compare to the exact reference implementation with appropriate tolerance
|
||||
Assert.assertEquals(calculatedLogL, exactLogL, cfg.getTolerance(hmm), String.format("Test: logL calc=%.2f expected=%.2f for %s with hmm %s", calculatedLogL, exactLogL, cfg.toString(), hmm));
|
||||
Assert.assertTrue(MathUtils.goodLog10Probability(calculatedLogL), "Bad log10 likelihood " + calculatedLogL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testMismatchInEveryPositionInTheReadWithCenteredHaplotype() {
|
||||
byte[] haplotype1 = "TTCTCTTCTGTTGTGGCTGGTT".getBytes();
|
||||
|
||||
|
|
@ -277,7 +314,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length-offset);
|
||||
// change single base at position k to C. If it's a C, change to T
|
||||
mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
|
||||
originalHMM.initialize(mread.length, haplotype1.length);
|
||||
originalHMM.initialize(haplotype1.length, mread.length);
|
||||
double res1 = originalHMM.computeReadLikelihoodGivenHaplotypeLog10(
|
||||
haplotype1, mread,
|
||||
quals, gop, gop,
|
||||
|
|
@ -285,11 +322,12 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
|
||||
System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
|
||||
|
||||
Assert.assertEquals(res1, -2.0, 1e-2);
|
||||
// - log10 is because of number of start positions
|
||||
Assert.assertEquals(res1, -2.0 - Math.log10(originalHMM.getNPotentialXStarts(haplotype1.length, mread.length)), 1e-2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = ! DEBUG)
|
||||
public void testMismatchInEveryPositionInTheRead() {
|
||||
byte[] haplotype1 = "TTCTCTTCTGTTGTGGCTGGTT".getBytes();
|
||||
|
||||
|
|
@ -308,7 +346,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length);
|
||||
// change single base at position k to C. If it's a C, change to T
|
||||
mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
|
||||
originalHMM.initialize(mread.length, haplotype1.length);
|
||||
originalHMM.initialize(haplotype1.length, mread.length);
|
||||
double res1 = originalHMM.computeReadLikelihoodGivenHaplotypeLog10(
|
||||
haplotype1, mread,
|
||||
quals, gop, gop,
|
||||
|
|
@ -316,7 +354,298 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
|
||||
System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
|
||||
|
||||
Assert.assertEquals(res1, -2.0, 1e-2);
|
||||
// - log10 is because of number of start positions
|
||||
Assert.assertEquals(res1, -2.0 - Math.log10(originalHMM.getNPotentialXStarts(haplotype1.length, mread.length)), 1e-2);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "HMMProvider")
|
||||
public Object[][] makeHMMProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final int readSize : Arrays.asList(1, 2, 5, 10) ) {
|
||||
for ( final int refSize : Arrays.asList(1, 2, 5, 10) ) {
|
||||
if ( refSize > readSize ) {
|
||||
for ( final PairHMM hmm : getHMMs() )
|
||||
tests.add(new Object[]{hmm, readSize, refSize});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG, dataProvider = "HMMProvider")
|
||||
void testMultipleReadMatchesInHaplotype(final PairHMM hmm, final int readSize, final int refSize) {
|
||||
byte[] readBases = Utils.dupBytes((byte)'A', readSize);
|
||||
byte[] refBases = ("CC" + new String(Utils.dupBytes((byte)'A', refSize)) + "GGA").getBytes();
|
||||
byte baseQual = 20;
|
||||
byte insQual = 37;
|
||||
byte delQual = 37;
|
||||
byte gcp = 10;
|
||||
hmm.initialize(refBases.length, readBases.length);
|
||||
double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases,
|
||||
Utils.dupBytes(baseQual, readBases.length),
|
||||
Utils.dupBytes(insQual, readBases.length),
|
||||
Utils.dupBytes(delQual, readBases.length),
|
||||
Utils.dupBytes(gcp, readBases.length), 0, true);
|
||||
Assert.assertTrue(d <= 0.0, "Likelihoods should be <= 0 but got "+ d);
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG, dataProvider = "HMMProvider")
|
||||
void testAllMatchingRead(final PairHMM hmm, final int readSize, final int refSize) {
|
||||
byte[] readBases = Utils.dupBytes((byte)'A', readSize);
|
||||
byte[] refBases = Utils.dupBytes((byte)'A', refSize);
|
||||
byte baseQual = 20;
|
||||
byte insQual = 100;
|
||||
byte delQual = 100;
|
||||
byte gcp = 100;
|
||||
hmm.initialize(refBases.length, readBases.length);
|
||||
double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases,
|
||||
Utils.dupBytes(baseQual, readBases.length),
|
||||
Utils.dupBytes(insQual, readBases.length),
|
||||
Utils.dupBytes(delQual, readBases.length),
|
||||
Utils.dupBytes(gcp, readBases.length), 0, true);
|
||||
final double expected = Math.log10(Math.pow(1.0 - QualityUtils.qualToErrorProb(baseQual), readBases.length));
|
||||
Assert.assertEquals(d, expected, 1e-3, "Likelihoods should sum to just the error prob of the read");
|
||||
}
|
||||
|
||||
@DataProvider(name = "HMMProviderWithBigReads")
|
||||
public Object[][] makeBigReadHMMProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final String read1 = "ACCAAGTAGTCACCGT";
|
||||
final String ref1 = "ACCAAGTAGTCACCGTAACG";
|
||||
|
||||
for ( final int nReadCopies : Arrays.asList(1, 2, 10, 20, 50) ) {
|
||||
for ( final int nRefCopies : Arrays.asList(1, 2, 10, 20, 100) ) {
|
||||
if ( nRefCopies > nReadCopies ) {
|
||||
for ( final PairHMM hmm : getHMMs() ) {
|
||||
final String read = Utils.dupString(read1, nReadCopies);
|
||||
final String ref = Utils.dupString(ref1, nRefCopies);
|
||||
tests.add(new Object[]{hmm, read, ref});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG, dataProvider = "HMMProviderWithBigReads")
|
||||
void testReallyBigReads(final PairHMM hmm, final String read, final String ref) {
|
||||
byte[] readBases = read.getBytes();
|
||||
byte[] refBases = ref.getBytes();
|
||||
byte baseQual = 30;
|
||||
byte insQual = 40;
|
||||
byte delQual = 40;
|
||||
byte gcp = 10;
|
||||
hmm.initialize(refBases.length, readBases.length);
|
||||
double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases,
|
||||
Utils.dupBytes(baseQual, readBases.length),
|
||||
Utils.dupBytes(insQual, readBases.length),
|
||||
Utils.dupBytes(delQual, readBases.length),
|
||||
Utils.dupBytes(gcp, readBases.length), 0, true);
|
||||
Assert.assertTrue(MathUtils.goodLog10Probability(d), "Likelihoods = " + d +" was bad for a read with " + read.length() + " bases and ref with " + ref.length() + " bases");
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG)
|
||||
void testPreviousBadValue() {
|
||||
byte[] readBases = "A".getBytes();
|
||||
byte[] refBases = "AT".getBytes();
|
||||
byte baseQual = 30;
|
||||
byte insQual = 40;
|
||||
byte delQual = 40;
|
||||
byte gcp = 10;
|
||||
|
||||
exactHMM.initialize(refBases.length, readBases.length);
|
||||
double d = exactHMM.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases,
|
||||
Utils.dupBytes(baseQual, readBases.length),
|
||||
Utils.dupBytes(insQual, readBases.length),
|
||||
Utils.dupBytes(delQual, readBases.length),
|
||||
Utils.dupBytes(gcp, readBases.length), 0, true);
|
||||
//exactHMM.dumpMatrices();
|
||||
|
||||
loglessHMM.initialize(refBases.length, readBases.length);
|
||||
double logless = loglessHMM.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases,
|
||||
Utils.dupBytes(baseQual, readBases.length),
|
||||
Utils.dupBytes(insQual, readBases.length),
|
||||
Utils.dupBytes(delQual, readBases.length),
|
||||
Utils.dupBytes(gcp, readBases.length), 0, true);
|
||||
loglessHMM.dumpMatrices();
|
||||
}
|
||||
|
||||
@DataProvider(name = "JustHMMProvider")
|
||||
public Object[][] makeJustHMMProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final PairHMM hmm : getHMMs() ) {
|
||||
tests.add(new Object[]{hmm});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG, dataProvider = "JustHMMProvider")
|
||||
void testMaxLengthsBiggerThanProvidedRead(final PairHMM hmm) {
|
||||
for ( int nExtraMaxSize = 0; nExtraMaxSize < 100; nExtraMaxSize++ ) {
|
||||
byte[] readBases = "CTATCTTAGTAAGCCCCCATACCTGCAAATTTCAGGATGTCTCCTCCAAAAATCAACA".getBytes();
|
||||
byte[] refBases = "CTATCTTAGTAAGCCCCCATACCTGCAAATTTCAGGATGTCTCCTCCAAAAATCAAAACTTCTGAGAAAAAAAAAAAAAATTAAATCAAACCCTGATTCCTTAAAGGTAGTAAAAAAACATCATTCTTTCTTAGTGGAATAGAAACTAGGTCAAAAGAACAGTGATTC".getBytes();
|
||||
byte gcp = 10;
|
||||
|
||||
byte[] quals = new byte[]{35,34,31,32,35,34,32,31,36,30,31,32,36,34,33,32,32,32,33,32,30,35,33,35,36,36,33,33,33,32,32,32,37,33,36,35,33,32,34,31,36,35,35,35,35,33,34,31,31,30,28,27,26,29,26,25,29,29};
|
||||
byte[] insQual = new byte[]{46,46,46,46,46,47,45,46,45,48,47,44,45,48,46,43,43,42,48,48,45,47,47,48,48,47,48,45,38,47,45,39,47,48,47,47,48,46,49,48,49,48,46,47,48,44,44,43,39,32,34,36,46,48,46,44,45,45};
|
||||
byte[] delQual = new byte[]{44,44,44,43,45,44,43,42,45,46,45,43,44,47,45,40,40,40,45,46,43,45,45,44,46,46,46,43,35,44,43,36,44,45,46,46,44,44,47,43,47,45,45,45,46,45,45,46,44,35,35,35,45,47,45,44,44,43};
|
||||
|
||||
final int maxHaplotypeLength = refBases.length + nExtraMaxSize;
|
||||
final int maxReadLength = readBases.length + nExtraMaxSize;
|
||||
|
||||
hmm.initialize(maxHaplotypeLength, maxReadLength);
|
||||
double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases,
|
||||
quals,
|
||||
insQual,
|
||||
delQual,
|
||||
Utils.dupBytes(gcp, readBases.length), 0, true);
|
||||
Assert.assertTrue(MathUtils.goodLog10Probability(d), "Likelihoods = " + d +" was bad for a read with " + readBases.length + " bases and ref with " + refBases.length + " bases");
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "HaplotypeIndexingProvider")
|
||||
public Object[][] makeHaplotypeIndexingProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final String root1 = "ACGTGTCAAACCGGGTT";
|
||||
final String root2 = "ACGTGTCACACTGGGTT"; // differs in two locations
|
||||
|
||||
final String read1 = "ACGTGTCACACTGGATT"; // 1 diff from 2, 2 diff from root1
|
||||
final String read2 = root1; // same as root1
|
||||
final String read3 = root2; // same as root2
|
||||
final String read4 = "ACGTGTCACACTGGATTCGAT";
|
||||
final String read5 = "CCAGTAACGTGTCACACTGGATTCGAT";
|
||||
|
||||
// for ( final String read : Arrays.asList(read2) ) {
|
||||
for ( final String read : Arrays.asList(read1, read2, read3, read4, read5) ) {
|
||||
for ( final PairHMM hmm : getHMMs() ) {
|
||||
// int readLength = read.length(); {
|
||||
for ( int readLength = 10; readLength < read.length(); readLength++ ) {
|
||||
final String myRead = read.substring(0, readLength);
|
||||
tests.add(new Object[]{hmm, root1, root2, myRead});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG, dataProvider = "HaplotypeIndexingProvider")
|
||||
void testHaplotypeIndexing(final PairHMM hmm, final String root1, final String root2, final String read) {
|
||||
final double TOLERANCE = 1e-9;
|
||||
final String prefix = "AACCGGTTTTTGGGCCCAAACGTACGTACAGTTGGTCAACATCGATCAGGTTCCGGAGTAC";
|
||||
|
||||
final int maxReadLength = read.length();
|
||||
final int maxHaplotypeLength = prefix.length() + root1.length();
|
||||
|
||||
// the initialization occurs once, at the start of the evalution of reads
|
||||
hmm.initialize(maxHaplotypeLength, maxReadLength);
|
||||
|
||||
for ( int prefixStart = prefix.length(); prefixStart >= 0; prefixStart-- ) {
|
||||
final String myPrefix = prefix.substring(prefixStart, prefix.length());
|
||||
final String hap1 = myPrefix + root1;
|
||||
final String hap2 = myPrefix + root2;
|
||||
|
||||
final int hapStart = PairHMM.findFirstPositionWhereHaplotypesDiffer(hap1.getBytes(), hap2.getBytes());
|
||||
|
||||
final double actual1 = testHaplotypeIndexingCalc(hmm, hap1, read, 0, true);
|
||||
final double actual2 = testHaplotypeIndexingCalc(hmm, hap2, read, hapStart, false);
|
||||
final double expected2 = testHaplotypeIndexingCalc(hmm, hap2, read, 0, true);
|
||||
Assert.assertEquals(actual2, expected2, TOLERANCE, "Caching calculation failed for read " + read + " against haplotype with prefix '" + myPrefix
|
||||
+ "' expected " + expected2 + " but got " + actual2 + " with hapStart of " + hapStart);
|
||||
}
|
||||
}
|
||||
|
||||
private double testHaplotypeIndexingCalc(final PairHMM hmm, final String hap, final String read, final int hapStart, final boolean recache) {
|
||||
final byte[] readBases = read.getBytes();
|
||||
final byte[] baseQuals = Utils.dupBytes((byte)30, readBases.length);
|
||||
final byte[] insQuals = Utils.dupBytes((byte)45, readBases.length);
|
||||
final byte[] delQuals = Utils.dupBytes((byte)40, readBases.length);
|
||||
final byte[] gcp = Utils.dupBytes((byte)10, readBases.length);
|
||||
double d = hmm.computeReadLikelihoodGivenHaplotypeLog10(
|
||||
hap.getBytes(), readBases, baseQuals, insQuals, delQuals, gcp,
|
||||
hapStart, recache);
|
||||
Assert.assertTrue(MathUtils.goodLog10Probability(d), "Likelihoods = " + d + " was bad for read " + read + " and ref " + hap + " with hapStart " + hapStart);
|
||||
return d;
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testFindFirstPositionWhereHaplotypesDiffer() {
|
||||
for ( int haplotypeSize1 = 10; haplotypeSize1 < 30; haplotypeSize1++ ) {
|
||||
for ( int haplotypeSize2 = 10; haplotypeSize2 < 50; haplotypeSize2++ ) {
|
||||
final int maxLength = Math.max(haplotypeSize1, haplotypeSize2);
|
||||
final int minLength = Math.min(haplotypeSize1, haplotypeSize2);
|
||||
for ( int differingSite = 0; differingSite < maxLength + 1; differingSite++) {
|
||||
for ( final boolean oneIsDiff : Arrays.asList(true, false) ) {
|
||||
final byte[] hap1 = Utils.dupBytes((byte)'A', haplotypeSize1);
|
||||
final byte[] hap2 = Utils.dupBytes((byte)'A', haplotypeSize2);
|
||||
|
||||
final int expected = oneIsDiff
|
||||
? makeDiff(hap1, differingSite, minLength)
|
||||
: makeDiff(hap2, differingSite, minLength);
|
||||
final int actual = PairHMM.findFirstPositionWhereHaplotypesDiffer(hap1, hap2);
|
||||
Assert.assertEquals(actual, expected, "Bad differing site for " + new String(hap1) + " vs. " + new String(hap2));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int makeDiff(final byte[] bytes, final int site, final int minSize) {
|
||||
if ( site < bytes.length ) {
|
||||
bytes[site] = 'C';
|
||||
return Math.min(site, minSize);
|
||||
} else
|
||||
return minSize;
|
||||
}
|
||||
|
||||
@DataProvider(name = "UninitializedHMMs")
|
||||
public Object[][] makeUninitializedHMMs() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{new LoglessCachingPairHMM()});
|
||||
tests.add(new Object[]{new Log10PairHMM(true)});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, expectedExceptions = IllegalStateException.class, dataProvider = "UninitializedHMMs")
|
||||
public void testNoInitializeCall(final PairHMM hmm) {
|
||||
byte[] readBases = "A".getBytes();
|
||||
byte[] refBases = "AT".getBytes();
|
||||
byte[] baseQuals = Utils.dupBytes((byte)30, readBases.length);
|
||||
|
||||
// didn't call initialize => should exception out
|
||||
double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases,
|
||||
baseQuals, baseQuals, baseQuals, baseQuals, 0, true);
|
||||
}
|
||||
|
||||
@Test(enabled = true, expectedExceptions = IllegalArgumentException.class, dataProvider = "JustHMMProvider")
|
||||
public void testHapTooLong(final PairHMM hmm) {
|
||||
byte[] readBases = "AAA".getBytes();
|
||||
byte[] refBases = "AAAT".getBytes();
|
||||
byte[] baseQuals = Utils.dupBytes((byte)30, readBases.length);
|
||||
|
||||
hmm.initialize(3, 3);
|
||||
double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases,
|
||||
baseQuals, baseQuals, baseQuals, baseQuals, 0, true);
|
||||
}
|
||||
|
||||
@Test(enabled = true, expectedExceptions = IllegalArgumentException.class, dataProvider = "JustHMMProvider")
|
||||
public void testReadTooLong(final PairHMM hmm) {
|
||||
byte[] readBases = "AAA".getBytes();
|
||||
byte[] refBases = "AAAT".getBytes();
|
||||
byte[] baseQuals = Utils.dupBytes((byte)30, readBases.length);
|
||||
|
||||
hmm.initialize(3, 2);
|
||||
double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases,
|
||||
baseQuals, baseQuals, baseQuals, baseQuals, 0, true);
|
||||
}
|
||||
}
|
||||
|
|
@ -90,7 +90,7 @@ public class QualQuantizerUnitTest extends BaseTest {
|
|||
this.exError = exError;
|
||||
this.exTotal = exTotal;
|
||||
this.exErrorRate = (leftE + rightE + 1) / (1.0 * (leftN + rightN + 1));
|
||||
this.exQual = QualityUtils.probToQual(1-this.exErrorRate, 0);
|
||||
this.exQual = QualityUtils.errorProbToQual(this.exErrorRate);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
|
@ -58,7 +57,6 @@ import org.testng.Assert;
|
|||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
|
|
@ -207,8 +205,8 @@ public class RecalDatumUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testlog10QempPrior() {
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
|
||||
for ( int Qrep = 0; Qrep <= QualityUtils.MAX_QUAL_SCORE; Qrep++ ) {
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) {
|
||||
for ( int Qrep = 0; Qrep <= QualityUtils.MAX_SAM_QUAL_SCORE; Qrep++ ) {
|
||||
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
|
||||
Assert.assertTrue(log10prior < 0.0);
|
||||
Assert.assertFalse(Double.isInfinite(log10prior));
|
||||
|
|
@ -219,7 +217,7 @@ public class RecalDatumUnitTest extends BaseTest {
|
|||
final int Qrep = 20;
|
||||
int maxQemp = -1;
|
||||
double maxQempValue = -Double.MAX_VALUE;
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) {
|
||||
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
|
||||
if ( log10prior > maxQempValue ) {
|
||||
maxQemp = Qemp;
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ public class RecalibrationReportUnitTest {
|
|||
final Random random = new Random();
|
||||
final int nObservations = random.nextInt(maxObservations);
|
||||
final int nErrors = Math.min(random.nextInt(maxErrors), nObservations);
|
||||
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
|
||||
final int qual = random.nextInt(QualityUtils.MAX_SAM_QUAL_SCORE);
|
||||
return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual);
|
||||
}
|
||||
|
||||
|
|
@ -75,10 +75,10 @@ public class RecalibrationReportUnitTest {
|
|||
public void testOutput() {
|
||||
final int length = 100;
|
||||
|
||||
List<Byte> quals = new ArrayList<Byte>(QualityUtils.MAX_QUAL_SCORE + 1);
|
||||
List<Long> counts = new ArrayList<Long>(QualityUtils.MAX_QUAL_SCORE + 1);
|
||||
List<Byte> quals = new ArrayList<Byte>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
|
||||
List<Long> counts = new ArrayList<Long>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
|
||||
|
||||
for (int i = 0; i<= QualityUtils.MAX_QUAL_SCORE; i++) {
|
||||
for (int i = 0; i<= QualityUtils.MAX_SAM_QUAL_SCORE; i++) {
|
||||
quals.add((byte) i);
|
||||
counts.add(1L);
|
||||
}
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue