Merging CMI-0.5.0 and GATK-2.2 together.
This commit is contained in:
commit
e35fd1c717
24
build.xml
24
build.xml
|
|
@ -899,7 +899,7 @@
|
|||
|
||||
|
||||
<!-- Build a package consisting of all supporting files. Don't call this target directly. Call one of the specific packaging targets below -->
|
||||
<target name="package" depends="dist,stage,require.executable" description="bundle up an executable for distribution">
|
||||
<target name="package" depends="require.clean,dist,stage,require.executable" description="bundle up an executable for distribution">
|
||||
<mkdir dir="${package.output.dir}" />
|
||||
<xslt destdir="${package.output.dir}" style="${package.xml.dir}/CreatePackager.xsl" useImplicitFileset="false">
|
||||
<flattenmapper/>
|
||||
|
|
@ -1023,6 +1023,24 @@
|
|||
<delete dir="${pipelinetest.dir}"/>
|
||||
</target>
|
||||
|
||||
<!-- Depend on this target if your target requires a clean working directory but you don't want to depend on clean directly -->
|
||||
<target name="require.clean">
|
||||
<condition property="not.clean">
|
||||
<or>
|
||||
<available file="${build.dir}" />
|
||||
<available file="${lib.dir}" />
|
||||
<available file="${contract.dump.dir}" />
|
||||
<available file="${staging.dir}" />
|
||||
<available file="${dist.dir}" />
|
||||
<available file="${pipelinetest.dir}" />
|
||||
<available file="${javadoc.dir}" />
|
||||
<available file="${scaladoc.dir}" />
|
||||
<available file="${gatkdocs.dir}" />
|
||||
</or>
|
||||
</condition>
|
||||
<fail message="Selected build target requires a clean working directory. Run ant clean and then try again." if="not.clean" />
|
||||
</target>
|
||||
|
||||
|
||||
<!-- ******************************************************************************** -->
|
||||
<!-- gsalib -->
|
||||
|
|
@ -1186,14 +1204,16 @@
|
|||
<echo message="" />
|
||||
<echo message="Sting: Running @{testtype} test cases!"/>
|
||||
|
||||
<!-- no test is allowed to run for more than 10 hours -->
|
||||
<taskdef resource="testngtasks" classpath="${testng.jar}"/>
|
||||
<testng outputDir="@{outputdir}"
|
||||
classpathref="${testng.classpath}"
|
||||
haltOnFailure="false" failureProperty="test.failure"
|
||||
verbose="2"
|
||||
timeout="36000000"
|
||||
workingDir="${basedir}"
|
||||
useDefaultListeners="false"
|
||||
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
|
||||
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.TestNGTestTransformer,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
|
||||
<jvmarg value="-Xmx${test.maxmemory}" />
|
||||
<jvmarg value="-ea" />
|
||||
<jvmarg value="-Djava.awt.headless=true" />
|
||||
|
|
|
|||
|
|
@ -67,7 +67,6 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
alleleStratifiedElements[baseIndex].add(pe);
|
||||
}
|
||||
|
||||
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
|
||||
// Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.
|
||||
int numReadsToRemove = (int)(pileup.getNumberOfElements() * downsamplingFraction); // floor
|
||||
final TreeSet<PileupElement> elementsToKeep = new TreeSet<PileupElement>(new Comparator<PileupElement>() {
|
||||
|
|
@ -78,12 +77,21 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
}
|
||||
});
|
||||
|
||||
// make a listing of allele counts
|
||||
final int[] alleleCounts = new int[4];
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleCounts[i] = alleleStratifiedElements[i].size();
|
||||
|
||||
// do smart down-sampling
|
||||
final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);
|
||||
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
final ArrayList<PileupElement> alleleList = alleleStratifiedElements[i];
|
||||
if ( alleleList.size() <= numReadsToRemove )
|
||||
logAllElements(alleleList, log);
|
||||
// if we don't need to remove any reads, keep them all
|
||||
if ( alleleList.size() <= targetAlleleCounts[i] )
|
||||
elementsToKeep.addAll(alleleList);
|
||||
else
|
||||
elementsToKeep.addAll(downsampleElements(alleleList, numReadsToRemove, log));
|
||||
elementsToKeep.addAll(downsampleElements(alleleList, alleleList.size() - targetAlleleCounts[i], log));
|
||||
}
|
||||
|
||||
// clean up pointers so memory can be garbage collected if needed
|
||||
|
|
@ -93,6 +101,66 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(elementsToKeep));
|
||||
}
|
||||
|
||||
private static int scoreAlleleCounts(final int[] alleleCounts) {
|
||||
if ( alleleCounts.length < 2 )
|
||||
return 0;
|
||||
|
||||
// sort the counts (in ascending order)
|
||||
final int[] alleleCountsCopy = alleleCounts.clone();
|
||||
Arrays.sort(alleleCountsCopy);
|
||||
|
||||
final int maxCount = alleleCountsCopy[alleleCounts.length - 1];
|
||||
final int nextBestCount = alleleCountsCopy[alleleCounts.length - 2];
|
||||
|
||||
int remainderCount = 0;
|
||||
for ( int i = 0; i < alleleCounts.length - 2; i++ )
|
||||
remainderCount += alleleCountsCopy[i];
|
||||
|
||||
// try to get the best score:
|
||||
// - in the het case the counts should be equal with nothing else
|
||||
// - in the hom case the non-max should be zero
|
||||
return Math.min(maxCount - nextBestCount + remainderCount, Math.abs(nextBestCount + remainderCount));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes an allele biased version of the given pileup
|
||||
*
|
||||
* @param alleleCounts the original pileup
|
||||
* @param numReadsToRemove fraction of total reads to remove per allele
|
||||
* @return allele biased pileup
|
||||
*/
|
||||
protected static int[] runSmartDownsampling(final int[] alleleCounts, final int numReadsToRemove) {
|
||||
final int numAlleles = alleleCounts.length;
|
||||
|
||||
int maxScore = scoreAlleleCounts(alleleCounts);
|
||||
int[] alleleCountsOfMax = alleleCounts;
|
||||
|
||||
final int numReadsToRemovePerAllele = numReadsToRemove / 2;
|
||||
|
||||
for ( int i = 0; i < numAlleles; i++ ) {
|
||||
for ( int j = i; j < numAlleles; j++ ) {
|
||||
final int[] newCounts = alleleCounts.clone();
|
||||
|
||||
// split these cases so we don't lose on the floor (since we divided by 2)
|
||||
if ( i == j ) {
|
||||
newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemove);
|
||||
} else {
|
||||
newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemovePerAllele);
|
||||
newCounts[j] = Math.max(0, newCounts[j] - numReadsToRemovePerAllele);
|
||||
}
|
||||
|
||||
final int score = scoreAlleleCounts(newCounts);
|
||||
|
||||
if ( score < maxScore ) {
|
||||
maxScore = score;
|
||||
alleleCountsOfMax = newCounts;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return alleleCountsOfMax;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs allele biased down-sampling on a pileup and computes the list of elements to keep
|
||||
*
|
||||
|
|
@ -102,7 +170,15 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
* @return the list of pileup elements TO KEEP
|
||||
*/
|
||||
private static List<PileupElement> downsampleElements(final ArrayList<PileupElement> elements, final int numElementsToRemove, final PrintStream log) {
|
||||
if ( numElementsToRemove == 0 )
|
||||
return elements;
|
||||
|
||||
final int pileupSize = elements.size();
|
||||
if ( numElementsToRemove == pileupSize ) {
|
||||
logAllElements(elements, log);
|
||||
return new ArrayList<PileupElement>(0);
|
||||
}
|
||||
|
||||
final BitSet itemsToRemove = new BitSet(pileupSize);
|
||||
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
|
||||
itemsToRemove.set(selectedIndex);
|
||||
|
|
@ -132,15 +208,25 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
for ( final List<GATKSAMRecord> reads : alleleReadMap.values() )
|
||||
totalReads += reads.size();
|
||||
|
||||
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
|
||||
int numReadsToRemove = (int)(totalReads * downsamplingFraction);
|
||||
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove * alleleReadMap.size());
|
||||
for ( final List<GATKSAMRecord> reads : alleleReadMap.values() ) {
|
||||
if ( reads.size() <= numReadsToRemove ) {
|
||||
readsToRemove.addAll(reads);
|
||||
logAllReads(reads, log);
|
||||
} else {
|
||||
readsToRemove.addAll(downsampleReads(reads, numReadsToRemove, log));
|
||||
|
||||
// make a listing of allele counts
|
||||
final List<Allele> alleles = new ArrayList<Allele>(alleleReadMap.keySet());
|
||||
alleles.remove(Allele.NO_CALL); // ignore the no-call bin
|
||||
final int numAlleles = alleles.size();
|
||||
final int[] alleleCounts = new int[numAlleles];
|
||||
for ( int i = 0; i < numAlleles; i++ )
|
||||
alleleCounts[i] = alleleReadMap.get(alleles.get(i)).size();
|
||||
|
||||
// do smart down-sampling
|
||||
final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);
|
||||
|
||||
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove);
|
||||
for ( int i = 0; i < numAlleles; i++ ) {
|
||||
final List<GATKSAMRecord> alleleBin = alleleReadMap.get(alleles.get(i));
|
||||
|
||||
if ( alleleBin.size() > targetAlleleCounts[i] ) {
|
||||
readsToRemove.addAll(downsampleReads(alleleBin, alleleBin.size() - targetAlleleCounts[i], log));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -156,13 +242,22 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
* @return the list of pileup elements TO REMOVE
|
||||
*/
|
||||
private static List<GATKSAMRecord> downsampleReads(final List<GATKSAMRecord> reads, final int numElementsToRemove, final PrintStream log) {
|
||||
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numElementsToRemove);
|
||||
|
||||
if ( numElementsToRemove == 0 )
|
||||
return readsToRemove;
|
||||
|
||||
final int pileupSize = reads.size();
|
||||
if ( numElementsToRemove == pileupSize ) {
|
||||
logAllReads(reads, log);
|
||||
return reads;
|
||||
}
|
||||
|
||||
final BitSet itemsToRemove = new BitSet(pileupSize);
|
||||
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
|
||||
itemsToRemove.set(selectedIndex);
|
||||
}
|
||||
|
||||
ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(pileupSize - numElementsToRemove);
|
||||
for ( int i = 0; i < pileupSize; i++ ) {
|
||||
if ( itemsToRemove.get(i) ) {
|
||||
final GATKSAMRecord read = reads.get(i);
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
|
@ -41,7 +40,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
|
|
@ -212,7 +214,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
private VariantAnnotatorEngine annotationEngine;
|
||||
|
||||
// fasta reference reader to supplement the edges of the reference sequence
|
||||
private IndexedFastaSequenceFile referenceReader;
|
||||
private CachingIndexedFastaSequenceFile referenceReader;
|
||||
|
||||
// reference base padding size
|
||||
private static final int REFERENCE_PADDING = 900;
|
||||
|
|
@ -324,15 +326,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
}
|
||||
}
|
||||
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
|
||||
return new ActivityProfileResult(1.0);
|
||||
return new ActivityProfileResult(ref.getLocus(), 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
if( USE_ALLELES_TRIGGER ) {
|
||||
return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
|
||||
return new ActivityProfileResult( ref.getLocus(), tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
|
||||
}
|
||||
|
||||
if( context == null ) { return new ActivityProfileResult(0.0); }
|
||||
if( context == null ) { return new ActivityProfileResult(ref.getLocus(), 0.0); }
|
||||
|
||||
final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
|
||||
noCall.add(Allele.NO_CALL);
|
||||
|
|
@ -369,7 +371,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
|
||||
|
||||
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
|
||||
return new ActivityProfileResult( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
||||
/**
|
||||
* Basic unit test for AlleleBiasedDownsamplingUtils
|
||||
*/
|
||||
public class AlleleBiasedDownsamplingUtilsUnitTest extends BaseTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void testSmartDownsampling() {
|
||||
|
||||
final int[] idealHetAlleleCounts = new int[]{0, 50, 0, 50};
|
||||
final int[] idealHomAlleleCounts = new int[]{0, 100, 0, 0};
|
||||
|
||||
// no contamination, no removal
|
||||
testOneCase(0, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
testOneCase(0, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
|
||||
|
||||
// hom sample, het contaminant, different alleles
|
||||
testOneCase(5, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
|
||||
testOneCase(0, 0, 5, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
|
||||
testOneCase(0, 0, 0, 5, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
|
||||
|
||||
// hom sample, hom contaminant, different alleles
|
||||
testOneCase(10, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
|
||||
testOneCase(0, 0, 10, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
|
||||
testOneCase(0, 0, 0, 10, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
|
||||
|
||||
// het sample, het contaminant, different alleles
|
||||
testOneCase(5, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
testOneCase(0, 0, 5, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
|
||||
// het sample, hom contaminant, different alleles
|
||||
testOneCase(10, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
testOneCase(0, 0, 10, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
|
||||
// hom sample, het contaminant, overlapping alleles
|
||||
final int[] enhancedHomAlleleCounts = new int[]{0, 105, 0, 0};
|
||||
testOneCase(5, 5, 0, 0, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts);
|
||||
testOneCase(0, 5, 5, 0, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts);
|
||||
testOneCase(0, 5, 0, 5, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts);
|
||||
|
||||
// hom sample, hom contaminant, overlapping alleles
|
||||
testOneCase(0, 10, 0, 0, 0.1, 100, idealHomAlleleCounts, new int[]{0, 110, 0, 0});
|
||||
|
||||
// het sample, het contaminant, overlapping alleles
|
||||
testOneCase(5, 5, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
testOneCase(0, 5, 5, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
testOneCase(0, 5, 0, 5, 0.1, 100, idealHetAlleleCounts, new int[]{0, 55, 0, 55});
|
||||
testOneCase(5, 0, 0, 5, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
testOneCase(0, 0, 5, 5, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
|
||||
// het sample, hom contaminant, overlapping alleles
|
||||
testOneCase(0, 10, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
testOneCase(0, 0, 0, 10, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
|
||||
}
|
||||
|
||||
private static void testOneCase(final int addA, final int addC, final int addG, final int addT, final double contaminationFraction,
|
||||
final int pileupSize, final int[] initialCounts, final int[] targetCounts) {
|
||||
|
||||
final int[] actualCounts = initialCounts.clone();
|
||||
actualCounts[0] += addA;
|
||||
actualCounts[1] += addC;
|
||||
actualCounts[2] += addG;
|
||||
actualCounts[3] += addT;
|
||||
|
||||
final int[] results = AlleleBiasedDownsamplingUtils.runSmartDownsampling(actualCounts, (int)(pileupSize * contaminationFraction));
|
||||
Assert.assertTrue(countsAreEqual(results, targetCounts));
|
||||
}
|
||||
|
||||
private static boolean countsAreEqual(final int[] counts1, final int[] counts2) {
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
if ( counts1[i] != counts2[i] )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -90,6 +90,21 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
executeTest("testBQSRFailWithoutDBSNP", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBQSRCSV() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
" -T BaseRecalibrator" +
|
||||
" -R " + b36KGReference +
|
||||
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
|
||||
" -knownSites " + b36dbSNP129 +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" -o /dev/null" +
|
||||
" --plot_pdf_file /dev/null" +
|
||||
" --intermediate_csv_file %s",
|
||||
Arrays.asList("d1c38a3418979400630e2bca1140689c"));
|
||||
executeTest("testBQSR-CSVfile", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBQSRFailWithSolidNoCall() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a"));
|
||||
Arrays.asList("847605f4efafef89529fe0e496315edd"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de"));
|
||||
Arrays.asList("5b31b811072a4df04524e13604015f9b"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("24532eb381724cd74e99370da28d49ed"));
|
||||
Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("062a946160eec1d0fc135d58ca654ff4"));
|
||||
Arrays.asList("fea530fdc8677e10be4cc11625fa5376"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("94dc17d76d841f1d3a36160767ffa034"));
|
||||
Arrays.asList("704888987baacff8c7b273b8ab9938d0"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("9106d01ca0d0a8fedd068e72d509f380"));
|
||||
Arrays.asList("e14c9b1f9f34d6c16de445bfa385be89"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("d847acf841ba8ba653f996ce4869f439"));
|
||||
Arrays.asList("fb204e821a24d03bd3a671b6e01c449a"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "5b8f477c287770b5769b05591e35bc2d";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("56157d930da6ccd224bce1ca93f11e41"));
|
||||
Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
|
|
@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("6ccb9bd88934e4272d0ce362dd35e603"));
|
||||
Arrays.asList("55760482335497086458b09e415ecf54"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("480437dd6e2760f4ab3194431519f331"));
|
||||
Arrays.asList("938e888a40182878be4c3cc4859adb69"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("22c039412fd387dde6125b07c9a74a25"));
|
||||
Arrays.asList("7dc186d420487e4e156a24ec8dea0951"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
|
|
@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb");
|
||||
testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9dbc9389db39cf9697e93e0bf529314f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "81fff490c0f59890f1e75dc290833434");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("df524e98903d96ab9353bee7c16a69de"));
|
||||
Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -222,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" );
|
||||
testHeterozosity( 0.01, "8dd37249e0a80afa86594c3f1e720760" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" );
|
||||
testHeterozosity( 1.0 / 1850, "040d169e20fda56f8de009a6015eb384" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835"));
|
||||
Arrays.asList("0e4713e4aa44f4f8fcfea7138295a627"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52"));
|
||||
Arrays.asList("46ea5d1ceb8eed1d0db63c3577915d6c"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("3593495aab5f6204c65de0b073a6ff65"));
|
||||
Arrays.asList("50329e15e5139be9e3b643f0b3ba8a53"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("8b486a098029d5a106b0a37eff541c15"));
|
||||
Arrays.asList("2b85e3bd6bf981afaf7324666740d74b"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("18efedc50cae2aacaba372265e38310b"));
|
||||
Arrays.asList("a6fd46eff78827060451a62cffd698a7"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f"));
|
||||
Arrays.asList("b8129bf754490cc3c76191d8cc4ec93f"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("578c0540f4f2052a634a829bcb9cc27d"));
|
||||
Arrays.asList("591332fa0b5b22778cf820ee257049d2"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e"));
|
||||
Arrays.asList("a4761d7f25e7a62f34494801c98a0da7"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352"));
|
||||
Arrays.asList("c526c234947482d1cd2ffc5102083a08"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -407,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("857b8e5df444463ac27f665c4f67fbe2"));
|
||||
Arrays.asList("90adefd39ed67865b0cb275ad0f07383"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -415,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("81d4c7d9010fd6733b2997bc378e7471"));
|
||||
Arrays.asList("2fded43949e258f8e9f68893c61c1bdd"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNsInCigar() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
|
||||
Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f"));
|
||||
Arrays.asList("d6d40bacd540a41f305420dfea35e04a"));
|
||||
executeTest("test calling on reads with Ns in CIGAR", spec);
|
||||
}
|
||||
|
||||
|
|
@ -451,18 +451,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba"));
|
||||
Arrays.asList("c1077662411164182c5f75478344f83d"));
|
||||
executeTest("test calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamSNPs() {
|
||||
testReducedCalling("SNP", "e7fc11baf208a1bca7b462d3148c936e");
|
||||
testReducedCalling("SNP", "f5ccbc96d0d66832dd9b3c5cb6507db4");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5");
|
||||
testReducedCalling("INDEL", "3c02ee5187933bed44dc416a2e28511f");
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -483,7 +483,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testContaminationDownsampling() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
|
||||
Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae"));
|
||||
Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
|
||||
executeTest("test contamination_percentage_to_filter 0.20", spec);
|
||||
}
|
||||
|
||||
|
|
@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e");
|
||||
HCTest(CEUTRIO_BAM, "", "56aa4b84606b6b0b7dc78a383974d1b3");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215");
|
||||
HCTest(NA12878_BAM, "", "baabae06c85d416920be434939124d7f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469");
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "39da622b309597d7a0b082c8aa1748c9");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a");
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "7fbc6b9e27e374f2ffe4be952d88c7c6");
|
||||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
|
|
@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939"));
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("788176e1717bd28fc7cbc8e3efbb6100"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("96ab8253d242b851ccfc218759f79784"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -91,7 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9"));
|
||||
Arrays.asList("425f1a0fb00d7145edf1c55e54346fae"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.contexts;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -39,10 +38,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class ReferenceContext {
|
||||
final public static boolean UPPERCASE_REFERENCE = true;
|
||||
|
||||
/**
|
||||
* Facilitates creation of new GenomeLocs.
|
||||
*/
|
||||
|
|
@ -59,7 +55,8 @@ public class ReferenceContext {
|
|||
final private GenomeLoc window;
|
||||
|
||||
/**
|
||||
* The bases in the window around the current locus. If null, then bases haven't been fetched yet
|
||||
* The bases in the window around the current locus. If null, then bases haven't been fetched yet.
|
||||
* Bases are always upper cased
|
||||
*/
|
||||
private byte[] basesCache = null;
|
||||
|
||||
|
|
@ -81,7 +78,7 @@ public class ReferenceContext {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
@Ensures({"result != null"})
|
||||
public byte[] getBases();
|
||||
}
|
||||
|
||||
|
|
@ -146,7 +143,9 @@ public class ReferenceContext {
|
|||
private void fetchBasesFromProvider() {
|
||||
if ( basesCache == null ) {
|
||||
basesCache = basesProvider.getBases();
|
||||
if (UPPERCASE_REFERENCE) StringUtil.toUpperCase(basesCache);
|
||||
|
||||
// must be an assertion that only runs when the bases are fetch to run in a reasonable amount of time
|
||||
assert BaseUtils.isUpperCase(basesCache);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -194,6 +193,7 @@ public class ReferenceContext {
|
|||
/**
|
||||
* All the bases in the window from the current base forward to the end of the window.
|
||||
*/
|
||||
@Ensures({"result != null", "result.length > 0"})
|
||||
public byte[] getForwardBases() {
|
||||
final byte[] bases = getBases();
|
||||
final int mid = locus.getStart() - window.getStart();
|
||||
|
|
|
|||
|
|
@ -198,8 +198,6 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
|
||||
source.close();
|
||||
file.delete(); // this should be last to aid in debugging when the process fails
|
||||
} catch (UserException e) {
|
||||
throw new ReviewedStingException("BUG: intermediate file " + file + " is malformed, got error while reading", e);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
public enum GATKReportVersion {
|
||||
/**
|
||||
|
|
@ -91,6 +92,6 @@ public enum GATKReportVersion {
|
|||
if (header.startsWith("#:GATKReport.v1.1"))
|
||||
return GATKReportVersion.V1_1;
|
||||
|
||||
throw new ReviewedStingException("Unknown GATK report version in header: " + header);
|
||||
throw new UserException.BadInput("The GATK report has an unknown/unsupported version in the header: " + header);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
|
|
@ -46,99 +45,127 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
T sum) {
|
||||
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final LocusView locusView = getLocusView( walker, dataProvider );
|
||||
final GenomeLocSortedSet initialIntervals = engine.getIntervals();
|
||||
final LocusView locusView = new AllLocusView(dataProvider);
|
||||
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||
int minStart = Integer.MAX_VALUE;
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
||||
int minStart = Integer.MAX_VALUE;
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
||||
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
GenomeLoc location = locus.getLocation();
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
if(prevLoc != null) {
|
||||
// fill in the active / inactive labels from the stop of the previous location to the start of this location
|
||||
// TODO refactor to separate function
|
||||
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
|
||||
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
||||
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
|
||||
profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
|
||||
}
|
||||
}
|
||||
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
||||
// Note that this must occur before we leave because we are outside the intervals because
|
||||
// reads may occur outside our intervals but overlap them in the future
|
||||
// TODO -- this whole HashSet logic should be changed to a linked list of reads with
|
||||
// TODO -- subsequent pass over them to find the ones overlapping the active regions
|
||||
for( final PileupElement p : locus.getBasePileup() ) {
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
if( !myReads.contains(read) ) {
|
||||
myReads.add(read);
|
||||
}
|
||||
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
|
||||
profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
}
|
||||
|
||||
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
||||
for( final PileupElement p : locus.getBasePileup() ) {
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
if( !myReads.contains(read) ) {
|
||||
myReads.add(read);
|
||||
}
|
||||
|
||||
// If this is the last pileup for this shard calculate the minimum alignment start so that we know
|
||||
// which active regions in the work queue are now safe to process
|
||||
minStart = Math.min(minStart, read.getAlignmentStart());
|
||||
}
|
||||
|
||||
prevLoc = location;
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
// If this is the last pileup for this shard calculate the minimum alignment start so that we know
|
||||
// which active regions in the work queue are now safe to process
|
||||
minStart = Math.min(minStart, read.getAlignmentStart());
|
||||
}
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
// skip this location -- it's not part of our engine intervals
|
||||
// TODO -- this is dangerously slow with current overlaps implementation : GSA-649 / GenomeLocSortedSet.overlaps is crazy slow
|
||||
if ( outsideEngineIntervals(location) )
|
||||
continue;
|
||||
|
||||
// Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
// add these blocks of work to the work queue
|
||||
// band-pass filter the list of isActive probabilities and turn into active regions
|
||||
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||
final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize );
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
if( !activeRegions.isEmpty() ) {
|
||||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
|
||||
// we've move across some interval boundary, restart profile
|
||||
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
}
|
||||
|
||||
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// now go and process all of the active regions
|
||||
sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
|
||||
prevLoc = location;
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
}
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
|
||||
if ( ! profile.isEmpty() )
|
||||
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
if( !activeRegions.isEmpty() ) {
|
||||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
}
|
||||
|
||||
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
|
||||
// now go and process all of the active regions
|
||||
sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the loc outside of the intervals being requested for processing by the GATK?
|
||||
* @param loc
|
||||
* @return
|
||||
*/
|
||||
private boolean outsideEngineIntervals(final GenomeLoc loc) {
|
||||
return engine.getIntervals() != null && ! engine.getIntervals().overlaps(loc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
* add these blocks of work to the work queue
|
||||
* band-pass filter the list of isActive probabilities and turn into active regions
|
||||
*
|
||||
* @param profile
|
||||
* @param activeRegions
|
||||
* @param activeRegionExtension
|
||||
* @param maxRegionSize
|
||||
* @return
|
||||
*/
|
||||
private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
|
||||
final List<ActiveRegion> activeRegions,
|
||||
final int activeRegionExtension,
|
||||
final int maxRegionSize) {
|
||||
if ( profile.isEmpty() )
|
||||
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
|
||||
|
||||
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
|
||||
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -150,7 +177,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext locus, final GenomeLoc location) {
|
||||
if ( walker.hasPresetActiveRegions() ) {
|
||||
return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
} else {
|
||||
return walker.isActive( tracker, refContext, locus );
|
||||
}
|
||||
|
|
@ -250,30 +277,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
return walker.reduce( x, sum );
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// engine interaction code
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Gets the best view of loci for this walker given the available data.
|
||||
* @param walker walker to interrogate.
|
||||
* @param dataProvider Data which which to drive the locus view.
|
||||
* @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
|
||||
*/
|
||||
private LocusView getLocusView( final Walker<M,T> walker, final LocusShardDataProvider dataProvider ) {
|
||||
final DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||
if( dataSource == DataSource.READS )
|
||||
return new CoveredLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
|
||||
return new AllLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new RodLocusView(dataProvider);
|
||||
else
|
||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||
}
|
||||
|
||||
/**
|
||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
|
|
|
|||
|
|
@ -75,8 +75,9 @@ public class RecalibrationArgumentCollection {
|
|||
|
||||
/**
|
||||
* If not provided, then a temporary file is created and then deleted upon completion.
|
||||
* For advanced users only.
|
||||
*/
|
||||
@Hidden
|
||||
@Advanced
|
||||
@Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
|
||||
public File RECAL_CSV_FILE = null;
|
||||
|
||||
|
|
|
|||
|
|
@ -191,7 +191,7 @@ public class CallableLoci extends LocusWalker<CallableLoci.CallableBaseState, Ca
|
|||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "format", shortName = "format", doc = "Output format", required = false)
|
||||
OutputFormat outputFormat;
|
||||
OutputFormat outputFormat = OutputFormat.BED;
|
||||
|
||||
public enum OutputFormat {
|
||||
/**
|
||||
|
|
@ -297,7 +297,7 @@ public class CallableLoci extends LocusWalker<CallableLoci.CallableBaseState, Ca
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s %d %d %s", loc.getContig(), loc.getStart(), loc.getStop(), state);
|
||||
return String.format("%s\t%d\t%d\t%s", loc.getContig(), loc.getStart()-1, loc.getStop(), state);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
|||
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
|
||||
|
||||
// note the linear probability scale
|
||||
return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1));
|
||||
return new ActivityProfileResult(ref.getLocus(), Math.min(depth / coverageThreshold, 1));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
|
|
@ -236,6 +236,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
* then extensions (".bam" or ".sam") will be stripped from the input file names and the provided string value will be pasted on instead; 2) if the
|
||||
* value ends with a '.map' (e.g. input_output.map), then the two-column tab-separated file with the specified name must exist and list unique output
|
||||
* file name (2nd column) for each input file name (1st column).
|
||||
*
|
||||
* Note that some GATK arguments do NOT work in conjunction with nWayOut (e.g. --disable_bam_indexing).
|
||||
*/
|
||||
@Argument(fullName="nWayOut", shortName="nWayOut", required=false, doc="Generate one output file for each input (-I) bam file")
|
||||
protected String N_WAY_OUT = null;
|
||||
|
|
@ -274,7 +276,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
protected String OUT_SNPS = null;
|
||||
|
||||
// fasta reference reader to supplement the edges of the reference sequence
|
||||
private IndexedFastaSequenceFile referenceReader;
|
||||
private CachingIndexedFastaSequenceFile referenceReader;
|
||||
|
||||
// the intervals input by the user
|
||||
private Iterator<GenomeLoc> intervals = null;
|
||||
|
|
@ -1601,7 +1603,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
public List<GATKSAMRecord> getReads() { return reads; }
|
||||
|
||||
public byte[] getReference(IndexedFastaSequenceFile referenceReader) {
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getReference(CachingIndexedFastaSequenceFile referenceReader) {
|
||||
// set up the reference if we haven't done so yet
|
||||
if ( reference == null ) {
|
||||
// first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read)
|
||||
|
|
@ -1609,7 +1612,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
int padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength());
|
||||
loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), padLeft, padRight);
|
||||
reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases();
|
||||
StringUtil.toUpperCase(reference);
|
||||
}
|
||||
|
||||
return reference;
|
||||
|
|
|
|||
|
|
@ -1,249 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* @author rpoplin
|
||||
* @since Apr 6, 2010
|
||||
*/
|
||||
|
||||
//@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
|
||||
@Deprecated
|
||||
public class VariantQualityScore {
|
||||
// TODO - this should really be a stratification
|
||||
|
||||
// public class VariantQualityScore extends VariantEvaluator {
|
||||
//
|
||||
// // a mapping from quality score histogram bin to Ti/Tv ratio
|
||||
// @DataPoint(description = "the Ti/Tv ratio broken out by variant quality")
|
||||
// TiTvStats titvStats = null;
|
||||
//
|
||||
// @DataPoint(description = "average variant quality for each allele count")
|
||||
// AlleleCountStats alleleCountStats = null;
|
||||
//
|
||||
// static class TiTvStats extends TableType {
|
||||
// final static int NUM_BINS = 20;
|
||||
// final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
|
||||
// final long transitionByQuality[] = new long[NUM_BINS];
|
||||
// final long transversionByQuality[] = new long[NUM_BINS];
|
||||
// final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
|
||||
//
|
||||
// public Object[] getRowKeys() {
|
||||
// return new String[]{"sample"};
|
||||
// }
|
||||
//
|
||||
// public Object[] getColumnKeys() {
|
||||
// final String columnKeys[] = new String[NUM_BINS];
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// columnKeys[iii] = "titvBin" + iii;
|
||||
// }
|
||||
// return columnKeys;
|
||||
// }
|
||||
//
|
||||
// public String getCell(int x, int y) {
|
||||
// return String.valueOf(titvByQuality[y]);
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// StringBuffer returnString = new StringBuffer();
|
||||
// // output the ti/tv array
|
||||
// returnString.append("titvByQuality: ");
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// returnString.append(titvByQuality[iii]);
|
||||
// returnString.append(" ");
|
||||
// }
|
||||
// return returnString.toString();
|
||||
// }
|
||||
//
|
||||
// public void incrValue( final double qual, final boolean isTransition ) {
|
||||
// final Integer qualKey = Math.round((float) qual);
|
||||
// final long numTransition = (isTransition ? 1L : 0L);
|
||||
// final long numTransversion = (isTransition ? 0L : 1L);
|
||||
// if( qualByIsTransition.containsKey(qualKey) ) {
|
||||
// Pair<Long,Long> transitionPair = qualByIsTransition.get(qualKey);
|
||||
// transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion);
|
||||
// qualByIsTransition.put(qualKey, transitionPair);
|
||||
// } else {
|
||||
// qualByIsTransition.put(qualKey, new Pair<Long,Long>(numTransition,numTransversion));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public void organizeTiTvTables() {
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// transitionByQuality[iii] = 0L;
|
||||
// transversionByQuality[iii] = 0L;
|
||||
// titvByQuality[iii] = 0.0;
|
||||
// }
|
||||
//
|
||||
// int maxQual = 0;
|
||||
//
|
||||
// // Calculate the maximum quality score in order to normalize and histogram
|
||||
// for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||
// if( qual > maxQual ) {
|
||||
// maxQual = qual;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1));
|
||||
//
|
||||
// for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||
// final int index = (int)Math.floor( ((double) qual) / binSize );
|
||||
// if( index >= 0 ) { // BUGBUG: why is there overflow here?
|
||||
// Pair<Long,Long> transitionPair = qualByIsTransition.get(qual);
|
||||
// transitionByQuality[index] += transitionPair.getFirst();
|
||||
// transversionByQuality[index] += transitionPair.getSecond();
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
|
||||
// titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
|
||||
// } else {
|
||||
// titvByQuality[iii] = 0.0;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// class AlleleCountStats extends TableType {
|
||||
// final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
|
||||
// final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
|
||||
//
|
||||
// public Object[] getRowKeys() {
|
||||
// final int NUM_BINS = qualityListMap.keySet().size();
|
||||
// final String rowKeys[] = new String[NUM_BINS];
|
||||
// int iii = 0;
|
||||
// for( final Integer key : qualityListMap.keySet() ) {
|
||||
// rowKeys[iii] = "AC" + key;
|
||||
// iii++;
|
||||
// }
|
||||
// return rowKeys;
|
||||
//
|
||||
// }
|
||||
//
|
||||
// public Object[] getColumnKeys() {
|
||||
// return new String[]{"alleleCount","avgQual"};
|
||||
// }
|
||||
//
|
||||
// public String getCell(int x, int y) {
|
||||
// int iii = 0;
|
||||
// for( final Integer key : qualityListMap.keySet() ) {
|
||||
// if(iii == x) {
|
||||
// if(y == 0) { return String.valueOf(key); }
|
||||
// else { return String.valueOf(qualityMap.get(key)); }
|
||||
// }
|
||||
// iii++;
|
||||
// }
|
||||
// return null;
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// String returnString = "";
|
||||
// // output the quality map
|
||||
// returnString += "AlleleCountStats: ";
|
||||
// //for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// // returnString += titvByQuality[iii] + " ";
|
||||
// //}
|
||||
// return returnString;
|
||||
// }
|
||||
//
|
||||
// public void incrValue( final double qual, final int alleleCount ) {
|
||||
// ArrayList<Double> list = qualityListMap.get(alleleCount);
|
||||
// if(list==null) { list = new ArrayList<Double>(); }
|
||||
// list.add(qual);
|
||||
// qualityListMap.put(alleleCount, list);
|
||||
// }
|
||||
//
|
||||
// public void organizeAlleleCountTables() {
|
||||
// for( final Integer key : qualityListMap.keySet() ) {
|
||||
// final ArrayList<Double> list = qualityListMap.get(key);
|
||||
// double meanQual = 0.0;
|
||||
// final double numQuals = (double)list.size();
|
||||
// for( Double qual : list ) {
|
||||
// meanQual += qual / numQuals;
|
||||
// }
|
||||
// qualityMap.put(key, meanQual);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// //public VariantQualityScore(VariantEvalWalker parent) {
|
||||
// //super(parent);
|
||||
// //}
|
||||
//
|
||||
// public String getName() {
|
||||
// return "VariantQualityScore";
|
||||
// }
|
||||
//
|
||||
// public int getComparisonOrder() {
|
||||
// return 1; // we only need to see each eval track
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// return getName();
|
||||
// }
|
||||
//
|
||||
// public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
// final String interesting = null;
|
||||
//
|
||||
// if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
// if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||
// titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||
//
|
||||
// if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
|
||||
// int alternateAlleleCount = 0;
|
||||
// for (final Allele a : eval.getAlternateAlleles()) {
|
||||
// alternateAlleleCount += eval.getCalledChrCount(a);
|
||||
// }
|
||||
// alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount);
|
||||
// }
|
||||
//
|
||||
// return interesting; // This module doesn't capture any interesting sites, so return null
|
||||
// }
|
||||
//
|
||||
// public void finalizeEvaluation() {
|
||||
// if( titvStats != null ) {
|
||||
// titvStats.organizeTiTvTables();
|
||||
// }
|
||||
// if( alleleCountStats != null ) {
|
||||
// alleleCountStats.organizeAlleleCountTables();
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
|
@ -659,7 +659,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
return (g !=null && !g.isHomRef() && (g.isCalled() || (g.isFiltered() && !EXCLUDE_FILTERED)));
|
||||
}
|
||||
|
||||
private boolean haveSameGenotypes(Genotype g1, Genotype g2) {
|
||||
private boolean haveSameGenotypes(final Genotype g1, final Genotype g2) {
|
||||
if ( g1 == null || g2 == null )
|
||||
return false;
|
||||
|
||||
if ((g1.isCalled() && g2.isFiltered()) ||
|
||||
(g2.isCalled() && g1.isFiltered()) ||
|
||||
(g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED))
|
||||
|
|
|
|||
|
|
@ -24,33 +24,6 @@ public class BaseUtils {
|
|||
public final static byte[] BASES = {'A', 'C', 'G', 'T'};
|
||||
public final static byte[] EXTENDED_BASES = {'A', 'C', 'G', 'T', 'N', 'D'};
|
||||
|
||||
public enum Base {
|
||||
A('A', 0),
|
||||
C('C', 1),
|
||||
G('G', 2),
|
||||
T('T', 3);
|
||||
|
||||
byte b;
|
||||
int index;
|
||||
|
||||
private Base(char base, int index) {
|
||||
this.b = (byte) base;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
public byte getBase() { return b; }
|
||||
|
||||
public char getBaseAsChar() { return (char) b; }
|
||||
|
||||
public int getIndex() { return index; }
|
||||
|
||||
public boolean sameBase(byte o) { return b == o; }
|
||||
|
||||
public boolean sameBase(char o) { return b == (byte) o; }
|
||||
|
||||
public boolean sameBase(int i) { return index == i; }
|
||||
}
|
||||
|
||||
static private final int[] baseIndexMap = new int[256];
|
||||
static {
|
||||
Arrays.fill(baseIndexMap, -1);
|
||||
|
|
@ -130,6 +103,17 @@ public class BaseUtils {
|
|||
return false;
|
||||
}
|
||||
|
||||
public static boolean isUpperCase(final byte[] bases) {
|
||||
for ( byte base : bases )
|
||||
if ( ! isUpperCase(base) )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
public static boolean isUpperCase(final byte base) {
|
||||
return base >= 'A' && base <= 'Z';
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a IUPAC nucleotide code to a pair of bases
|
||||
*
|
||||
|
|
@ -271,59 +255,6 @@ public class BaseUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a base index to a base index representing its cross-talk partner
|
||||
*
|
||||
* @param baseIndex 0, 1, 2, 3
|
||||
* @return 1, 0, 3, 2, or -1 if the index can't be understood
|
||||
*/
|
||||
static public int crossTalkPartnerIndex(int baseIndex) {
|
||||
switch (baseIndex) {
|
||||
case 0:
|
||||
return 1; // A -> C
|
||||
case 1:
|
||||
return 0; // C -> A
|
||||
case 2:
|
||||
return 3; // G -> T
|
||||
case 3:
|
||||
return 2; // T -> G
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a base to the base representing its cross-talk partner
|
||||
*
|
||||
* @param base [AaCcGgTt]
|
||||
* @return C, A, T, G, or '.' if the base can't be understood
|
||||
*/
|
||||
@Deprecated
|
||||
static public char crossTalkPartnerBase(char base) {
|
||||
return (char) baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the complement of a base index.
|
||||
*
|
||||
* @param baseIndex the base index (0:A, 1:C, 2:G, 3:T)
|
||||
* @return the complementary base index
|
||||
*/
|
||||
static public byte complementIndex(int baseIndex) {
|
||||
switch (baseIndex) {
|
||||
case 0:
|
||||
return 3; // a -> t
|
||||
case 1:
|
||||
return 2; // c -> g
|
||||
case 2:
|
||||
return 1; // g -> c
|
||||
case 3:
|
||||
return 0; // t -> a
|
||||
default:
|
||||
return -1; // wtf?
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the complement (A <-> T or C <-> G) of a base, or the specified base if it can't be complemented (i.e. an ambiguous base).
|
||||
*
|
||||
|
|
@ -350,7 +281,7 @@ public class BaseUtils {
|
|||
}
|
||||
|
||||
@Deprecated
|
||||
static public char simpleComplement(char base) {
|
||||
static private char simpleComplement(char base) {
|
||||
return (char) simpleComplement((byte) base);
|
||||
}
|
||||
|
||||
|
|
@ -370,22 +301,6 @@ public class BaseUtils {
|
|||
return rcbases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Complement a byte array of bases (that is, chars casted to bytes, *not* base indices in byte form)
|
||||
*
|
||||
* @param bases the byte array of bases
|
||||
* @return the complement of the base byte array
|
||||
*/
|
||||
static public byte[] simpleComplement(byte[] bases) {
|
||||
byte[] rcbases = new byte[bases.length];
|
||||
|
||||
for (int i = 0; i < bases.length; i++) {
|
||||
rcbases[i] = simpleComplement(bases[i]);
|
||||
}
|
||||
|
||||
return rcbases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverse complement a char array of bases
|
||||
*
|
||||
|
|
@ -403,23 +318,6 @@ public class BaseUtils {
|
|||
return rcbases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Complement a char array of bases
|
||||
*
|
||||
* @param bases the char array of bases
|
||||
* @return the complement of the base char array
|
||||
*/
|
||||
@Deprecated
|
||||
static public char[] simpleComplement(char[] bases) {
|
||||
char[] rcbases = new char[bases.length];
|
||||
|
||||
for (int i = 0; i < bases.length; i++) {
|
||||
rcbases[i] = simpleComplement(bases[i]);
|
||||
}
|
||||
|
||||
return rcbases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverse complement a String of bases. Preserves ambiguous bases.
|
||||
*
|
||||
|
|
@ -431,17 +329,6 @@ public class BaseUtils {
|
|||
return new String(simpleReverseComplement(bases.getBytes()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Complement a String of bases. Preserves ambiguous bases.
|
||||
*
|
||||
* @param bases the String of bases
|
||||
* @return the complement of the String
|
||||
*/
|
||||
@Deprecated
|
||||
static public String simpleComplement(String bases) {
|
||||
return new String(simpleComplement(bases.getBytes()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the uppercased version of the bases
|
||||
*
|
||||
|
|
@ -543,82 +430,4 @@ public class BaseUtils {
|
|||
|
||||
return randomBaseIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a random base (A, C, G, T).
|
||||
*
|
||||
* @return a random base (A, C, G, T)
|
||||
*/
|
||||
@Deprecated
|
||||
static public byte getRandomBase() {
|
||||
return getRandomBase('.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a random base, excluding some base.
|
||||
*
|
||||
* @param excludeBase the base to exclude
|
||||
* @return a random base, excluding the one specified (A, C, G, T)
|
||||
*/
|
||||
@Deprecated
|
||||
static public byte getRandomBase(char excludeBase) {
|
||||
return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the smallest period >= minPeriod for the specified string. The period is defined as such p,
|
||||
* that for all i = 0... seq.length-1, seq[ i % p ] = seq[i] (or equivalently seq[i] = seq[i+p] for i=0...seq.length-1-p).
|
||||
* The sequence does <i>not</i> have to contain whole number of periods. For instance, "ACACACAC" has a period
|
||||
* of 2 (it has a period of 4 as well), and so does
|
||||
* "ACACA"; similarly, smallest periods of "CTCCTC", "CTCCT", and "CTCC" are all equal to 3. The "trivial" period is
|
||||
* the length of the string itself, and it will always be returned if no smaller period can be found in the specified period range
|
||||
* or if specified minPeriod is greater than the sequence length.
|
||||
*
|
||||
* @param seq
|
||||
* @return
|
||||
*/
|
||||
public static int sequencePeriod(byte[] seq, int minPeriod) {
|
||||
int period = (minPeriod > seq.length ? seq.length : minPeriod);
|
||||
// we assume that bases [0,period-1] repeat themselves and check this assumption
|
||||
// until we find correct period
|
||||
|
||||
for (int pos = period; pos < seq.length; pos++) {
|
||||
|
||||
int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
|
||||
// if our current hypothesis holds, base[pos] must be the same as base[offset]
|
||||
|
||||
if (Character.toUpperCase(seq[pos]) != Character.toUpperCase(seq[offset])) {
|
||||
|
||||
// period we have been trying so far does not work.
|
||||
// two possibilities:
|
||||
// A) offset = 0, i.e. current position pos must be start of the next repeat, but it is not;
|
||||
// in this case only bases from start up to the current one, inclusive, may form a repeat, if at all;
|
||||
// so period is at least pos+1 (remember, pos is 0-based), then on the next loop re-entrance
|
||||
// pos will be autoincremented and we will be checking next base
|
||||
// B) offset != 0, i.e. the current base breaks the repeat, but maybe it starts a new one?
|
||||
// hence we should first check if it matches the first base of the sequence, and to do that
|
||||
// we set period to pos (thus trying the hypothesis that bases from start up to the current one,
|
||||
// non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
|
||||
// on the next loop re-entrance after pos is autoincremented)
|
||||
if (offset == 0)
|
||||
period = pos + 1;
|
||||
else
|
||||
period = pos--;
|
||||
|
||||
}
|
||||
}
|
||||
return period;
|
||||
}
|
||||
}
|
||||
|
||||
/* code snippet for testing sequencePeriod():
|
||||
*
|
||||
* String str = "CCTTG";
|
||||
int p = 0;
|
||||
System.out.print("Periods of " + str +" are:");
|
||||
while ( p < str.length() ) {
|
||||
p = sequencePeriod(str, p+1);
|
||||
System.out.print(" "+p);
|
||||
}
|
||||
System.out.println(); System.exit(1);
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -54,27 +54,31 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
|
||||
public ArrayList<GATKSAMRecord> getReads() { return reads; }
|
||||
|
||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader ) {
|
||||
return getActiveRegionReference(referenceReader, 0);
|
||||
}
|
||||
|
||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return getReference( referenceReader, padding, extendedLoc );
|
||||
}
|
||||
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader ) {
|
||||
return getFullReference(referenceReader, 0);
|
||||
}
|
||||
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return getReference( referenceReader, padding, fullExtentReferenceLoc );
|
||||
}
|
||||
|
||||
private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
private byte[] getReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
||||
final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(),
|
||||
Math.max(1, genomeLoc.getStart() - padding),
|
||||
Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases();
|
||||
StringUtil.toUpperCase(reference);
|
||||
return reference;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,11 +24,11 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
|
@ -45,6 +45,7 @@ public class ActivityProfile {
|
|||
final GenomeLocParser parser;
|
||||
final boolean presetRegions;
|
||||
GenomeLoc regionStartLoc = null;
|
||||
GenomeLoc regionStopLoc = null;
|
||||
final List<ActivityProfileResult> isActiveList;
|
||||
private static final int FILTER_SIZE = 80;
|
||||
private static final double[] GaussianKernel;
|
||||
|
|
@ -71,19 +72,49 @@ public class ActivityProfile {
|
|||
this.regionStartLoc = regionStartLoc;
|
||||
}
|
||||
|
||||
public void add(final GenomeLoc loc, final ActivityProfileResult result) {
|
||||
if ( loc.size() != 1 )
|
||||
throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" );
|
||||
isActiveList.add(result);
|
||||
if( regionStartLoc == null ) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ActivityProfile{" +
|
||||
"start=" + regionStartLoc +
|
||||
", stop=" + regionStopLoc +
|
||||
'}';
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the next ActivityProfileResult to this profile.
|
||||
*
|
||||
* Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown
|
||||
*
|
||||
* @param result a well-formed ActivityProfileResult result to incorporate into this profile
|
||||
*/
|
||||
@Requires("result != null")
|
||||
public void add(final ActivityProfileResult result) {
|
||||
final GenomeLoc loc = result.getLoc();
|
||||
|
||||
if ( regionStartLoc == null ) {
|
||||
regionStartLoc = loc;
|
||||
regionStopLoc = loc;
|
||||
} else {
|
||||
if ( regionStopLoc.getStart() != loc.getStart() - 1 )
|
||||
throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc );
|
||||
regionStopLoc = loc;
|
||||
}
|
||||
|
||||
isActiveList.add(result);
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return isActiveList.size();
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return isActiveList.isEmpty();
|
||||
}
|
||||
|
||||
public boolean hasPresetRegions() {
|
||||
return presetRegions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Band pass this ActivityProfile, producing a new profile that's band pass filtered
|
||||
* @return a new ActivityProfile that's the band-pass filtered version of this profile
|
||||
|
|
@ -104,14 +135,21 @@ public class ActivityProfile {
|
|||
}
|
||||
iii++;
|
||||
}
|
||||
final double[] filteredProbArray = new double[activeProbArray.length];
|
||||
|
||||
final double[] filteredProbArray;
|
||||
if( !presetRegions ) {
|
||||
// if we aren't using preset regions, actually apply the band pass filter for activeProbArray into filteredProbArray
|
||||
filteredProbArray = new double[activeProbArray.length];
|
||||
for( iii = 0; iii < activeProbArray.length; iii++ ) {
|
||||
final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
|
||||
final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
|
||||
filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel);
|
||||
}
|
||||
} else {
|
||||
// otherwise we simply use the activeProbArray directly
|
||||
filteredProbArray = activeProbArray;
|
||||
}
|
||||
|
||||
iii = 0;
|
||||
for( final double prob : filteredProbArray ) {
|
||||
final ActivityProfileResult result = isActiveList.get(iii++);
|
||||
|
|
@ -119,6 +157,7 @@ public class ActivityProfile {
|
|||
result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE;
|
||||
result.resultValue = null;
|
||||
}
|
||||
|
||||
return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc);
|
||||
}
|
||||
|
||||
|
|
@ -166,6 +205,7 @@ public class ActivityProfile {
|
|||
private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) {
|
||||
return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
|
||||
}
|
||||
|
||||
private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List<ActiveRegion> returnList) {
|
||||
if( !isActive || curEnd - curStart < maxRegionSize ) {
|
||||
final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd);
|
||||
|
|
|
|||
|
|
@ -1,12 +1,16 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 7/27/12
|
||||
*/
|
||||
|
||||
public class ActivityProfileResult {
|
||||
private GenomeLoc loc;
|
||||
public double isActiveProb;
|
||||
public ActivityProfileResultState resultState;
|
||||
public Number resultValue;
|
||||
|
|
@ -16,16 +20,52 @@ public class ActivityProfileResult {
|
|||
HIGH_QUALITY_SOFT_CLIPS
|
||||
}
|
||||
|
||||
public ActivityProfileResult( final double isActiveProb ) {
|
||||
this.isActiveProb = isActiveProb;
|
||||
this.resultState = ActivityProfileResultState.NONE;
|
||||
this.resultValue = null;
|
||||
/**
|
||||
* Create a new ActivityProfileResult at loc with probability of being active of isActiveProb
|
||||
*
|
||||
* @param loc the position of the result profile (for debugging purposes)
|
||||
* @param isActiveProb the probability of being active (between 0 and 1)
|
||||
*/
|
||||
@Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"})
|
||||
public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb ) {
|
||||
this(loc, isActiveProb, ActivityProfileResultState.NONE, null);
|
||||
}
|
||||
|
||||
public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) {
|
||||
/**
|
||||
* Create a new ActivityProfileResult at loc with probability of being active of isActiveProb that maintains some
|
||||
* information about the result state and value (TODO RYAN -- what do these mean?)
|
||||
*
|
||||
* @param loc the position of the result profile (for debugging purposes)
|
||||
* @param isActiveProb the probability of being active (between 0 and 1)
|
||||
*/
|
||||
@Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"})
|
||||
public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) {
|
||||
// make sure the location of that activity profile is 1
|
||||
if ( loc.size() != 1 )
|
||||
throw new IllegalArgumentException("Location for an ActivityProfileResult must have to size 1 bp but saw " + loc);
|
||||
|
||||
this.loc = loc;
|
||||
this.isActiveProb = isActiveProb;
|
||||
this.resultState = resultState;
|
||||
this.resultValue = resultValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the genome loc associated with the ActivityProfileResult
|
||||
* @return the location of this result
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc getLoc() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ActivityProfileResult{" +
|
||||
"loc=" + loc +
|
||||
", isActiveProb=" + isActiveProb +
|
||||
", resultState=" + resultState +
|
||||
", resultValue=" + resultValue +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -587,7 +587,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
|
||||
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
||||
if ( nParts != genotypeParts.length )
|
||||
generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);
|
||||
generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records at " + chr + ":" + pos, lineNo);
|
||||
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import net.sf.picard.reference.FastaSequenceIndex;
|
|||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.apache.log4j.Priority;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
|
@ -40,6 +41,8 @@ import java.util.Arrays;
|
|||
* A caching version of the IndexedFastaSequenceFile that avoids going to disk as often as the raw indexer.
|
||||
*
|
||||
* Thread-safe! Uses a thread-local cache
|
||||
*
|
||||
* Automatically upper-cases the bases coming in, unless they the flag preserveCase is explicitly set
|
||||
*/
|
||||
public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
||||
protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(CachingIndexedFastaSequenceFile.class);
|
||||
|
|
@ -54,10 +57,15 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
public static final long DEFAULT_CACHE_SIZE = 1000000;
|
||||
|
||||
/** The cache size of this CachingIndexedFastaSequenceFile */
|
||||
final long cacheSize;
|
||||
private final long cacheSize;
|
||||
|
||||
/** When we have a cache miss at position X, we load sequence from X - cacheMissBackup */
|
||||
final long cacheMissBackup;
|
||||
private final long cacheMissBackup;
|
||||
|
||||
/**
|
||||
* If true, we will preserve the case of the original base in the genome, not
|
||||
*/
|
||||
private final boolean preserveCase;
|
||||
|
||||
// information about checking efficiency
|
||||
long cacheHits = 0;
|
||||
|
|
@ -84,37 +92,17 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
/**
|
||||
* Same as general constructor but allows one to override the default cacheSize
|
||||
*
|
||||
* @param fasta
|
||||
* @param index
|
||||
* @param cacheSize
|
||||
* @param fasta the file we will read our FASTA sequence from.
|
||||
* @param index the index of the fasta file, used for efficient random access
|
||||
* @param cacheSize the size in bp of the cache we will use for this reader
|
||||
* @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
|
||||
*/
|
||||
public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize) {
|
||||
public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize, final boolean preserveCase) {
|
||||
super(fasta, index);
|
||||
if ( cacheSize < 0 ) throw new IllegalArgumentException("cacheSize must be > 0");
|
||||
this.cacheSize = cacheSize;
|
||||
this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
||||
*
|
||||
* @param fasta The file to open.
|
||||
* @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
|
||||
* @throws java.io.FileNotFoundException If the fasta or any of its supporting files cannot be found.
|
||||
*/
|
||||
public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index) {
|
||||
this(fasta, index, DEFAULT_CACHE_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
||||
*
|
||||
* Looks for a index file for fasta on disk
|
||||
*
|
||||
* @param fasta The file to open.
|
||||
*/
|
||||
public CachingIndexedFastaSequenceFile(final File fasta) throws FileNotFoundException {
|
||||
this(fasta, DEFAULT_CACHE_SIZE);
|
||||
this.preserveCase = preserveCase;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -124,12 +112,76 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
* Uses provided cacheSize instead of the default
|
||||
*
|
||||
* @param fasta The file to open.
|
||||
* @param cacheSize the size of the cache to use in this CachingIndexedFastaReader, must be >= 0
|
||||
* @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
|
||||
*/
|
||||
public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize ) throws FileNotFoundException {
|
||||
public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize, final boolean preserveCase ) throws FileNotFoundException {
|
||||
super(fasta);
|
||||
if ( cacheSize < 0 ) throw new IllegalArgumentException("cacheSize must be > 0");
|
||||
this.cacheSize = cacheSize;
|
||||
this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
|
||||
this.preserveCase = preserveCase;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
||||
// *
|
||||
// * @param fasta The file to open.
|
||||
// * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
|
||||
// * @throws java.io.FileNotFoundException If the fasta or any of its supporting files cannot be found.
|
||||
// */
|
||||
// public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index) {
|
||||
// this(fasta, index, DEFAULT_CACHE_SIZE);
|
||||
// }
|
||||
|
||||
/**
|
||||
* Same as general constructor but allows one to override the default cacheSize
|
||||
*
|
||||
* By default, this CachingIndexedFastaReader converts all incoming bases to upper case
|
||||
*
|
||||
* @param fasta the file we will read our FASTA sequence from.
|
||||
* @param index the index of the fasta file, used for efficient random access
|
||||
* @param cacheSize the size in bp of the cache we will use for this reader
|
||||
*/
|
||||
public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize) {
|
||||
this(fasta, index, cacheSize, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
||||
*
|
||||
* Looks for a index file for fasta on disk.
|
||||
* This CachingIndexedFastaReader will convert all FASTA bases to upper cases under the hood
|
||||
*
|
||||
* @param fasta The file to open.
|
||||
*/
|
||||
public CachingIndexedFastaSequenceFile(final File fasta) throws FileNotFoundException {
|
||||
this(fasta, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
||||
*
|
||||
* Looks for a index file for fasta on disk
|
||||
*
|
||||
* @param fasta The file to open.
|
||||
* @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
|
||||
*/
|
||||
public CachingIndexedFastaSequenceFile(final File fasta, final boolean preserveCase) throws FileNotFoundException {
|
||||
this(fasta, DEFAULT_CACHE_SIZE, preserveCase);
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
||||
*
|
||||
* Looks for a index file for fasta on disk
|
||||
* Uses provided cacheSize instead of the default
|
||||
*
|
||||
* @param fasta The file to open.
|
||||
* @param cacheSize the size of the cache to use in this CachingIndexedFastaReader, must be >= 0
|
||||
*/
|
||||
public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize ) throws FileNotFoundException {
|
||||
this(fasta, cacheSize, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -168,6 +220,25 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
return cacheSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this CachingIndexedFastaReader keeping the original case of bases in the fasta, or is
|
||||
* everything being made upper case?
|
||||
*
|
||||
* @return true if the bases coming from this reader are in the original case in the fasta, false if they are all upper cased
|
||||
*/
|
||||
public boolean isPreservingCase() {
|
||||
return preserveCase;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is uppercasing bases?
|
||||
*
|
||||
* @return true if bases coming from this CachingIndexedFastaSequenceFile are all upper cased, false if this reader are in the original case in the fasta
|
||||
*/
|
||||
public boolean isUppercasingBases() {
|
||||
return ! isPreservingCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the subsequence of the contig in the range [start,stop]
|
||||
*
|
||||
|
|
@ -177,8 +248,10 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
* @param contig Contig whose subsequence to retrieve.
|
||||
* @param start inclusive, 1-based start of region.
|
||||
* @param stop inclusive, 1-based stop of region.
|
||||
* @return The partial reference sequence associated with this range.
|
||||
* @return The partial reference sequence associated with this range. If preserveCase is false, then
|
||||
* all of the bases in the ReferenceSequence returned by this method will be upper cased.
|
||||
*/
|
||||
@Override
|
||||
public ReferenceSequence getSubsequenceAt( final String contig, final long start, final long stop ) {
|
||||
final ReferenceSequence result;
|
||||
final Cache myCache = cache.get();
|
||||
|
|
@ -186,6 +259,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
if ( (stop - start) >= cacheSize ) {
|
||||
cacheMisses++;
|
||||
result = super.getSubsequenceAt(contig, start, stop);
|
||||
if ( ! preserveCase ) StringUtil.toUpperCase(result.getBases());
|
||||
} else {
|
||||
// todo -- potential optimization is to check if contig.name == contig, as this in generally will be true
|
||||
SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig);
|
||||
|
|
@ -198,7 +272,9 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
myCache.start = Math.max(start - cacheMissBackup, 0);
|
||||
myCache.stop = Math.min(start + cacheSize + cacheMissBackup, contigInfo.getSequenceLength());
|
||||
myCache.seq = super.getSubsequenceAt(contig, myCache.start, myCache.stop);
|
||||
//System.out.printf("New cache at %s %d-%d%n", contig, cacheStart, cacheStop);
|
||||
|
||||
// convert all of the bases in the sequence to upper case if we aren't preserving cases
|
||||
if ( ! preserveCase ) StringUtil.toUpperCase(myCache.seq.getBases());
|
||||
} else {
|
||||
cacheHits++;
|
||||
}
|
||||
|
|
@ -215,8 +291,10 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
}
|
||||
}
|
||||
|
||||
// for debugging -- print out our efficiency if requested
|
||||
if ( PRINT_EFFICIENCY && (getCacheHits() + getCacheMisses()) % PRINT_FREQUENCY == 0 )
|
||||
printEfficiency(Priority.INFO);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
@ -199,7 +199,7 @@ public class RecalDatum {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%.2f,%,2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
|
||||
return String.format("%.2f,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
|
||||
}
|
||||
|
||||
public String stringForCSV() {
|
||||
|
|
|
|||
|
|
@ -646,7 +646,7 @@ public class AlignmentUtils {
|
|||
|
||||
// get the indel element and move it left one base
|
||||
CigarElement ce = cigar.getCigarElement(indexOfIndel - 1);
|
||||
elements.add(new CigarElement(ce.getLength() - 1, ce.getOperator()));
|
||||
elements.add(new CigarElement(Math.max(ce.getLength() - 1, 0), ce.getOperator()));
|
||||
elements.add(cigar.getCigarElement(indexOfIndel));
|
||||
if (indexOfIndel + 1 < cigar.numCigarElements()) {
|
||||
ce = cigar.getCigarElement(indexOfIndel + 1);
|
||||
|
|
|
|||
|
|
@ -76,7 +76,6 @@ public class BCF2FieldWriterManager {
|
|||
if ( map.containsKey(field) )
|
||||
throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders");
|
||||
map.put(field, writer);
|
||||
if ( logger.isDebugEnabled() ) logger.debug(writer);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -0,0 +1,37 @@
|
|||
package org.broadinstitute.sting;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.testng.IAnnotationTransformer;
|
||||
import org.testng.annotations.ITestAnnotation;
|
||||
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
/**
|
||||
* Provide default @Test values for GATK testng tests.
|
||||
*
|
||||
* Currently only sets the maximum runtime to 10 minutes, if it's not been specified.
|
||||
*
|
||||
* See http://beust.com/weblog/2006/10/18/annotation-transformers-in-java/
|
||||
*
|
||||
* @author depristo
|
||||
* @since 10/31/12
|
||||
* @version 0.1
|
||||
*/
|
||||
public class TestNGTestTransformer implements IAnnotationTransformer {
|
||||
public static final long DEFAULT_TIMEOUT = 1000 * 60 * 10; // 10 minutes max per test
|
||||
|
||||
final static Logger logger = Logger.getLogger(TestNGTestTransformer.class);
|
||||
|
||||
public void transform(ITestAnnotation annotation,
|
||||
Class testClass,
|
||||
Constructor testConstructor,
|
||||
Method testMethod)
|
||||
{
|
||||
if ( annotation.getTimeOut() == 0 ) {
|
||||
logger.warn("test " + testMethod.toString() + " has no specified timeout, adding default timeout " + DEFAULT_TIMEOUT / 1000 / 60 + " minutes");
|
||||
annotation.setTimeOut(DEFAULT_TIMEOUT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
|
|||
public void testCallableLociWalkerBed() {
|
||||
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -summary %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
|
||||
Arrays.asList("9e4ec9c23f21a8162d27a39ab057398c", SUMMARY_MD5));
|
||||
Arrays.asList("42e86c06c167246a28bffdacaca75ffb", SUMMARY_MD5));
|
||||
executeTest("formatBed", spec);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
|
|||
public void testCallableLociWalkerPerBase() {
|
||||
String gatk_args = commonArgs + " -format STATE_PER_BASE -L 1:10,000,000-11,000,000 -summary %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
|
||||
Arrays.asList("e6044b4495ef24f542403e6a94437068", SUMMARY_MD5));
|
||||
Arrays.asList("d66c525d9c70f62df8156261d3e535ad", SUMMARY_MD5));
|
||||
executeTest("format_state_per_base", spec);
|
||||
}
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
|
|||
public void testCallableLociWalker2() {
|
||||
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-10,000,100 -L 1:10,000,110-10,000,120 -summary %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
|
||||
Arrays.asList("c671f65712d9575b8b3e1f1dbedc146e", "d287510eac04acf5a56f5cde2cba0e4a"));
|
||||
Arrays.asList("330f476085533db92a9dbdb3a127c041", "d287510eac04acf5a56f5cde2cba0e4a"));
|
||||
executeTest("formatBed by interval", spec);
|
||||
}
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
|
|||
public void testCallableLociWalker3() {
|
||||
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
|
||||
Arrays.asList("b7d26a470ef906590249f2fa45fd6bdd", "da431d393f7c2b2b3e27556b86c1dbc7"));
|
||||
Arrays.asList("46a53379aaaf9803276a0a34b234f6ab", "da431d393f7c2b2b3e27556b86c1dbc7"));
|
||||
executeTest("formatBed lots of arguments", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import java.util.ArrayList;
|
|||
|
||||
public class UnifiedGenotyperLargeScaleTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
@Test( timeOut = 18000000 )
|
||||
public void testUnifiedGenotyperWholeGenome() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R " + hg18Reference +
|
||||
|
|
@ -22,7 +22,7 @@ public class UnifiedGenotyperLargeScaleTest extends WalkerTest {
|
|||
executeTest("testUnifiedGenotyperWholeGenome", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test( timeOut = 18000000 )
|
||||
public void testUnifiedGenotyperWholeExome() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R " + hg18Reference +
|
||||
|
|
@ -37,7 +37,7 @@ public class UnifiedGenotyperLargeScaleTest extends WalkerTest {
|
|||
executeTest("testUnifiedGenotyperWholeExome", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test( timeOut = 18000000 )
|
||||
public void testUnifiedGenotyperWGParallel() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R " + hg18Reference +
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.testng.SkipException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
// ********************************************************************************** //
|
||||
// Note that this class also serves as an integration test for the VariantAnnotator! //
|
||||
// ********************************************************************************** //
|
||||
|
||||
public class UnifiedGenotyperLiteIntegrationTest extends WalkerTest {
|
||||
|
||||
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing contamination down-sampling gets ignored
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void testContaminationDownsampling() {
|
||||
if ( !GATKLiteUtils.isGATKLite() )
|
||||
throw new SkipException("Only want to test for GATK lite");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("9addd225a985178339a0c49dc5fdc220"));
|
||||
executeTest("test contamination_percentage_to_filter gets ignored", spec);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@ import org.testng.annotations.Test;
|
|||
import java.util.ArrayList;
|
||||
|
||||
public class IndelRealignerLargeScaleTest extends WalkerTest {
|
||||
@Test
|
||||
@Test( timeOut = 18000000 )
|
||||
public void testHighCoverage() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
||||
|
|
@ -21,7 +21,7 @@ public class IndelRealignerLargeScaleTest extends WalkerTest {
|
|||
executeTest("testIndelRealignerHighCoverage", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test( timeOut = 18000000 )
|
||||
public void testRealigner() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.testng.annotations.Test;
|
|||
import java.util.ArrayList;
|
||||
|
||||
public class RealignerTargetCreatorLargeScaleTest extends WalkerTest {
|
||||
@Test
|
||||
@Test( timeOut = 18000000 )
|
||||
public void testRealignerTargetCreator() {
|
||||
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
for ( int i = 0; i < cfg.probs.size(); i++ ) {
|
||||
double p = cfg.probs.get(i);
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i);
|
||||
profile.add(loc, new ActivityProfileResult(p));
|
||||
profile.add(new ActivityProfileResult(loc, p));
|
||||
}
|
||||
Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import java.util.concurrent.Executors;
|
|||
public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
||||
private File simpleFasta = new File(publicTestDir + "/exampleFASTA.fasta");
|
||||
private static final int STEP_SIZE = 1;
|
||||
private final static boolean DEBUG = false;
|
||||
|
||||
//private static final List<Integer> QUERY_SIZES = Arrays.asList(1);
|
||||
private static final List<Integer> QUERY_SIZES = Arrays.asList(1, 10, 100);
|
||||
|
|
@ -53,9 +54,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
return cacheSizeRequested == -1 ? CachingIndexedFastaSequenceFile.DEFAULT_CACHE_SIZE : cacheSizeRequested;
|
||||
}
|
||||
|
||||
@Test(dataProvider = "fastas", enabled = true)
|
||||
@Test(dataProvider = "fastas", enabled = true && ! DEBUG)
|
||||
public void testCachingIndexedFastaReaderSequential1(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
|
||||
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize));
|
||||
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true);
|
||||
|
||||
SAMSequenceRecord contig = caching.getSequenceDictionary().getSequence(0);
|
||||
logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d",
|
||||
|
|
@ -64,6 +65,8 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException {
|
||||
Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
|
||||
|
||||
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
||||
|
||||
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
||||
|
|
@ -92,10 +95,10 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
// Tests grabbing sequences around a middle cached value.
|
||||
@Test(dataProvider = "fastas", enabled = true)
|
||||
@Test(dataProvider = "fastas", enabled = true && ! DEBUG)
|
||||
public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
|
||||
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
||||
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize));
|
||||
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true);
|
||||
|
||||
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
||||
|
||||
|
|
@ -123,11 +126,6 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
@DataProvider(name = "ParallelFastaTest")
|
||||
public Object[][] createParallelFastaTest() {
|
||||
List<Object[]> params = new ArrayList<Object[]>();
|
||||
// for ( int nt : Arrays.asList(1, 2, 3) ) {
|
||||
// for ( int cacheSize : CACHE_SIZES ) {
|
||||
// params.add(new Object[]{simpleFasta, cacheSize, 10, nt});
|
||||
// }
|
||||
// }
|
||||
|
||||
for ( File fasta : Arrays.asList(simpleFasta) ) {
|
||||
for ( int cacheSize : CACHE_SIZES ) {
|
||||
|
|
@ -143,9 +141,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "ParallelFastaTest", enabled = true, timeOut = 60000)
|
||||
@Test(dataProvider = "ParallelFastaTest", enabled = true && ! DEBUG, timeOut = 60000)
|
||||
public void testCachingIndexedFastaReaderParallel(final File fasta, final int cacheSize, final int querySize, final int nt) throws FileNotFoundException, InterruptedException {
|
||||
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize));
|
||||
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true);
|
||||
|
||||
logger.warn(String.format("Parallel caching index fasta reader test cacheSize %d querySize %d nt %d", caching.getCacheSize(), querySize, nt));
|
||||
for ( int iterations = 0; iterations < 1; iterations++ ) {
|
||||
|
|
@ -163,4 +161,49 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
executor.shutdownNow();
|
||||
}
|
||||
}
|
||||
|
||||
// make sure some bases are lower case and some are upper case
|
||||
@Test(enabled = true)
|
||||
public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
|
||||
final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
|
||||
final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(new File(exampleFASTA), true);
|
||||
final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(new File(exampleFASTA));
|
||||
|
||||
int nMixedCase = 0;
|
||||
for ( SAMSequenceRecord contig : original.getSequenceDictionary().getSequences() ) {
|
||||
nMixedCase += testCases(original, casePreserving, allUpper, contig.getSequenceName(), -1, -1);
|
||||
|
||||
final int step = 100;
|
||||
for ( int lastPos = step; lastPos < contig.getSequenceLength(); lastPos += step ) {
|
||||
testCases(original, casePreserving, allUpper, contig.getSequenceName(), lastPos - step, lastPos);
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file. Unexpected test state");
|
||||
}
|
||||
|
||||
private int testCases(final IndexedFastaSequenceFile original,
|
||||
final IndexedFastaSequenceFile casePreserving,
|
||||
final IndexedFastaSequenceFile allUpper,
|
||||
final String contig, final int start, final int stop ) {
|
||||
final String orig = fetchBaseString(original, contig, start, stop);
|
||||
final String keptCase = fetchBaseString(casePreserving, contig, start, stop);
|
||||
final String upperCase = fetchBaseString(allUpper, contig, start, stop).toUpperCase();
|
||||
|
||||
final String origToUpper = orig.toUpperCase();
|
||||
if ( ! orig.equals(origToUpper) ) {
|
||||
Assert.assertEquals(keptCase, orig, "Case preserving operation not equal to the original case for contig " + contig);
|
||||
Assert.assertEquals(upperCase, origToUpper, "All upper case reader not equal to the uppercase of original case for contig " + contig);
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
private String fetchBaseString(final IndexedFastaSequenceFile reader, final String contig, final int start, final int stop) {
|
||||
if ( start == -1 )
|
||||
return new String(reader.getSequence(contig).getBases());
|
||||
else
|
||||
return new String(reader.getSubsequenceAt(contig, start, stop).getBases());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ import org.testng.annotations.Test
|
|||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class DataProcessingPipelineTest {
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testSimpleBAM {
|
||||
val projectName = "test1"
|
||||
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
|
||||
|
|
@ -45,7 +45,7 @@ class DataProcessingPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testBWAPEBAM {
|
||||
val projectName = "test2"
|
||||
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ import org.testng.annotations.Test
|
|||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class PacbioProcessingPipelineTest {
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testPacbioProcessingPipeline {
|
||||
val testOut = "exampleBAM.recal.bam"
|
||||
val spec = new PipelineTestSpec
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
|||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class DevNullOutputPipelineTest {
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testDevNullOutput() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "devnulloutput"
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
|||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class ExampleCountLociPipelineTest {
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testCountLoci() {
|
||||
val testOut = "count.out"
|
||||
val spec = new PipelineTestSpec
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
|||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class ExampleCountReadsPipelineTest {
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testCountReads() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "countreads"
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
|||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class ExampleReadFilterPipelineTest {
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testExampleReadFilter() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "examplereadfilter"
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
|||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class ExampleRetryMemoryLimitPipelineTest {
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testRetryMemoryLimit() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "RetryMemoryLimit"
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
|||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class ExampleUnifiedGenotyperPipelineTest {
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testUnifiedGenotyper() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "unifiedgenotyper"
|
||||
|
|
@ -51,7 +51,7 @@ class ExampleUnifiedGenotyperPipelineTest {
|
|||
Array("vcf_intervals", BaseTest.validationDataLocation + "intervalTest.1.vcf")
|
||||
).asInstanceOf[Array[Array[Object]]]
|
||||
|
||||
@Test(dataProvider = "ugIntervals")
|
||||
@Test(dataProvider = "ugIntervals", timeOut=36000000)
|
||||
def testUnifiedGenotyperWithIntervals(intervalsName: String, intervalsPath: String) {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "unifiedgenotyper_with_" + intervalsName
|
||||
|
|
@ -64,7 +64,7 @@ class ExampleUnifiedGenotyperPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testUnifiedGenotyperNoGCOpt() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "unifiedgenotyper_no_gc_opt"
|
||||
|
|
@ -80,7 +80,7 @@ class ExampleUnifiedGenotyperPipelineTest {
|
|||
@DataProvider(name="resMemReqParams")
|
||||
def getResMemReqParam = Array(Array("mem_free"), Array("virtual_free")).asInstanceOf[Array[Array[Object]]]
|
||||
|
||||
@Test(dataProvider = "resMemReqParams")
|
||||
@Test(dataProvider = "resMemReqParams", timeOut=36000000)
|
||||
def testUnifiedGenotyperResMemReqParam(reqParam: String) {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "unifiedgenotyper_" + reqParam
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ import org.testng.annotations.Test
|
|||
import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
||||
|
||||
class HelloWorldPipelineTest {
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorld() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorld"
|
||||
|
|
@ -37,7 +37,7 @@ class HelloWorldPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorldWithRunName() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorldWithRunName"
|
||||
|
|
@ -47,7 +47,7 @@ class HelloWorldPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorldWithMemoryLimit() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorldMemoryLimit"
|
||||
|
|
@ -57,7 +57,7 @@ class HelloWorldPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorldWithPriority() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorldWithPriority"
|
||||
|
|
@ -67,7 +67,7 @@ class HelloWorldPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorldWithLsfResource() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorldWithLsfResource"
|
||||
|
|
@ -77,7 +77,7 @@ class HelloWorldPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorldWithLsfResourceAndMemoryLimit() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorldWithLsfResourceAndMemoryLimit"
|
||||
|
|
@ -87,7 +87,7 @@ class HelloWorldPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorldWithLsfEnvironment() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorldWithLsfEnvironment"
|
||||
|
|
@ -97,7 +97,7 @@ class HelloWorldPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorldWithGridEngineResource() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorldWithGridEngineResource"
|
||||
|
|
@ -107,7 +107,7 @@ class HelloWorldPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorldWithGridEngineResourceAndMemoryLimit() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorldWithGridEngineResourceAndMemoryLimit"
|
||||
|
|
@ -117,7 +117,7 @@ class HelloWorldPipelineTest {
|
|||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeOut=36000000)
|
||||
def testHelloWorldWithGridEngineEnvironment() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "HelloWorldWithGridEngineEnvironment"
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="110" status="integration" />
|
||||
<info organisation="org.broad" module="tribble" revision="119" status="integration" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue