Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
5da278d926
61
build.xml
61
build.xml
|
|
@ -780,6 +780,50 @@
|
|||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<!-- FAILED-TEST -->
|
||||
<macrodef name="run-failed-test">
|
||||
<attribute name="xmlfailedtestfile" />
|
||||
<sequential>
|
||||
<!-- Get the pipeline run type. Default to dry. -->
|
||||
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
||||
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
||||
</condition>
|
||||
|
||||
<condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
|
||||
<isset property="include.contracts" />
|
||||
</condition>
|
||||
|
||||
<mkdir dir="${report}/failed_rerun" />
|
||||
<echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
|
||||
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
|
||||
<testng outputDir="${report}/failed_rerun"
|
||||
haltOnFailure="false" failureProperty="test.failure"
|
||||
verbose="2"
|
||||
workingDir="${basedir}"
|
||||
useDefaultListeners="false"
|
||||
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
|
||||
<jvmarg value="-Xmx${test.maxmemory}" />
|
||||
<jvmarg value="-Djava.awt.headless=true" />
|
||||
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
|
||||
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
|
||||
<jvmarg line="${cofoja.jvm.args}"/>
|
||||
<!-- <jvmarg value="-Xdebug"/> -->
|
||||
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
|
||||
<classpath>
|
||||
<path refid="external.dependencies" />
|
||||
<pathelement location="${java.classes}" />
|
||||
<pathelement location="${scala.classes}" />
|
||||
<pathelement location="${java.contracts}" />
|
||||
<pathelement location="${java.test.classes}" />
|
||||
<pathelement location="${scala.test.classes}" />
|
||||
</classpath>
|
||||
|
||||
<xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
|
||||
</testng>
|
||||
|
||||
<fail message="test failed" if="test.failure" />
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
|
||||
<target name="test" depends="test.compile,tribble.test" description="Run unit tests">
|
||||
|
|
@ -814,6 +858,22 @@
|
|||
<run-test testtype="${pipetype}"/>
|
||||
</target>
|
||||
|
||||
<target name="failed-test" depends="test.compile">
|
||||
<run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" />
|
||||
</target>
|
||||
|
||||
<target name="failed-integration" depends="test.compile">
|
||||
<run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" />
|
||||
</target>
|
||||
|
||||
<target name="failed-performance" depends="test.compile">
|
||||
<run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" />
|
||||
</target>
|
||||
|
||||
<target name="failed-pipeline" depends="test.compile">
|
||||
<run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" />
|
||||
</target>
|
||||
|
||||
<!-- ***************************************************************************** -->
|
||||
<!-- *********** Tribble ********* -->
|
||||
<!-- ***************************************************************************** -->
|
||||
|
|
@ -981,6 +1041,7 @@
|
|||
<delete dir="out"/>
|
||||
<delete dir="${build.dir}"/>
|
||||
<delete dir="${lib.dir}"/>
|
||||
<delete dir="dump"/>
|
||||
<delete dir="staging"/>
|
||||
<delete dir="${dist.dir}"/>
|
||||
<delete dir="pipelinetests"/>
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
|
|
@ -12,6 +11,7 @@
|
|||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
|
|
@ -22,25 +22,22 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
import net.sf.picard.util.QualityUtil;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
/**
|
||||
* Filter out reads whose mapping quality is unavailable (QualityUtils.MAPPING_QUALITY_UNAVAILABLE).
|
||||
*
|
||||
* @author ebanks
|
||||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class BatchMergeIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testBatchMerge1() {
|
||||
String bam = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
||||
String alleles = validationDataLocation + "batch.merge.alleles.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -NO_HEADER -BTI alleles -stand_call_conf 0.0 -glm BOTH -G none -nsl -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -o %s -R " + b37KGReference
|
||||
+ " -B:alleles,VCF " + alleles
|
||||
+ " -I " + bam,
|
||||
1,
|
||||
Arrays.asList("f4ed8f4ef2cba96823c06e90e9d0de35"));
|
||||
executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec);
|
||||
public class MappingQualityUnavailableReadFilter extends ReadFilter {
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -24,17 +24,16 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Filter out zero mapping quality reads.
|
||||
* Filter out mapping quality zero reads.
|
||||
*
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class ZeroMappingQualityReadFilter extends ReadFilter {
|
||||
public class MappingQualityZeroReadFilter extends ReadFilter {
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
return (rec.getMappingQuality() == 0);
|
||||
}
|
||||
|
|
@ -55,10 +55,14 @@ public class TableFeature implements Feature {
|
|||
}
|
||||
|
||||
public List<String> getAllValues() {
|
||||
return getValuesTo(values.size()-1);
|
||||
return getValuesTo(values.size());
|
||||
}
|
||||
|
||||
public List<String> getValuesTo(int columnPosition) {
|
||||
return values.subList(0,columnPosition);
|
||||
}
|
||||
|
||||
public List<String> getHeader() {
|
||||
return keys;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,5 +62,5 @@ public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAn
|
|||
|
||||
public List<String> getKeyNames() { return Arrays.asList("AB"); }
|
||||
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), -1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); }
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); }
|
||||
}
|
||||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -41,8 +42,8 @@ import java.util.*;
|
|||
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
|
||||
|
||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -142,5 +143,5 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot
|
|||
// public String getIndelBases()
|
||||
public List<String> getKeyNames() { return Arrays.asList("AD"); }
|
||||
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFCompoundHeaderLine.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -21,7 +22,7 @@ public class MappingQualityRankSumTest extends RankSumTest {
|
|||
|
||||
protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 and 255 are special mapping qualities used as a code by aligners
|
||||
if ( isUsableBase(p) ) {
|
||||
if ( p.getBase() == ref ) {
|
||||
refQuals.add((double)p.getMappingQual());
|
||||
} else if ( p.getBase() == alt ) {
|
||||
|
|
@ -34,7 +35,7 @@ public class MappingQualityRankSumTest extends RankSumTest {
|
|||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
for (final PileupElement p: pileup) {
|
||||
if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() < 254) {
|
||||
if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
|
||||
// retrieve likelihood information corresponding to this read
|
||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
// by design, first element in LinkedHashMap was ref allele
|
||||
|
|
@ -54,8 +55,6 @@ public class MappingQualityRankSumTest extends RankSumTest {
|
|||
refQuals.add((double)p.getMappingQual());
|
||||
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
altQuals.add((double)p.getMappingQual());
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,5 +47,5 @@ public class NBaseCount implements InfoFieldAnnotation {
|
|||
|
||||
public List<String> getKeyNames() { return Arrays.asList("PercentNBaseSolid"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 4, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 1, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -38,8 +39,10 @@ public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotatio
|
|||
pileup = context.getBasePileup();
|
||||
|
||||
if (pileup != null) {
|
||||
for (PileupElement p : pileup )
|
||||
qualities[index++] = p.getRead().getMappingQuality();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
||||
qualities[index++] = p.getMappingQual();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -106,6 +106,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnota
|
|||
protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
|
||||
|
||||
protected static boolean isUsableBase( final PileupElement p ) {
|
||||
return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed quality score here
|
||||
return !( p.isDeletion() ||
|
||||
p.getMappingQual() == 0 ||
|
||||
p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
|
||||
((int)p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE ); // need the unBAQed quality score here
|
||||
}
|
||||
}
|
||||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
|
|
@ -200,8 +201,8 @@ public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation {
|
|||
1,
|
||||
VCFHeaderLineType.Integer,
|
||||
"Total read depth per sample, including MQ0"),
|
||||
new VCFFormatHeaderLine(getKeyNames().get(1),
|
||||
VCFCompoundHeaderLine.UNBOUNDED,
|
||||
new VCFFormatHeaderLine(getKeyNames().get(1),
|
||||
VCFHeaderLineCount.UNBOUNDED,
|
||||
VCFHeaderLineType.Float,
|
||||
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -65,5 +66,5 @@ public class SampleList implements InfoFieldAnnotation {
|
|||
|
||||
public List<String> getKeyNames() { return Arrays.asList("Samples"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFInfoHeaderLine.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.BlockCompressedInputStream;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 1:09 PM
|
||||
*
|
||||
* Class implementing diffnode reader for VCF
|
||||
*/
|
||||
public class BAMDiffableReader implements DiffableReader {
|
||||
@Override
|
||||
public String getName() { return "BAM"; }
|
||||
|
||||
@Override
|
||||
public DiffElement readFromFile(File file, int maxElementsToRead) {
|
||||
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||
|
||||
DiffNode root = DiffNode.rooted(file.getName());
|
||||
SAMRecordIterator iterator = reader.iterator();
|
||||
|
||||
int count = 0;
|
||||
while ( iterator.hasNext() ) {
|
||||
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
final SAMRecord record = iterator.next();
|
||||
|
||||
// name is the read name + first of pair
|
||||
String name = record.getReadName().replace('.', '_');
|
||||
if ( record.getReadPairedFlag() ) {
|
||||
name += record.getFirstOfPairFlag() ? "_1" : "_2";
|
||||
}
|
||||
|
||||
DiffNode readRoot = DiffNode.empty(name, root);
|
||||
|
||||
// add fields
|
||||
readRoot.add("NAME", record.getReadName());
|
||||
readRoot.add("FLAGS", record.getFlags());
|
||||
readRoot.add("RNAME", record.getReferenceName());
|
||||
readRoot.add("POS", record.getAlignmentStart());
|
||||
readRoot.add("MAPQ", record.getMappingQuality());
|
||||
readRoot.add("CIGAR", record.getCigarString());
|
||||
readRoot.add("RNEXT", record.getMateReferenceName());
|
||||
readRoot.add("PNEXT", record.getMateAlignmentStart());
|
||||
readRoot.add("TLEN", record.getInferredInsertSize());
|
||||
readRoot.add("SEQ", record.getReadString());
|
||||
readRoot.add("QUAL", record.getBaseQualityString());
|
||||
|
||||
for ( SAMRecord.SAMTagAndValue xt : record.getAttributes() ) {
|
||||
readRoot.add(xt.tag, xt.value);
|
||||
}
|
||||
|
||||
// add record to root
|
||||
if ( ! root.hasElement(name) )
|
||||
// protect ourselves from malformed files
|
||||
root.add(readRoot);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
||||
return root.getBinding();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRead(File file) {
|
||||
final byte[] BAM_MAGIC = "BAM\1".getBytes();
|
||||
final byte[] buffer = new byte[BAM_MAGIC.length];
|
||||
try {
|
||||
FileInputStream fstream = new FileInputStream(file);
|
||||
new BlockCompressedInputStream(fstream).read(buffer,0,BAM_MAGIC.length);
|
||||
return Arrays.equals(buffer, BAM_MAGIC);
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
} catch ( net.sf.samtools.FileTruncatedException e ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import com.google.java.contract.*;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:55 PM
|
||||
*
|
||||
* An interface that must be implemented to allow us to calculate differences
|
||||
* between structured objects
|
||||
*/
|
||||
@Invariant({
|
||||
"name != null",
|
||||
"value != null",
|
||||
"parent != null || name.equals(\"ROOT\")",
|
||||
"value == null || value.getBinding() == this"})
|
||||
public class DiffElement {
|
||||
public final static DiffElement ROOT = new DiffElement();
|
||||
|
||||
final private String name;
|
||||
final private DiffElement parent;
|
||||
final private DiffValue value;
|
||||
|
||||
/**
|
||||
* For ROOT only
|
||||
*/
|
||||
private DiffElement() {
|
||||
this.name = "ROOT";
|
||||
this.parent = null;
|
||||
this.value = new DiffValue(this, "ROOT");
|
||||
}
|
||||
|
||||
@Requires({"name != null", "parent != null", "value != null"})
|
||||
public DiffElement(String name, DiffElement parent, DiffValue value) {
|
||||
if ( name.equals("ROOT") ) throw new IllegalArgumentException("Cannot use reserved name ROOT");
|
||||
this.name = name;
|
||||
this.parent = parent;
|
||||
this.value = value;
|
||||
this.value.setBinding(this);
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public DiffElement getParent() {
|
||||
return parent;
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public DiffValue getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public boolean isRoot() { return this == ROOT; }
|
||||
|
||||
@Ensures({"result != null"})
|
||||
@Override
|
||||
public String toString() {
|
||||
return getName() + "=" + getValue().toString();
|
||||
}
|
||||
|
||||
public String toString(int offset) {
|
||||
return (offset > 0 ? Utils.dupString(' ', offset) : 0) + getName() + "=" + getValue().toString(offset);
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public final String fullyQualifiedName() {
|
||||
if ( isRoot() )
|
||||
return "";
|
||||
else if ( parent.isRoot() )
|
||||
return name;
|
||||
else
|
||||
return parent.fullyQualifiedName() + "." + name;
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public String toOneLineString() {
|
||||
return getName() + "=" + getValue().toOneLineString();
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public DiffNode getValueAsNode() {
|
||||
if ( getValue().isCompound() )
|
||||
return (DiffNode)getValue();
|
||||
else
|
||||
throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return 1 + getValue().size();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,360 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:51 PM
|
||||
* A generic engine for comparing tree-structured objects
|
||||
*/
|
||||
public class DiffEngine {
|
||||
final protected static Logger logger = Logger.getLogger(DiffEngine.class);
|
||||
|
||||
private final Map<String, DiffableReader> readers = new HashMap<String, DiffableReader>();
|
||||
|
||||
/**
 * Creates a DiffEngine and calls loadDiffableReaders().
 * NOTE(review): loadDiffableReaders() is not visible here; presumably it fills the
 * readers map -- confirm.
 */
public DiffEngine() {
|
||||
loadDiffableReaders();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// difference calculation
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public List<SpecificDifference> diff(DiffElement master, DiffElement test) {
|
||||
DiffValue masterValue = master.getValue();
|
||||
DiffValue testValue = test.getValue();
|
||||
|
||||
if ( masterValue.isCompound() && masterValue.isCompound() ) {
|
||||
return diff(master.getValueAsNode(), test.getValueAsNode());
|
||||
} else if ( masterValue.isAtomic() && testValue.isAtomic() ) {
|
||||
return diff(masterValue, testValue);
|
||||
} else {
|
||||
// structural difference in types. one is node, other is leaf
|
||||
return Arrays.asList(new SpecificDifference(master, test));
|
||||
}
|
||||
}
|
||||
|
||||
public List<SpecificDifference> diff(DiffNode master, DiffNode test) {
|
||||
Set<String> allNames = new HashSet<String>(master.getElementNames());
|
||||
allNames.addAll(test.getElementNames());
|
||||
List<SpecificDifference> diffs = new ArrayList<SpecificDifference>();
|
||||
|
||||
for ( String name : allNames ) {
|
||||
DiffElement masterElt = master.getElement(name);
|
||||
DiffElement testElt = test.getElement(name);
|
||||
if ( masterElt == null && testElt == null ) {
|
||||
throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
|
||||
} else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
|
||||
// todo -- should one of these be a special MISSING item?
|
||||
diffs.add(new SpecificDifference(masterElt, testElt));
|
||||
} else {
|
||||
diffs.addAll(diff(masterElt, testElt));
|
||||
}
|
||||
}
|
||||
|
||||
return diffs;
|
||||
}
|
||||
|
||||
public List<SpecificDifference> diff(DiffValue master, DiffValue test) {
|
||||
if ( master.getValue().equals(test.getValue()) ) {
|
||||
return Collections.emptyList();
|
||||
} else {
|
||||
return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding()));
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Summarizing differences
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Emits a summary of the diffs to out. Suppose you have the following three differences:
|
||||
*
|
||||
* A.X.Z:1!=2
|
||||
* A.Y.Z:3!=4
|
||||
* B.X.Z:5!=6
|
||||
*
|
||||
* The above is the itemized list of the differences. The summary looks for common differences
|
||||
* in the name hierarchy, counts those shared elements, and emits the differences that occur
|
||||
* in order of decreasing counts.
|
||||
*
|
||||
* So, in the above example, what are the shared elements?
|
||||
*
|
||||
* A.X.Z and B.X.Z share X.Z, so there's a *.X.Z with count 2
|
||||
* A.X.Z, A.Y.Z, and B.X.Z all share *.*.Z, with count 3
|
||||
* Each of A.X.Z, A.Y.Z, and B.X.Z are individually unique, with count 1
|
||||
*
|
||||
* So we would emit the following summary:
|
||||
*
|
||||
* *.*.Z: 3
|
||||
* *.X.Z: 2
|
||||
* A.X.Z: 1 [specific difference: 1!=2]
|
||||
* A.Y.Z: 1 [specific difference: 3!=4]
|
||||
* B.X.Z: 1 [specific difference: 5!=6]
|
||||
*
|
||||
* The algorithm to accomplish this calculation is relatively simple. Start with all of the
|
||||
* concrete differences. For each pair of differences A1.A2....AN and B1.B2....BN:
|
||||
*
|
||||
* find the longest common subsequence Si.Si+1...SN where Ai = Bi = Si
|
||||
* If i == 0, then there's no shared substructure
|
||||
* If i > 0, then generate the summarized value X = *.*...Si.Si+1...SN
|
||||
* if X is a known summary, increment it's count, otherwise set its count to 1
|
||||
*
|
||||
* Not that only pairs of the same length are considered as potentially equivalent
|
||||
*
|
||||
* @param params determines how we display the items
|
||||
* @param diffs
|
||||
*/
|
||||
public void reportSummarizedDifferences(List<SpecificDifference> diffs, SummaryReportParams params ) {
|
||||
printSummaryReport(summarizeDifferences(diffs), params );
|
||||
}
|
||||
|
||||
public List<Difference> summarizeDifferences(List<SpecificDifference> diffs) {
|
||||
return summarizedDifferencesOfPaths(diffs);
|
||||
}
|
||||
|
||||
final protected static String[] diffNameToPath(String diffName) {
|
||||
return diffName.split("\\.");
|
||||
}
|
||||
|
||||
protected List<Difference> summarizedDifferencesOfPathsFromString(List<String> singletonDiffs) {
|
||||
List<Difference> diffs = new ArrayList<Difference>();
|
||||
|
||||
for ( String diff : singletonDiffs ) {
|
||||
diffs.add(new Difference(diff));
|
||||
}
|
||||
|
||||
return summarizedDifferencesOfPaths(diffs);
|
||||
}
|
||||
|
||||
protected List<Difference> summarizedDifferencesOfPaths(List<? extends Difference> singletonDiffs) {
|
||||
Map<String, Difference> summaries = new HashMap<String, Difference>();
|
||||
|
||||
// create the initial set of differences
|
||||
for ( int i = 0; i < singletonDiffs.size(); i++ ) {
|
||||
for ( int j = 0; j <= i; j++ ) {
|
||||
Difference diffPath1 = singletonDiffs.get(i);
|
||||
Difference diffPath2 = singletonDiffs.get(j);
|
||||
if ( diffPath1.length() == diffPath2.length() ) {
|
||||
int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts());
|
||||
String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath();
|
||||
addSummary(summaries, path, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// count differences
|
||||
for ( Difference diffPath : singletonDiffs ) {
|
||||
for ( Difference sumDiff : summaries.values() ) {
|
||||
if ( sumDiff.matches(diffPath.getParts()) )
|
||||
addSummary(summaries, sumDiff.getPath(), false);
|
||||
}
|
||||
}
|
||||
|
||||
List<Difference> sortedSummaries = new ArrayList<Difference>(summaries.values());
|
||||
Collections.sort(sortedSummaries);
|
||||
return sortedSummaries;
|
||||
}
|
||||
|
||||
private static void addSummary(Map<String, Difference> summaries, String path, boolean onlyCatalog) {
|
||||
if ( summaries.containsKey(path) ) {
|
||||
if ( ! onlyCatalog )
|
||||
summaries.get(path).incCount();
|
||||
} else {
|
||||
Difference sumDiff = new Difference(path);
|
||||
summaries.put(sumDiff.getPath(), sumDiff);
|
||||
}
|
||||
}
|
||||
|
||||
protected void printSummaryReport(List<Difference> sortedSummaries, SummaryReportParams params ) {
|
||||
GATKReport report = new GATKReport();
|
||||
final String tableName = "diffences";
|
||||
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information");
|
||||
GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("Difference", true);
|
||||
table.addColumn("NumberOfOccurrences", 0);
|
||||
|
||||
int count = 0, count1 = 0;
|
||||
for ( Difference diff : sortedSummaries ) {
|
||||
if ( diff.getCount() < params.minSumDiffToShow )
|
||||
// in order, so break as soon as the count is too low
|
||||
break;
|
||||
|
||||
if ( params.maxItemsToDisplay != 0 && count++ > params.maxItemsToDisplay )
|
||||
break;
|
||||
|
||||
if ( diff.getCount() == 1 ) {
|
||||
count1++;
|
||||
if ( params.maxCountOneItems != 0 && count1 > params.maxCountOneItems )
|
||||
break;
|
||||
}
|
||||
|
||||
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
|
||||
}
|
||||
|
||||
table.write(params.out);
|
||||
}
|
||||
|
||||
protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) {
|
||||
int i = 0;
|
||||
for ( ; i < diffPath1.length; i++ ) {
|
||||
int j = diffPath1.length - i - 1;
|
||||
if ( ! diffPath1[j].equals(diffPath2[j]) )
|
||||
break;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/**
|
||||
* parts is [A B C D]
|
||||
* commonPostfixLength: how many parts are shared at the end, suppose its 2
|
||||
* We want to create a string *.*.C.D
|
||||
*
|
||||
* @param parts
|
||||
* @param commonPostfixLength
|
||||
* @return
|
||||
*/
|
||||
protected static String summarizedPath(String[] parts, int commonPostfixLength) {
|
||||
int stop = parts.length - commonPostfixLength;
|
||||
if ( stop > 0 ) parts = parts.clone();
|
||||
for ( int i = 0; i < stop; i++ ) {
|
||||
parts[i] = "*";
|
||||
}
|
||||
return Utils.join(".", parts);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// plugin manager
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public void loadDiffableReaders() {
|
||||
List<Class<? extends DiffableReader>> drClasses = new PluginManager<DiffableReader>( DiffableReader.class ).getPlugins();
|
||||
|
||||
logger.info("Loading diffable modules:");
|
||||
for (Class<? extends DiffableReader> drClass : drClasses ) {
|
||||
logger.info("\t" + drClass.getSimpleName());
|
||||
|
||||
try {
|
||||
DiffableReader dr = drClass.newInstance();
|
||||
readers.put(dr.getName(), dr);
|
||||
} catch (InstantiationException e) {
|
||||
throw new ReviewedStingException("Unable to instantiate module '" + drClass.getSimpleName() + "'");
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new ReviewedStingException("Illegal access error when trying to instantiate '" + drClass.getSimpleName() + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected Map<String, DiffableReader> getReaders() {
|
||||
return readers;
|
||||
}
|
||||
|
||||
protected DiffableReader getReader(String name) {
|
||||
return readers.get(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a reader appropriate for this file, or null if no such reader exists
|
||||
* @param file
|
||||
* @return
|
||||
*/
|
||||
public DiffableReader findReaderForFile(File file) {
|
||||
for ( DiffableReader reader : readers.values() )
|
||||
if (reader.canRead(file) )
|
||||
return reader;
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if reader appropriate for this file, or false if no such reader exists
|
||||
* @param file
|
||||
* @return
|
||||
*/
|
||||
public boolean canRead(File file) {
|
||||
return findReaderForFile(file) != null;
|
||||
}
|
||||
|
||||
|
||||
public DiffElement createDiffableFromFile(File file) {
|
||||
return createDiffableFromFile(file, -1);
|
||||
}
|
||||
|
||||
public DiffElement createDiffableFromFile(File file, int maxElementsToRead) {
|
||||
DiffableReader reader = findReaderForFile(file);
|
||||
if ( reader == null )
|
||||
throw new UserException("Unsupported file type: " + file);
|
||||
else
|
||||
return reader.readFromFile(file, maxElementsToRead);
|
||||
}
|
||||
|
||||
public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {
|
||||
DiffEngine diffEngine = new DiffEngine();
|
||||
|
||||
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
||||
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static class SummaryReportParams {
|
||||
PrintStream out = System.out;
|
||||
int maxItemsToDisplay = 0;
|
||||
int maxCountOneItems = 0;
|
||||
int minSumDiffToShow = 0;
|
||||
|
||||
public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) {
|
||||
this.out = out;
|
||||
this.maxItemsToDisplay = maxItemsToDisplay;
|
||||
this.maxCountOneItems = maxCountOneItems;
|
||||
this.minSumDiffToShow = minSumDiffToShow;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,248 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:55 PM
|
||||
*
|
||||
* An interface that must be implemented to allow us to calculate differences
|
||||
* between structured objects
|
||||
*/
|
||||
public class DiffNode extends DiffValue {
|
||||
private Map<String, DiffElement> getElementMap() {
|
||||
return (Map<String, DiffElement>)super.getValue();
|
||||
}
|
||||
private static Map<String, DiffElement> emptyElements() { return new HashMap<String, DiffElement>(); }
|
||||
|
||||
private DiffNode(Map<String, DiffElement> elements) {
|
||||
super(elements);
|
||||
}
|
||||
|
||||
private DiffNode(DiffElement binding, Map<String, DiffElement> elements) {
|
||||
super(binding, elements);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
public static DiffNode rooted(String name) {
|
||||
return empty(name, DiffElement.ROOT);
|
||||
}
|
||||
|
||||
public static DiffNode empty(String name, DiffElement parent) {
|
||||
DiffNode df = new DiffNode(emptyElements());
|
||||
DiffElement elt = new DiffElement(name, parent, df);
|
||||
df.setBinding(elt);
|
||||
return df;
|
||||
}
|
||||
|
||||
public static DiffNode empty(String name, DiffValue parent) {
|
||||
return empty(name, parent.getBinding());
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// accessors
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public boolean isAtomic() { return false; }
|
||||
|
||||
public Collection<String> getElementNames() {
|
||||
return getElementMap().keySet();
|
||||
}
|
||||
|
||||
public Collection<DiffElement> getElements() {
|
||||
return getElementMap().values();
|
||||
}
|
||||
|
||||
private Collection<DiffElement> getElements(boolean atomicOnly) {
|
||||
List<DiffElement> elts = new ArrayList<DiffElement>();
|
||||
for ( DiffElement elt : getElements() )
|
||||
if ( (atomicOnly && elt.getValue().isAtomic()) || (! atomicOnly && elt.getValue().isCompound()))
|
||||
elts.add(elt);
|
||||
return elts;
|
||||
}
|
||||
|
||||
public Collection<DiffElement> getAtomicElements() {
|
||||
return getElements(true);
|
||||
}
|
||||
|
||||
public Collection<DiffElement> getCompoundElements() {
|
||||
return getElements(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the element bound to name, or null if no such binding exists
|
||||
* @param name
|
||||
* @return
|
||||
*/
|
||||
public DiffElement getElement(String name) {
|
||||
return getElementMap().get(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if name is bound in this node
|
||||
* @param name
|
||||
* @return
|
||||
*/
|
||||
public boolean hasElement(String name) {
|
||||
return getElement(name) != null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// add
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@Requires("elt != null")
|
||||
public void add(DiffElement elt) {
|
||||
if ( getElementMap().containsKey(elt.getName()) )
|
||||
throw new IllegalArgumentException("Attempting to rebind already existing binding: " + elt + " node=" + this);
|
||||
getElementMap().put(elt.getName(), elt);
|
||||
}
|
||||
|
||||
@Requires("elt != null")
|
||||
public void add(DiffValue elt) {
|
||||
add(elt.getBinding());
|
||||
}
|
||||
|
||||
@Requires("elts != null")
|
||||
public void add(Collection<DiffElement> elts) {
|
||||
for ( DiffElement e : elts )
|
||||
add(e);
|
||||
}
|
||||
|
||||
public void add(String name, Object value) {
|
||||
add(new DiffElement(name, this.getBinding(), new DiffValue(value)));
|
||||
}
|
||||
|
||||
public int size() {
|
||||
int count = 0;
|
||||
for ( DiffElement value : getElements() )
|
||||
count += value.size();
|
||||
return count;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// toString
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return toString(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(int offset) {
|
||||
String off = offset > 0 ? Utils.dupString(' ', offset) : "";
|
||||
StringBuilder b = new StringBuilder();
|
||||
|
||||
b.append("(").append("\n");
|
||||
Collection<DiffElement> atomicElts = getAtomicElements();
|
||||
for ( DiffElement elt : atomicElts ) {
|
||||
b.append(elt.toString(offset + 2)).append('\n');
|
||||
}
|
||||
|
||||
for ( DiffElement elt : getCompoundElements() ) {
|
||||
b.append(elt.toString(offset + 4)).append('\n');
|
||||
}
|
||||
b.append(off).append(")").append("\n");
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toOneLineString() {
|
||||
StringBuilder b = new StringBuilder();
|
||||
|
||||
b.append('(');
|
||||
List<String> parts = new ArrayList<String>();
|
||||
for ( DiffElement elt : getElements() )
|
||||
parts.add(elt.toOneLineString());
|
||||
b.append(Utils.join(" ", parts));
|
||||
b.append(')');
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// fromString and toOneLineString
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public static DiffElement fromString(String tree) {
|
||||
return fromString(tree, DiffElement.ROOT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Doesn't support full tree structure parsing
|
||||
* @param tree
|
||||
* @param parent
|
||||
* @return
|
||||
*/
|
||||
private static DiffElement fromString(String tree, DiffElement parent) {
|
||||
// X=(A=A B=B C=(D=D))
|
||||
String[] parts = tree.split("=", 2);
|
||||
if ( parts.length != 2 )
|
||||
throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts);
|
||||
String name = parts[0];
|
||||
String value = parts[1];
|
||||
|
||||
if ( value.length() == 0 )
|
||||
throw new ReviewedStingException("Illegal tree structure: " + value + " at " + tree);
|
||||
|
||||
if ( value.charAt(0) == '(' ) {
|
||||
if ( ! value.endsWith(")") )
|
||||
throw new ReviewedStingException("Illegal tree structure. Missing ): " + value + " at " + tree);
|
||||
String subtree = value.substring(1, value.length()-1);
|
||||
DiffNode rec = DiffNode.empty(name, parent);
|
||||
String[] subParts = subtree.split(" ");
|
||||
for ( String subPart : subParts ) {
|
||||
rec.add(fromString(subPart, rec.getBinding()));
|
||||
}
|
||||
return rec.getBinding();
|
||||
} else {
|
||||
return new DiffValue(name, parent, value).getBinding();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Compares two record-oriented files, itemizing specific difference between equivalent
|
||||
* records in the two files. Reports both itemized and summarized differences.
|
||||
* @author Mark DePristo
|
||||
* @version 0.1
|
||||
*/
|
||||
@Requires(value={})
|
||||
public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
||||
@Output(doc="File to which results should be written",required=true)
|
||||
protected PrintStream out;
|
||||
|
||||
@Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false)
|
||||
int MAX_OBJECTS_TO_READ = -1;
|
||||
|
||||
@Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false)
|
||||
int MAX_DIFFS = 0;
|
||||
|
||||
@Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false)
|
||||
int MAX_COUNT1_DIFFS = 0;
|
||||
|
||||
@Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
|
||||
int minCountForDiff = 1;
|
||||
|
||||
@Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false)
|
||||
boolean showItemizedDifferences = false;
|
||||
|
||||
@Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true)
|
||||
File masterFile;
|
||||
|
||||
@Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true)
|
||||
File testFile;
|
||||
|
||||
final DiffEngine diffEngine = new DiffEngine();
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer counter, Integer sum) {
|
||||
return counter + sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer sum) {
|
||||
out.printf("Reading master file %s%n", masterFile);
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
|
||||
out.printf(" Read %d objects%n", master.size());
|
||||
out.printf("Reading test file %s%n", testFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
|
||||
out.printf(" Read %d objects%n", test.size());
|
||||
|
||||
// out.printf("Master diff objects%n");
|
||||
// out.println(master.toString());
|
||||
// out.printf("Test diff objects%n");
|
||||
// out.println(test.toString());
|
||||
|
||||
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||
if ( showItemizedDifferences ) {
|
||||
out.printf("Itemized results%n");
|
||||
for ( SpecificDifference diff : diffs )
|
||||
out.printf("DIFF: %s%n", diff.toString());
|
||||
}
|
||||
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:55 PM
|
||||
*
|
||||
* An interface that must be implemented to allow us to calculate differences
|
||||
* between structured objects
|
||||
*/
|
||||
public class DiffValue {
|
||||
private DiffElement binding = null;
|
||||
final private Object value;
|
||||
|
||||
public DiffValue(Object value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public DiffValue(DiffElement binding, Object value) {
|
||||
this.binding = binding;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public DiffValue(DiffValue parent, Object value) {
|
||||
this(parent.getBinding(), value);
|
||||
}
|
||||
|
||||
public DiffValue(String name, DiffElement parent, Object value) {
|
||||
this.binding = new DiffElement(name, parent, this);
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public DiffValue(String name, DiffValue parent, Object value) {
|
||||
this(name, parent.getBinding(), value);
|
||||
}
|
||||
|
||||
public DiffElement getBinding() {
|
||||
return binding;
|
||||
}
|
||||
|
||||
protected void setBinding(DiffElement binding) {
|
||||
this.binding = binding;
|
||||
}
|
||||
|
||||
public Object getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return getValue().toString();
|
||||
}
|
||||
|
||||
public String toString(int offset) {
|
||||
return toString();
|
||||
}
|
||||
|
||||
public String toOneLineString() {
|
||||
return getValue().toString();
|
||||
}
|
||||
|
||||
public boolean isAtomic() { return true; }
|
||||
public boolean isCompound() { return ! isAtomic(); }
|
||||
public int size() { return 1; }
|
||||
}
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 1:09 PM
|
||||
*
|
||||
* Interface for readers creating diffable objects from a file
|
||||
*/
|
||||
public interface DiffableReader {
|
||||
@Ensures("result != null")
|
||||
/**
|
||||
* Return the name of this DiffableReader type. For example, the VCF reader returns 'VCF' and the
|
||||
* bam reader 'BAM'
|
||||
*/
|
||||
public String getName();
|
||||
|
||||
@Ensures("result != null")
|
||||
@Requires("file != null")
|
||||
/**
|
||||
* Read up to maxElementsToRead DiffElements from file, and return them.
|
||||
*/
|
||||
public DiffElement readFromFile(File file, int maxElementsToRead);
|
||||
|
||||
/**
|
||||
* Return true if the file can be read into DiffElement objects with this reader. This should
|
||||
* be uniquely true/false for all readers, as the system will use the first reader that can read the
|
||||
* file. This routine should never throw an exception. The VCF reader, for example, looks at the
|
||||
* first line of the file for the ##format=VCF4.1 header, and the BAM reader for the BAM_MAGIC value
|
||||
* @param file
|
||||
* @return
|
||||
*/
|
||||
@Requires("file != null")
|
||||
public boolean canRead(File file);
|
||||
}
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
public class Difference implements Comparable<Difference> {
|
||||
final String path; // X.Y.Z
|
||||
final String[] parts;
|
||||
int count = 0;
|
||||
|
||||
public Difference(String path) {
|
||||
this.path = path;
|
||||
this.parts = DiffEngine.diffNameToPath(path);
|
||||
}
|
||||
|
||||
public String[] getParts() {
|
||||
return parts;
|
||||
}
|
||||
|
||||
public void incCount() { count++; }
|
||||
|
||||
public int getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* The fully qualified path object A.B.C etc
|
||||
* @return
|
||||
*/
|
||||
public String getPath() {
|
||||
return path;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the length of the parts of this summary
|
||||
*/
|
||||
public int length() {
|
||||
return this.parts.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the string parts matches this summary. Matches are
|
||||
* must be equal() everywhere where this summary isn't *.
|
||||
* @param otherParts
|
||||
* @return
|
||||
*/
|
||||
public boolean matches(String[] otherParts) {
|
||||
if ( otherParts.length != length() )
|
||||
return false;
|
||||
|
||||
// TODO optimization: can start at right most non-star element
|
||||
for ( int i = 0; i < length(); i++ ) {
|
||||
String part = parts[i];
|
||||
if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s:%d", getPath(), getCount());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(Difference other) {
|
||||
// sort first highest to lowest count, then by lowest to highest path
|
||||
int countCmp = Integer.valueOf(count).compareTo(other.count);
|
||||
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:53 PM
|
||||
*
|
||||
* Represents a specific difference between two specific DiffElements
|
||||
*/
|
||||
public class SpecificDifference extends Difference {
|
||||
DiffElement master, test;
|
||||
|
||||
public SpecificDifference(DiffElement master, DiffElement test) {
|
||||
super(createName(master, test));
|
||||
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
|
||||
this.master = master;
|
||||
this.test = test;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s:%s!=%s",
|
||||
getPath(),
|
||||
getOneLineString(master),
|
||||
getOneLineString(test));
|
||||
}
|
||||
|
||||
private static String createName(DiffElement master, DiffElement test) {
|
||||
return (master == null ? test : master).fullyQualifiedName();
|
||||
}
|
||||
|
||||
private static String getOneLineString(DiffElement elt) {
|
||||
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 1:09 PM
|
||||
*
|
||||
* Class implementing diffnode reader for VCF
|
||||
*/
|
||||
public class VCFDiffableReader implements DiffableReader {
|
||||
@Override
|
||||
public String getName() { return "VCF"; }
|
||||
|
||||
@Override
|
||||
public DiffElement readFromFile(File file, int maxElementsToRead) {
|
||||
DiffNode root = DiffNode.rooted(file.getName());
|
||||
try {
|
||||
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
|
||||
VCFCodec vcfCodec = new VCFCodec();
|
||||
|
||||
// must be read as state is stored in reader itself
|
||||
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
|
||||
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
|
||||
String key = headerLine.getKey();
|
||||
if ( headerLine instanceof VCFNamedHeaderLine )
|
||||
key += "_" + ((VCFNamedHeaderLine) headerLine).getName();
|
||||
root.add(key, headerLine.toString());
|
||||
}
|
||||
|
||||
String line = lineReader.readLine();
|
||||
int count = 0;
|
||||
while ( line != null ) {
|
||||
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
|
||||
VariantContext vc = (VariantContext)vcfCodec.decode(line);
|
||||
String name = vc.getChr() + ":" + vc.getStart();
|
||||
DiffNode vcRoot = DiffNode.empty(name, root);
|
||||
|
||||
// add fields
|
||||
vcRoot.add("CHROM", vc.getChr());
|
||||
vcRoot.add("POS", vc.getStart());
|
||||
vcRoot.add("ID", vc.hasID() ? vc.getID() : VCFConstants.MISSING_VALUE_v4);
|
||||
vcRoot.add("REF", vc.getReference());
|
||||
vcRoot.add("ALT", vc.getAlternateAlleles());
|
||||
vcRoot.add("QUAL", vc.hasNegLog10PError() ? vc.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4);
|
||||
vcRoot.add("FILTER", vc.getFilters());
|
||||
|
||||
// add info fields
|
||||
for (Map.Entry<String, Object> attribute : vc.getAttributes().entrySet()) {
|
||||
if ( ! attribute.getKey().startsWith("_") && ! attribute.getKey().equals(VariantContext.ID_KEY))
|
||||
vcRoot.add(attribute.getKey(), attribute.getValue());
|
||||
}
|
||||
|
||||
for (Genotype g : vc.getGenotypes().values() ) {
|
||||
DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
|
||||
gRoot.add("GT", g.getGenotypeString());
|
||||
gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 );
|
||||
|
||||
for (Map.Entry<String, Object> attribute : g.getAttributes().entrySet()) {
|
||||
if ( ! attribute.getKey().startsWith("_") )
|
||||
gRoot.add(attribute.getKey(), attribute.getValue());
|
||||
}
|
||||
|
||||
vcRoot.add(gRoot);
|
||||
}
|
||||
|
||||
root.add(vcRoot);
|
||||
line = lineReader.readLine();
|
||||
}
|
||||
|
||||
lineReader.close();
|
||||
} catch ( IOException e ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return root.getBinding();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRead(File file) {
|
||||
try {
|
||||
final String VCF4_HEADER = "##fileformat=VCFv4";
|
||||
char[] buff = new char[VCF4_HEADER.length()];
|
||||
new FileReader(file).read(buff, 0, VCF4_HEADER.length());
|
||||
String firstLine = new String(buff);
|
||||
return firstLine.startsWith(VCF4_HEADER);
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
|
|
@ -37,7 +38,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -48,7 +48,7 @@ import java.io.PrintStream;
|
|||
* multi-sample data. The user can choose from several different incorporated calculation models.
|
||||
*/
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@ReadFilters( {BadMateFilter.class} )
|
||||
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} )
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
@By(DataSource.REFERENCE)
|
||||
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
|
||||
|
|
@ -158,7 +158,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
}
|
||||
|
||||
// FORMAT and INFO fields
|
||||
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
|
||||
headerInfo.addAll(getSupportedHeaderStrings());
|
||||
|
||||
// FILTER fields
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
|
||||
|
|
@ -167,6 +167,20 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
return headerInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
|
||||
* @return a set of VCF format lines
|
||||
*/
|
||||
private static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute at a given locus.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -634,17 +634,27 @@ public class UnifiedGenotyperEngine {
|
|||
if (vcInput == null)
|
||||
return null;
|
||||
|
||||
if (vcInput.isSNP() && ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
|
||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
// todo - no support to genotype MNP's yet
|
||||
if (vcInput.isMNP())
|
||||
return null;
|
||||
|
||||
if (vcInput.isSNP()) {
|
||||
if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
|
||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
else
|
||||
// ignore SNP's if user chose INDEL mode
|
||||
return null;
|
||||
}
|
||||
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL))
|
||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
|
||||
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)
|
||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)
|
||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.BadCigarFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -50,7 +50,7 @@ import java.io.PrintStream;
|
|||
/**
|
||||
* Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
|
||||
*/
|
||||
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, BadCigarFilter.class})
|
||||
@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, BadCigarFilter.class})
|
||||
@Reference(window=@Window(start=-1,stop=50))
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@By(DataSource.REFERENCE)
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ import java.util.*;
|
|||
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
|
||||
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
|
||||
*/
|
||||
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, PlatformUnitFilter.class})
|
||||
@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class})
|
||||
public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||
// @Output
|
||||
// PrintStream out;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -58,7 +58,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
|
|||
@Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
|
||||
@By(DataSource.READS)
|
||||
|
||||
@ReadFilters({ZeroMappingQualityReadFilter.class})
|
||||
@ReadFilters({MappingQualityZeroReadFilter.class})
|
||||
// Filter out all reads with zero mapping quality
|
||||
|
||||
public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
|
||||
|
|
@ -220,6 +220,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
}
|
||||
else {
|
||||
unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly
|
||||
|
||||
if (DEBUG)
|
||||
logger.debug("Unprocessed variant = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc));
|
||||
}
|
||||
|
||||
int numReads = 0;
|
||||
|
|
@ -239,7 +242,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
return new PhasingStatsAndOutput(phaseStats, completedList);
|
||||
}
|
||||
|
||||
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList("PQ"));
|
||||
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList(PQ_KEY));
|
||||
|
||||
private VariantContext reduceVCToSamples(VariantContext vc, List<String> samplesToPhase) {
|
||||
// for ( String sample : samplesToPhase )
|
||||
|
|
@ -1105,7 +1108,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
this.alleles = vc.getAlleles();
|
||||
this.genotypes = new HashMap<String, Genotype>(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable
|
||||
this.negLog10PError = vc.getNegLog10PError();
|
||||
this.filters = vc.getFilters();
|
||||
this.filters = vc.filtersWereApplied() ? vc.getFilters() : null;
|
||||
this.attributes = new HashMap<String, Object>(vc.getAttributes());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
|||
|
||||
import org.broad.tribble.bed.BEDCodec;
|
||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.commandline.Gather;
|
||||
|
|
@ -34,7 +35,7 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -75,7 +76,7 @@ import java.util.Map;
|
|||
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
|
||||
@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
|
||||
@ReadFilters( {MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
|
||||
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
|||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator;
|
||||
|
|
@ -30,10 +30,9 @@ import org.broadinstitute.sting.utils.exceptions.StingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import net.sf.picard.reference.FastaSequenceFile;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
import java.io.File;
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
@Analysis(description = "Counts different classes of variants in the sample")
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -9,10 +10,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair;
|
||||
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.IndelUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,22 +1,16 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Mendelian violation detection and counting
|
||||
* <p/>
|
||||
|
|
|
|||
|
|
@ -24,12 +24,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
|
||||
@Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites")
|
||||
public class PrintMissingComp extends VariantEvaluator {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -8,11 +9,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Degeneracy;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
@Analysis(description = "Ti/Tv Variant Evaluator")
|
||||
|
|
|
|||
|
|
@ -1,14 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -25,14 +25,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.tags;
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.util;
|
||||
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
|
|
@ -23,8 +23,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.util;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.lang.annotation.Annotation;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.tags;
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.util;
|
||||
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
|
|
@ -7,15 +7,12 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.RequiredStratification;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -199,8 +199,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
//hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||
//hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
|
||||
for ( VCFHeaderLine field : hInfo ) {
|
||||
|
|
|
|||
|
|
@ -9,9 +9,13 @@ import net.sf.samtools.SAMUtils;
|
|||
* @author Kiran Garimella
|
||||
*/
|
||||
public class QualityUtils {
|
||||
|
||||
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
public final static double MIN_REASONABLE_ERROR = 0.0001;
|
||||
public final static byte MAX_REASONABLE_Q_SCORE = 40;
|
||||
public final static byte MIN_USABLE_Q_SCORE = 6;
|
||||
|
||||
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
|
||||
|
||||
/**
|
||||
* Private constructor. No instantiating this class!
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ import org.broad.tribble.NameAwareCodec;
|
|||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -96,6 +98,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
for ( String str : headerStrings ) {
|
||||
if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
|
||||
String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR);
|
||||
if ( strings.length < VCFHeader.HEADER_FIELDS.values().length )
|
||||
throw new TribbleException.InvalidHeader("there are not enough columns present in the header line: " + str);
|
||||
|
||||
int arrayIndex = 0;
|
||||
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||
try {
|
||||
|
|
@ -159,12 +164,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
}
|
||||
|
||||
private Feature reallyDecode(String line) {
|
||||
try {
|
||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
||||
|
||||
// our header cannot be null, we need the genotype sample names and counts
|
||||
if (header == null) throw new IllegalStateException("VCF Header cannot be null when decoding a record");
|
||||
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
|
||||
|
||||
if (parts == null)
|
||||
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
|
||||
|
|
@ -174,17 +178,18 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
// if we don't have a header, or we have a header with no genotyping data, check that we have eight columns. Otherwise check that we have nine (normal columns + genotyping data)
|
||||
if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
|
||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||
throw new IllegalArgumentException("There aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||
" tokens, and saw " + nParts + " )");
|
||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||
" tokens, and saw " + nParts + " )", lineNo);
|
||||
|
||||
return parseVCFLine(parts);
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InvalidDecodeLine(e.getMessage(), line);
|
||||
}
|
||||
}
|
||||
|
||||
protected void generateException(String message) {
|
||||
throw new TribbleException.InvalidDecodeLine(message, lineNo);
|
||||
throw new UserException.MalformedVCF(message, lineNo);
|
||||
}
|
||||
|
||||
private static void generateException(String message, int lineNo) {
|
||||
throw new UserException.MalformedVCF(message, lineNo);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -472,10 +477,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
return true;
|
||||
}
|
||||
|
||||
private static void generateException(String message, int lineNo) {
|
||||
throw new TribbleException.InvalidDecodeLine(message, lineNo);
|
||||
}
|
||||
|
||||
private static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
||||
boolean clipping = true;
|
||||
// Note that the computation of forward clipping here is meant only to see whether there is a common
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broad.tribble.index.IndexFactory;
|
|||
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broad.tribble.util.PositionalStream;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
|
@ -123,12 +124,10 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
|
||||
try {
|
||||
// the file format field needs to be written first
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString() + "\n");
|
||||
|
||||
for ( VCFHeaderLine line : mHeader.getMetaData() ) {
|
||||
if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
|
||||
line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) ||
|
||||
line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) )
|
||||
if ( VCFHeaderVersion.isFormatString(line.getKey()) )
|
||||
continue;
|
||||
|
||||
// are the records filtered (so we know what to put in the FILTER column of passing records) ?
|
||||
|
|
@ -302,10 +301,7 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
} else {
|
||||
List<String> genotypeAttributeKeys = new ArrayList<String>();
|
||||
if ( vc.hasGenotypes() ) {
|
||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||
for ( String key : calcVCFGenotypeKeys(vc) ) {
|
||||
genotypeAttributeKeys.add(key);
|
||||
}
|
||||
genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc));
|
||||
} else if ( mHeader.hasGenotypingData() ) {
|
||||
// this needs to be done in case all samples are no-calls
|
||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||
|
|
@ -358,16 +354,8 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
mWriter.write(key);
|
||||
|
||||
if ( !entry.getValue().equals("") ) {
|
||||
int numVals = 1;
|
||||
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
|
||||
if ( metaData != null )
|
||||
numVals = metaData.getCount();
|
||||
|
||||
// take care of unbounded encoding
|
||||
if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
|
||||
numVals = 1;
|
||||
|
||||
if ( numVals > 0 ) {
|
||||
if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) {
|
||||
mWriter.write("=");
|
||||
mWriter.write(entry.getValue());
|
||||
}
|
||||
|
|
@ -397,16 +385,22 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
continue;
|
||||
}
|
||||
|
||||
writeAllele(g.getAllele(0), alleleMap);
|
||||
for (int i = 1; i < g.getPloidy(); i++) {
|
||||
mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
|
||||
writeAllele(g.getAllele(i), alleleMap);
|
||||
}
|
||||
|
||||
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||
for ( String key : genotypeFormatKeys ) {
|
||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) )
|
||||
|
||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
if ( !g.isAvailable() ) {
|
||||
throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record");
|
||||
}
|
||||
|
||||
writeAllele(g.getAllele(0), alleleMap);
|
||||
for (int i = 1; i < g.getPloidy(); i++) {
|
||||
mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
|
||||
writeAllele(g.getAllele(i), alleleMap);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
|
||||
|
||||
|
|
@ -423,7 +417,7 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
|
||||
if ( metaData != null ) {
|
||||
int numInFormatField = metaData.getCount();
|
||||
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
|
||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||
// For example, if Number=2, the string has to be ".,."
|
||||
|
|
@ -450,9 +444,10 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
break;
|
||||
}
|
||||
|
||||
for (String s : attrs ) {
|
||||
mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
|
||||
mWriter.write(s);
|
||||
for (int i = 0; i < attrs.size(); i++) {
|
||||
if ( i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY) )
|
||||
mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
|
||||
mWriter.write(attrs.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -498,10 +493,13 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
|
||||
Set<String> keys = new HashSet<String>();
|
||||
|
||||
boolean sawGoodGT = false;
|
||||
boolean sawGoodQual = false;
|
||||
boolean sawGenotypeFilter = false;
|
||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
keys.addAll(g.getAttributes().keySet());
|
||||
if ( g.isAvailable() )
|
||||
sawGoodGT = true;
|
||||
if ( g.hasNegLog10PError() )
|
||||
sawGoodQual = true;
|
||||
if (g.isFiltered() && g.isCalled())
|
||||
|
|
@ -514,7 +512,17 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
if (sawGenotypeFilter)
|
||||
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
|
||||
return ParsingUtils.sortList(new ArrayList<String>(keys));
|
||||
List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));
|
||||
|
||||
// make sure the GT is first
|
||||
if ( sawGoodGT ) {
|
||||
List<String> newList = new ArrayList<String>(sortedList.size()+1);
|
||||
newList.add(VCFConstants.GENOTYPE_KEY);
|
||||
newList.addAll(sortedList);
|
||||
sortedList = newList;
|
||||
}
|
||||
|
||||
return sortedList;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -141,8 +141,6 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
boolean missing = i >= GTValueSplitSize;
|
||||
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
if (i != 0)
|
||||
generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes");
|
||||
genotypeAlleleLocation = i;
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||
|
|
@ -156,12 +154,13 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a gentoype field
|
||||
if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field");
|
||||
// check to make sure we found a genotype field
|
||||
if ( genotypeAlleleLocation < 0 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes");
|
||||
|
||||
// todo -- assuming allele list length in the single digits is bad. Fix me.
|
||||
// Check for > 1 for haploid genotypes
|
||||
boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|';
|
||||
boolean phased = GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,28 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* A class representing a key=value entry for ALT fields in the VCF header
|
||||
*/
|
||||
public class VCFAltHeaderLine extends VCFSimpleHeaderLine {
|
||||
|
||||
/**
|
||||
* create a VCF ALT header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFAltHeaderLine(String name, String description) {
|
||||
super(name, description, SupportedHeaderLineType.ALT);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF ALT header line
|
||||
*
|
||||
* @param line the header line
|
||||
* @param version the vcf header version
|
||||
*/
|
||||
protected VCFAltHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super(line, version, SupportedHeaderLineType.ALT);
|
||||
}
|
||||
}
|
||||
|
|
@ -145,8 +145,6 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
|
||||
// todo -- all of these on the fly parsing of the missing value should be static constants
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
if (i != 0)
|
||||
generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes");
|
||||
genotypeAlleleLocation = i;
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||
|
|
@ -160,22 +158,24 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a gentoype field
|
||||
// TODO -- This is no longer required in v4.1
|
||||
if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field");
|
||||
// check to make sure we found a genotype field if we are a VCF4.0 file
|
||||
if ( version == VCFHeaderVersion.VCF4_0 && genotypeAlleleLocation == -1 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
|
||||
|
||||
// todo -- assuming allele list length in the single digits is bad. Fix me.
|
||||
// Check for > 1 for haploid genotypes
|
||||
boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|';
|
||||
List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? null : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
|
||||
boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.put(sampleName, new Genotype(sampleName,
|
||||
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap),
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
gtAttributes,
|
||||
phased));
|
||||
genotypes.put(sampleName,
|
||||
new Genotype(sampleName,
|
||||
GTalleles,
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
gtAttributes,
|
||||
phased));
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -43,26 +45,43 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
|
||||
// the field types
|
||||
private String name;
|
||||
private int count;
|
||||
private int count = -1;
|
||||
private VCFHeaderLineCount countType;
|
||||
private String description;
|
||||
private VCFHeaderLineType type;
|
||||
|
||||
// access methods
|
||||
public String getName() { return name; }
|
||||
public int getCount() { return count; }
|
||||
public String getDescription() { return description; }
|
||||
public VCFHeaderLineType getType() { return type; }
|
||||
public VCFHeaderLineCount getCountType() { return countType; }
|
||||
public int getCount() {
|
||||
if ( countType != VCFHeaderLineCount.INTEGER )
|
||||
throw new ReviewedStingException("Asking for header line count when type is not an integer");
|
||||
return count;
|
||||
}
|
||||
|
||||
//
|
||||
public void setNumberToUnbounded() { this.count = UNBOUNDED; }
|
||||
// utility method
|
||||
public int getCount(int numAltAlleles) {
|
||||
int myCount;
|
||||
switch ( countType ) {
|
||||
case INTEGER: myCount = count; break;
|
||||
case UNBOUNDED: myCount = -1; break;
|
||||
case A: myCount = numAltAlleles; break;
|
||||
case G: myCount = ((numAltAlleles + 1) * (numAltAlleles + 2) / 2); break;
|
||||
default: throw new ReviewedStingException("Unknown count type: " + countType);
|
||||
}
|
||||
return myCount;
|
||||
}
|
||||
|
||||
public void setNumberToUnbounded() {
|
||||
countType = VCFHeaderLineCount.UNBOUNDED;
|
||||
count = -1;
|
||||
}
|
||||
|
||||
// our type of line, i.e. format, info, etc
|
||||
private final SupportedHeaderLineType lineType;
|
||||
|
||||
// line numerical values are allowed to be unbounded (or unknown), which is
|
||||
// marked with a dot (.)
|
||||
public static final int UNBOUNDED = -1; // the value we store internally for unbounded types
|
||||
|
||||
/**
|
||||
* create a VCF compound header line (e.g. a format or info line, as given by lineType) with a fixed integer count
|
||||
*
|
||||
|
|
@ -70,10 +89,12 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
* @param count the count for this header line
|
||||
* @param type the type for this header line
|
||||
* @param description the description for this header line
|
||||
* @param lineType the header line type
|
||||
*/
|
||||
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
this.name = name;
|
||||
this.countType = VCFHeaderLineCount.INTEGER;
|
||||
this.count = count;
|
||||
this.type = type;
|
||||
this.description = description;
|
||||
|
|
@ -81,20 +102,53 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
validate();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF compound header line (e.g. a format or info line, as given by lineType) with a count type
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param count the count type for this header line
|
||||
* @param type the type for this header line
|
||||
* @param description the description for this header line
|
||||
* @param lineType the header line type
|
||||
*/
|
||||
protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
this.name = name;
|
||||
this.countType = count;
|
||||
this.type = type;
|
||||
this.description = description;
|
||||
this.lineType = lineType;
|
||||
validate();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF compound header line (e.g. a format or info line, as given by lineType) by parsing a header line string
|
||||
*
|
||||
* @param line the header line
|
||||
* @param version the VCF header version
|
||||
* @param lineType the header line type
|
||||
*
|
||||
*/
|
||||
protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
|
||||
name = mapping.get("ID");
|
||||
count = (version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) ?
|
||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
|
||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
|
||||
count = -1;
|
||||
final String numberStr = mapping.get("Number");
|
||||
if ( numberStr.equals(VCFConstants.PER_ALLELE_COUNT) ) {
|
||||
countType = VCFHeaderLineCount.A;
|
||||
} else if ( numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT) ) {
|
||||
countType = VCFHeaderLineCount.G;
|
||||
} else if ( ((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) &&
|
||||
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) ||
|
||||
((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) &&
|
||||
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3)) ) {
|
||||
countType = VCFHeaderLineCount.UNBOUNDED;
|
||||
} else {
|
||||
countType = VCFHeaderLineCount.INTEGER;
|
||||
count = Integer.valueOf(numberStr);
|
||||
|
||||
}
|
||||
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
||||
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
|
||||
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
|
||||
|
|
@ -121,7 +175,15 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
protected String toStringEncoding() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
|
||||
Object number;
|
||||
switch ( countType ) {
|
||||
case A: number = VCFConstants.PER_ALLELE_COUNT; break;
|
||||
case G: number = VCFConstants.PER_GENOTYPE_COUNT; break;
|
||||
case UNBOUNDED: number = VCFConstants.UNBOUNDED_ENCODING_v4; break;
|
||||
case INTEGER:
|
||||
default: number = count;
|
||||
}
|
||||
map.put("Number", number);
|
||||
map.put("Type", type);
|
||||
map.put("Description", description);
|
||||
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
|
||||
|
|
@ -136,15 +198,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
if ( !(o instanceof VCFCompoundHeaderLine) )
|
||||
return false;
|
||||
VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o;
|
||||
return name.equals(other.name) &&
|
||||
count == other.count &&
|
||||
description.equals(other.description) &&
|
||||
type == other.type &&
|
||||
lineType == other.lineType;
|
||||
return equalsExcludingDescription(other) &&
|
||||
description.equals(other.description);
|
||||
}
|
||||
|
||||
public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
|
||||
return count == other.count &&
|
||||
countType == other.countType &&
|
||||
type == other.type &&
|
||||
lineType == other.lineType &&
|
||||
name.equals(other.name);
|
||||
|
|
|
|||
|
|
@ -99,6 +99,8 @@ public final class VCFConstants {
|
|||
public static final String MISSING_DEPTH_v3 = "-1";
|
||||
public static final String UNBOUNDED_ENCODING_v4 = ".";
|
||||
public static final String UNBOUNDED_ENCODING_v3 = "-1";
|
||||
public static final String PER_ALLELE_COUNT = "A";
|
||||
public static final String PER_GENOTYPE_COUNT = "G";
|
||||
public static final String EMPTY_ALLELE = ".";
|
||||
public static final String EMPTY_GENOTYPE = "./.";
|
||||
public static final double MAX_GENOTYPE_QUAL = 99.0;
|
||||
|
|
|
|||
|
|
@ -1,19 +1,10 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* A class representing a key=value entry for FILTER fields in the VCF header
|
||||
*/
|
||||
public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
||||
|
||||
private String name;
|
||||
private String description;
|
||||
|
||||
public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
|
|
@ -22,12 +13,7 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
|
|||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFFilterHeaderLine(String name, String description) {
|
||||
super("FILTER", "");
|
||||
this.name = name;
|
||||
this.description = description;
|
||||
|
||||
if ( name == null || description == null )
|
||||
throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
|
||||
super(name, description, SupportedHeaderLineType.FILTER);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -37,34 +23,6 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
|
|||
* @param version the vcf header version
|
||||
*/
|
||||
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super("FILTER", "");
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
|
||||
name = mapping.get("ID");
|
||||
description = mapping.get("Description");
|
||||
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
|
||||
description = UNBOUND_DESCRIPTION;
|
||||
}
|
||||
|
||||
protected String toStringEncoding() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Description", description);
|
||||
return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFFilterHeaderLine) )
|
||||
return false;
|
||||
VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
|
||||
return name.equals(other.name) &&
|
||||
description.equals(other.description);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
super(line, version, SupportedHeaderLineType.FILTER);
|
||||
}
|
||||
}
|
||||
|
|
@ -16,6 +16,10 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
|
|||
throw new IllegalArgumentException("Flag is an unsupported type for format fields");
|
||||
}
|
||||
|
||||
/**
 * create a VCF format header line whose count is given as a VCFHeaderLineCount
 * rather than as an int
 *
 * @param name         the name for this header line
 * @param count        the count encoding for this header line
 * @param type         the type for this header line
 * @param description  the description for this header line
 *
 * NOTE(review): the constructor just above this one ends by throwing
 * "Flag is an unsupported type for format fields"; this overload performs no
 * such check before delegating -- confirm whether that is intended.
 */
public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
|
||||
super(name, count, type, description, SupportedHeaderLineType.FORMAT);
|
||||
}
|
||||
|
||||
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super(line, version, SupportedHeaderLineType.FORMAT);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
/**
 * the count encodings we use for fields in VCF header lines
 */
public enum VCFHeaderLineCount {
    /** presumably an explicit integer count is supplied alongside -- confirm against VCFCompoundHeaderLine */
    INTEGER,

    /** matches the "A" count code (VCFConstants.PER_ALLELE_COUNT) */
    A,

    /** matches the "G" count code (VCFConstants.PER_GENOTYPE_COUNT) */
    G,

    /** no fixed count; VCFConstants holds the textual encodings (UNBOUNDED_ENCODING_v4 / _v3) */
    UNBOUNDED
}
|
||||
|
|
@ -13,6 +13,10 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
|
|||
super(name, count, type, description, SupportedHeaderLineType.INFO);
|
||||
}
|
||||
|
||||
/**
 * create a VCF info header line whose count is given as a VCFHeaderLineCount
 * rather than as an int; all arguments are handed to the parent constructor
 * together with SupportedHeaderLineType.INFO
 *
 * @param name         the name for this header line
 * @param count        the count encoding for this header line
 * @param type         the type for this header line
 * @param description  the description for this header line
 */
public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
|
||||
super(name, count, type, description, SupportedHeaderLineType.INFO);
|
||||
}
|
||||
|
||||
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super(line, version, SupportedHeaderLineType.INFO);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* A class representing a key=value entry for simple VCF header types
|
||||
*/
|
||||
public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
||||
|
||||
public enum SupportedHeaderLineType {
|
||||
FILTER, ALT;
|
||||
}
|
||||
|
||||
private String name;
|
||||
private String description;
|
||||
|
||||
// our type of line, i.e. filter, alt, etc
|
||||
private final SupportedHeaderLineType lineType;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param description the description for this header line
|
||||
* @param lineType the header line type
|
||||
*/
|
||||
public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
this.lineType = lineType;
|
||||
this.name = name;
|
||||
this.description = description;
|
||||
|
||||
if ( name == null || description == null )
|
||||
throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
*
|
||||
* @param line the header line
|
||||
* @param version the vcf header version
|
||||
* @param lineType the header line type
|
||||
*/
|
||||
protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
this.lineType = lineType;
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
|
||||
name = mapping.get("ID");
|
||||
description = mapping.get("Description");
|
||||
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
|
||||
description = UNBOUND_DESCRIPTION;
|
||||
}
|
||||
|
||||
protected String toStringEncoding() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Description", description);
|
||||
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFSimpleHeaderLine) )
|
||||
return false;
|
||||
VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o;
|
||||
return name.equals(other.name) &&
|
||||
description.equals(other.description);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
}
|
||||
|
|
@ -180,19 +180,4 @@ public class VCFUtils {
|
|||
|
||||
return new HashSet<VCFHeaderLine>(map.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
|
||||
* @return a set of VCF format lines
|
||||
*/
|
||||
public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -154,6 +154,16 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
}
|
||||
|
||||
public static class MalformedVCF extends UserException {
|
||||
public MalformedVCF(String message, String line) {
|
||||
super(String.format("The provided VCF file is malformed at line %s: %s", line, message));
|
||||
}
|
||||
|
||||
public MalformedVCF(String message, int lineNo) {
|
||||
super(String.format("The provided VCF file is malformed at line nmber %d: %s", lineNo, message));
|
||||
}
|
||||
}
|
||||
|
||||
public static class ReadMissingReadGroup extends MalformedBAM {
|
||||
public ReadMissingReadGroup(SAMRecord read) {
|
||||
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ public class Allele implements Comparable<Allele> {
|
|||
this.bases = bases;
|
||||
|
||||
if ( ! acceptableAlleleBases(bases) )
|
||||
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
|
||||
throw new IllegalArgumentException("Unexpected base in allele bases \'" + new String(bases)+"\'");
|
||||
}
|
||||
|
||||
private Allele(String bases, boolean isRef) {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -19,12 +20,14 @@ public class Genotype {
|
|||
protected InferredGeneticContext commonInfo;
|
||||
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
|
||||
protected List<Allele> alleles = null; // new ArrayList<Allele>();
|
||||
protected Type type = null;
|
||||
|
||||
protected boolean isPhased = false;
|
||||
private boolean filtersWereAppliedToContext;
|
||||
protected boolean filtersWereAppliedToContext;
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
if ( alleles != null )
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
||||
filtersWereAppliedToContext = filters != null;
|
||||
this.isPhased = isPhased;
|
||||
|
|
@ -66,6 +69,9 @@ public class Genotype {
|
|||
}
|
||||
|
||||
public List<Allele> getAlleles(Allele allele) {
|
||||
if ( getType() == Type.UNAVAILABLE )
|
||||
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||
|
||||
List<Allele> al = new ArrayList<Allele>();
|
||||
for ( Allele a : alleles )
|
||||
if ( a.equals(allele) )
|
||||
|
|
@ -75,6 +81,8 @@ public class Genotype {
|
|||
}
|
||||
|
||||
public Allele getAllele(int i) {
|
||||
if ( getType() == Type.UNAVAILABLE )
|
||||
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
||||
|
|
@ -89,10 +97,21 @@ public class Genotype {
|
|||
NO_CALL,
|
||||
HOM_REF,
|
||||
HET,
|
||||
HOM_VAR
|
||||
HOM_VAR,
|
||||
UNAVAILABLE
|
||||
}
|
||||
|
||||
public Type getType() {
|
||||
if ( type == null ) {
|
||||
type = determineType();
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
protected Type determineType() {
|
||||
if ( alleles == null )
|
||||
return Type.UNAVAILABLE;
|
||||
|
||||
Allele firstAllele = alleles.get(0);
|
||||
|
||||
if ( firstAllele.isNoCall() ) {
|
||||
|
|
@ -122,7 +141,8 @@ public class Genotype {
|
|||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
|
||||
*/
|
||||
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
||||
public boolean isCalled() { return getType() != Type.NO_CALL; }
|
||||
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
|
||||
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
|
||||
|
||||
//
|
||||
// Useful methods for getting genotype likelihoods for a genotype object, if present
|
||||
|
|
@ -157,8 +177,8 @@ public class Genotype {
|
|||
}
|
||||
|
||||
public void validate() {
|
||||
if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles");
|
||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles");
|
||||
if ( alleles == null ) return;
|
||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0");
|
||||
|
||||
int nNoCalls = 0;
|
||||
for ( Allele allele : alleles ) {
|
||||
|
|
@ -175,6 +195,9 @@ public class Genotype {
|
|||
}
|
||||
|
||||
public String getGenotypeString(boolean ignoreRefState) {
|
||||
if ( alleles == null )
|
||||
return null;
|
||||
|
||||
// Notes:
|
||||
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
|
||||
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
|
||||
|
|
|
|||
|
|
@ -867,7 +867,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
for ( String name : sampleNames ) {
|
||||
if ( map.containsKey(name) ) throw new IllegalArgumentException("Duplicate names detected in requested samples " + sampleNames);
|
||||
map.put(name, getGenotype(name));
|
||||
final Genotype g = getGenotype(name);
|
||||
if ( g != null ) {
|
||||
map.put(name, g);
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
|
|
@ -1203,9 +1206,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName());
|
||||
|
||||
for ( Allele gAllele : g.getAlleles() ) {
|
||||
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
|
||||
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
|
||||
if ( g.isAvailable() ) {
|
||||
for ( Allele gAllele : g.getAlleles() ) {
|
||||
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
|
||||
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.apache.commons.io.FileUtils;
|
|||
import org.apache.log4j.*;
|
||||
import org.apache.log4j.spi.LoggingEvent;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
|
||||
|
|
@ -334,11 +335,14 @@ public abstract class BaseTest {
|
|||
|
||||
if (parameterize || expectedMD5.equals("")) {
|
||||
// Don't assert
|
||||
} else {
|
||||
Assert.assertEquals(filemd5sum, expectedMD5, name + " Mismatching MD5s");
|
||||
} else if ( filemd5sum.equals(expectedMD5) ) {
|
||||
System.out.println(String.format(" => %s PASSED", name));
|
||||
} else {
|
||||
Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum));
|
||||
}
|
||||
|
||||
|
||||
|
||||
return filemd5sum;
|
||||
}
|
||||
|
||||
|
|
@ -381,7 +385,12 @@ public abstract class BaseTest {
|
|||
System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File);
|
||||
System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File);
|
||||
|
||||
// todo -- add support for simple inline display of the first N differences for text file
|
||||
// inline differences
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0);
|
||||
boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params);
|
||||
if ( success )
|
||||
System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n",
|
||||
pathToExpectedMD5File, pathToFileMD5File);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,9 @@
|
|||
package org.broadinstitute.sting;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
|
|
@ -63,8 +65,20 @@ public class WalkerTest extends BaseTest {
|
|||
throw new StingException("Found an index created for file " + resultFile + " but we can only validate VCF files. Extend this code!");
|
||||
}
|
||||
|
||||
System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile);
|
||||
Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(resultFile, indexFile, new VCFCodec()), "Index on disk from indexing on the fly not equal to the index created after the run completed");
|
||||
assertOnDiskIndexEqualToNewlyCreatedIndex(indexFile, name, resultFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static void assertOnDiskIndexEqualToNewlyCreatedIndex(final File indexFile, final String name, final File resultFile) {
|
||||
System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile);
|
||||
Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec());
|
||||
Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath());
|
||||
|
||||
if ( ! indexFromOutputFile.equalsIgnoreTimestamp(dynamicIndex) ) {
|
||||
Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n",
|
||||
indexFromOutputFile.getProperties(),
|
||||
dynamicIndex.getProperties()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347"));
|
||||
Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c"));
|
||||
executeTest("test file has annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -23,7 +23,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsNotAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("1de8e943fbf55246ebd19efa32f22a58"));
|
||||
Arrays.asList("964f1016ec9a3c55333f62dd834c14d6"));
|
||||
executeTest("test file has annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("93c110e45fd4aedb044a8a5501e23336"));
|
||||
Arrays.asList("8e7de435105499cd71ffc099e268a83e"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("f5cb45910ed719f46159f9f71acaecf4"));
|
||||
Arrays.asList("64b6804cb1e27826e3a47089349be581"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -47,7 +47,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("4b48e7d095ef73e3151542ea976ecd89"));
|
||||
Arrays.asList("42ccee09fa9f8c58f4a0d4f1139c094f"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsNotAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("28dfbfd178aca071b948cd3dc2365357"));
|
||||
Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a330a5bc3ee72a51dbeb7e6c97a0db99"));
|
||||
Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("3a31d1ef471acfb881a2dec7963fe3f4"));
|
||||
Arrays.asList("09f8e840770a9411ff77508e0ed0837f"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testOverwritingHeader() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("a63fd8ff7bafbd46b7f009144a7c2ad1"));
|
||||
Arrays.asList("78d2c19f8107d865970dbaf3e12edd92"));
|
||||
executeTest("test overwriting header", spec);
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
|
||||
Arrays.asList("36378f1245bb99d902fbfe147605bc42"));
|
||||
Arrays.asList("16e3a1403fc376320d7c69492cad9345"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -95,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
|
||||
Arrays.asList("0257a1cc3c703535b2d3c5046bf88ab7"));
|
||||
Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d"));
|
||||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -103,7 +103,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
|
||||
Arrays.asList("2d7c73489dcf0db433bebdf79a068764"));
|
||||
Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688"));
|
||||
executeTest("getting DB tag with HM3", spec);
|
||||
}
|
||||
|
||||
|
|
@ -111,13 +111,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpression() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1,
|
||||
Arrays.asList("2f6efd08d818faa1eb0631844437c64a"));
|
||||
Arrays.asList("e9c0d832dc6b4ed06c955060f830c140"));
|
||||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTabixAnnotations() {
|
||||
final String MD5 = "6c7a6a1c0027bf82656542a9b2671a35";
|
||||
final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";
|
||||
for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -A HomopolymerRun -B:variant,VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1,
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
|
|||
*/
|
||||
|
||||
|
||||
String[] md5WithDashSArg = {"3d3b61a83c1189108eabb2df04218099"};
|
||||
String[] md5WithDashSArg = {"efba4ce1641cfa2ef88a64395f2ebce8"};
|
||||
WalkerTestSpec specWithSArg = new WalkerTestSpec(
|
||||
"-T GenomicAnnotator -R " + b36KGReference +
|
||||
" -B:variant,vcf3 /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf" +
|
||||
|
|
@ -58,7 +58,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("caa562160733aa638e1ba413ede209ae")
|
||||
Arrays.asList("772fc3f43b70770ec6c6acbb8bbbd4c0")
|
||||
);
|
||||
executeTest("testGenomicAnnotatorOnIndels", testOnIndels);
|
||||
}
|
||||
|
|
@ -76,7 +76,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("a4cf76f08fa90284b6988a464b6e0c17")
|
||||
Arrays.asList("081ade7f3d2d3c5f19cb1e8651a626f3")
|
||||
);
|
||||
executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"-B:beagleR2,BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
|
||||
"-B:beagleProbs,BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
|
||||
"-B:beaglePhased,BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
|
||||
"-o %s -NO_HEADER", 1, Arrays.asList("6bccee48ad2f06ba5a8c774fed444478"));
|
||||
"-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4"));
|
||||
executeTest("test BeagleOutputToVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +60,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"-T ProduceBeagleInput -B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
|
||||
"-B:validation,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
|
||||
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta -NO_HEADER ",2,
|
||||
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","223fb977e8db567dcaf632c6ee51f294"));
|
||||
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166"));
|
||||
executeTest("test BeagleInputWithBootstrap",spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"-B:beagleR2,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
|
||||
"-B:beagleProbs,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
|
||||
"-B:beaglePhased,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
|
||||
"-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("24b88ef8cdf6e347daab491f0256be5a"));
|
||||
"-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965"));
|
||||
|
||||
executeTest("testBeagleChangesSitesToRef",spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,229 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Basic unit test for DifferableReaders in reduced reads
|
||||
*/
|
||||
public class DiffEngineUnitTest extends BaseTest {
|
||||
DiffEngine engine;
|
||||
|
||||
@BeforeClass(enabled = true)
|
||||
public void createDiffEngine() {
|
||||
engine = new DiffEngine();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Difference testing routines
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class DifferenceTest extends TestDataProvider {
|
||||
public DiffElement tree1, tree2;
|
||||
public List<String> differences;
|
||||
|
||||
private DifferenceTest(String tree1, String tree2) {
|
||||
this(tree1, tree2, Collections.<String>emptyList());
|
||||
}
|
||||
|
||||
private DifferenceTest(String tree1, String tree2, String difference) {
|
||||
this(tree1, tree2, Arrays.asList(difference));
|
||||
}
|
||||
|
||||
private DifferenceTest(String tree1, String tree2, List<String> differences) {
|
||||
super(DifferenceTest.class);
|
||||
this.tree1 = DiffNode.fromString(tree1);
|
||||
this.tree2 = DiffNode.fromString(tree2);
|
||||
this.differences = differences;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("tree1=%s tree2=%s diff=%s",
|
||||
tree1.toOneLineString(), tree2.toOneLineString(), differences);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "trees")
|
||||
public Object[][] createTrees() {
|
||||
new DifferenceTest("A=X", "A=X");
|
||||
new DifferenceTest("A=X", "A=Y", "A:X!=Y");
|
||||
new DifferenceTest("A=X", "B=X", Arrays.asList("A:X!=MISSING", "B:MISSING!=X"));
|
||||
new DifferenceTest("A=(X=1)", "B=(X=1)", Arrays.asList("A:(X=1)!=MISSING", "B:MISSING!=(X=1)"));
|
||||
new DifferenceTest("A=(X=1)", "A=(X=1)");
|
||||
new DifferenceTest("A=(X=1 Y=2)", "A=(X=1 Y=2)");
|
||||
new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=3))");
|
||||
new DifferenceTest("A=(X=1)", "A=(X=2)", "A.X:1!=2");
|
||||
new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=4))", "A.B.Z:3!=4");
|
||||
new DifferenceTest("A=(X=1)", "A=(X=1 Y=2)", "A.Y:MISSING!=2");
|
||||
new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2)", "A.B:(Z=3)!=MISSING");
|
||||
return DifferenceTest.getTests(DifferenceTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "trees")
|
||||
public void testDiffs(DifferenceTest test) {
|
||||
logger.warn("Test tree1: " + test.tree1.toOneLineString());
|
||||
logger.warn("Test tree2: " + test.tree2.toOneLineString());
|
||||
|
||||
List<SpecificDifference> diffs = engine.diff(test.tree1, test.tree2);
|
||||
logger.warn("Test expected diff : " + test.differences);
|
||||
logger.warn("Observed diffs : " + diffs);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Low-level routines for summarizing differences
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLongestCommonPostfix() {
|
||||
testLongestCommonPostfixHelper("A", "A", 1);
|
||||
testLongestCommonPostfixHelper("A", "B", 0);
|
||||
testLongestCommonPostfixHelper("A.B", "A.B", 2);
|
||||
testLongestCommonPostfixHelper("A.B.C", "A.B.C", 3);
|
||||
testLongestCommonPostfixHelper("A.B.C", "X.B.C", 2);
|
||||
testLongestCommonPostfixHelper("A.B.C", "X.Y.C", 1);
|
||||
testLongestCommonPostfixHelper("A.B.C", "X.Y.Z", 0);
|
||||
testLongestCommonPostfixHelper("A.B.C", "A.X.C", 1);
|
||||
testLongestCommonPostfixHelper("A.B.C", "A.X.Z", 0);
|
||||
testLongestCommonPostfixHelper("A.B.C", "A.B.Z", 0);
|
||||
}
|
||||
|
||||
public void testLongestCommonPostfixHelper(String p1, String p2, int expected) {
|
||||
String[] parts1 = p1.split("\\.");
|
||||
String[] parts2 = p2.split("\\.");
|
||||
int obs = DiffEngine.longestCommonPostfix(parts1, parts2);
|
||||
Assert.assertEquals(obs, expected, "p1=" + p1 + " p2=" + p2 + " failed");
|
||||
}
|
||||
|
||||
@Test(enabled = true, dependsOnMethods = "testLongestCommonPostfix")
|
||||
public void testSummarizePath() {
|
||||
testSummarizePathHelper("A", "A", "A");
|
||||
testSummarizePathHelper("A", "B", "*");
|
||||
testSummarizePathHelper("A.B", "A.B", "A.B");
|
||||
testSummarizePathHelper("A.B", "X.B", "*.B");
|
||||
testSummarizePathHelper("A.B", "X.Y", "*.*");
|
||||
testSummarizePathHelper("A.B.C", "A.B.C", "A.B.C");
|
||||
testSummarizePathHelper("A.B.C", "X.B.C", "*.B.C");
|
||||
testSummarizePathHelper("A.B.C", "X.Y.C", "*.*.C");
|
||||
testSummarizePathHelper("A.B.C", "X.Y.Z", "*.*.*");
|
||||
testSummarizePathHelper("A.B.C", "A.X.C", "*.*.C");
|
||||
testSummarizePathHelper("A.B.C", "A.X.Z", "*.*.*");
|
||||
testSummarizePathHelper("A.B.C", "A.B.Z", "*.*.*");
|
||||
}
|
||||
|
||||
public void testSummarizePathHelper(String p1, String p2, String expected) {
|
||||
String[] parts1 = DiffEngine.diffNameToPath(p1);
|
||||
String[] parts2 = DiffEngine.diffNameToPath(p2);
|
||||
int obs = DiffEngine.longestCommonPostfix(parts1, parts2);
|
||||
String path = DiffEngine.summarizedPath(parts2, obs);
|
||||
Assert.assertEquals(path, expected, "p1=" + p1 + " p2=" + p2 + " failed");
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// High-level difference summary
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class SummarizeDifferenceTest extends TestDataProvider {
|
||||
List<String> diffs = new ArrayList<String>();
|
||||
List<String> expecteds = new ArrayList<String>();
|
||||
|
||||
public SummarizeDifferenceTest() { super(SummarizeDifferenceTest.class); }
|
||||
|
||||
public SummarizeDifferenceTest addDiff(String... diffsToAdd) {
|
||||
diffs.addAll(Arrays.asList(diffsToAdd));
|
||||
return this;
|
||||
}
|
||||
|
||||
public SummarizeDifferenceTest addSummary(String... expectedSummary) {
|
||||
expecteds.addAll(Arrays.asList(expectedSummary));
|
||||
return this;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("diffs=%s => expected=%s", diffs, expecteds);
|
||||
}
|
||||
|
||||
public void test() {
|
||||
List<String[]> diffPaths = new ArrayList<String[]>(diffs.size());
|
||||
for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); }
|
||||
|
||||
List<Difference> sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs);
|
||||
|
||||
Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs);
|
||||
|
||||
for ( int i = 0; i < sumDiffs.size(); i++ ) {
|
||||
Difference sumDiff = sumDiffs.get(i);
|
||||
String expected = expecteds.get(i);
|
||||
String[] pathCount = expected.split(":");
|
||||
String path = pathCount[0];
|
||||
int count = Integer.valueOf(pathCount[1]);
|
||||
Assert.assertEquals(sumDiff.getPath(), path, "Unexpected path at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs);
|
||||
Assert.assertEquals(sumDiff.getCount(), count, "Unexpected counts at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "summaries")
|
||||
public Object[][] createSummaries() {
|
||||
new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2");
|
||||
new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1");
|
||||
new SummarizeDifferenceTest().addDiff("A", "A", "A").addSummary("A:3");
|
||||
new SummarizeDifferenceTest().addDiff("A", "A", "A", "B").addSummary("A:3", "B:1");
|
||||
new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B").addSummary("A:3", "B:2");
|
||||
new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B", "C").addSummary("A:3", "B:2", "C:1");
|
||||
new SummarizeDifferenceTest().addDiff("A.X", "A.X").addSummary("A.X:2");
|
||||
new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X").addSummary("*.X:3", "A.X:2", "B.X:1");
|
||||
new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X", "B.X").addSummary("*.X:4", "A.X:2", "B.X:2");
|
||||
new SummarizeDifferenceTest().addDiff("A.B.C", "X.B.C").addSummary("*.B.C:2", "A.B.C:1", "X.B.C:1");
|
||||
new SummarizeDifferenceTest().addDiff("A.B.C", "X.Y.C", "X.Y.C").addSummary("*.*.C:3", "X.Y.C:2", "A.B.C:1");
|
||||
new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "X.Y.C").addSummary("*.*.C:3", "A.B.C:1", "A.X.C:1", "X.Y.C:1");
|
||||
new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C").addSummary("*.*.C:3", "*.X.C:2", "A.B.C:1", "A.X.C:1", "B.X.C:1");
|
||||
new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C", "B.X.C").addSummary("*.*.C:4", "*.X.C:3", "B.X.C:2", "A.B.C:1", "A.X.C:1");
|
||||
|
||||
return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class);
|
||||
}
|
||||
|
||||
|
||||
@Test(enabled = true, dependsOnMethods = "testSummarizePath", dataProvider = "summaries")
|
||||
public void testSummarizeDifferences(SummarizeDifferenceTest test) {
|
||||
test.test();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,249 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Basic unit tests for DiffNode, DiffValue and DiffElement, including parsing trees from strings
|
||||
*/
|
||||
public class DiffNodeUnitTest extends BaseTest {
|
||||
// Data is:
|
||||
// MY_ROOT
|
||||
// fields: A=A, B=B
|
||||
// nodes: C, D
|
||||
// C: fields: E=E, nodes: none
|
||||
// D: fields: F=F, G=G, nodes: none
|
||||
static DiffNode MY_ROOT = DiffNode.rooted("MY_ROOT");
|
||||
static DiffValue Value_A = new DiffValue("A", MY_ROOT, "A");
|
||||
static DiffValue Value_B = new DiffValue("B", MY_ROOT, "B");
|
||||
static DiffNode NODE_C = DiffNode.empty("C", MY_ROOT);
|
||||
static DiffNode NODE_D = DiffNode.empty("D", MY_ROOT);
|
||||
static DiffValue Value_E = new DiffValue("E", NODE_C, "E");
|
||||
static DiffValue Value_F = new DiffValue("F", NODE_D, "F");
|
||||
static DiffValue Value_G = new DiffValue("G", NODE_D, "G");
|
||||
|
||||
static {
|
||||
MY_ROOT.add(Value_A);
|
||||
MY_ROOT.add(Value_B);
|
||||
MY_ROOT.add(NODE_C);
|
||||
MY_ROOT.add(NODE_D);
|
||||
NODE_C.add(Value_E);
|
||||
NODE_D.add(Value_F);
|
||||
NODE_D.add(Value_G);
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Element testing routines
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class ElementTest extends TestDataProvider {
|
||||
public DiffElement elt;
|
||||
public String name;
|
||||
public String fullName;
|
||||
public DiffElement parent;
|
||||
|
||||
private ElementTest(DiffValue elt, DiffValue parent, String name, String fullName) {
|
||||
this(elt.getBinding(), parent.getBinding(), name, fullName);
|
||||
}
|
||||
|
||||
private ElementTest(DiffElement elt, DiffElement parent, String name, String fullName) {
|
||||
super(ElementTest.class);
|
||||
this.elt = elt;
|
||||
this.name = name;
|
||||
this.fullName = fullName;
|
||||
this.parent = parent;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("ElementTest elt=%s name=%s fullName=%s parent=%s",
|
||||
elt.toOneLineString(), name, fullName, parent.getName());
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "elementdata")
|
||||
public Object[][] createElementData() {
|
||||
new ElementTest(MY_ROOT.getBinding(), DiffElement.ROOT, "MY_ROOT", "MY_ROOT");
|
||||
new ElementTest(NODE_C, MY_ROOT, "C", "MY_ROOT.C");
|
||||
new ElementTest(NODE_D, MY_ROOT, "D", "MY_ROOT.D");
|
||||
new ElementTest(Value_A, MY_ROOT, "A", "MY_ROOT.A");
|
||||
new ElementTest(Value_B, MY_ROOT, "B", "MY_ROOT.B");
|
||||
new ElementTest(Value_E, NODE_C, "E", "MY_ROOT.C.E");
|
||||
new ElementTest(Value_F, NODE_D, "F", "MY_ROOT.D.F");
|
||||
new ElementTest(Value_G, NODE_D, "G", "MY_ROOT.D.G");
|
||||
return TestDataProvider.getTests(ElementTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "elementdata")
|
||||
public void testElementMethods(ElementTest test) {
|
||||
Assert.assertNotNull(test.elt.getName());
|
||||
Assert.assertNotNull(test.elt.getParent());
|
||||
Assert.assertEquals(test.elt.getName(), test.name);
|
||||
Assert.assertEquals(test.elt.getParent(), test.parent);
|
||||
Assert.assertEquals(test.elt.fullyQualifiedName(), test.fullName);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// DiffValue testing routines
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class LeafTest extends TestDataProvider {
|
||||
public DiffValue diffvalue;
|
||||
public Object value;
|
||||
|
||||
private LeafTest(DiffValue diffvalue, Object value) {
|
||||
super(LeafTest.class);
|
||||
this.diffvalue = diffvalue;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("LeafTest diffvalue=%s value=%s", diffvalue.toOneLineString(), value);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "leafdata")
|
||||
public Object[][] createLeafData() {
|
||||
new LeafTest(Value_A, "A");
|
||||
new LeafTest(Value_B, "B");
|
||||
new LeafTest(Value_E, "E");
|
||||
new LeafTest(Value_F, "F");
|
||||
new LeafTest(Value_G, "G");
|
||||
return TestDataProvider.getTests(LeafTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "leafdata")
|
||||
public void testLeafMethods(LeafTest test) {
|
||||
Assert.assertNotNull(test.diffvalue.getValue());
|
||||
Assert.assertEquals(test.diffvalue.getValue(), test.value);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Node testing routines
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class NodeTest extends TestDataProvider {
|
||||
public DiffNode node;
|
||||
public Set<String> fields;
|
||||
public Set<String> subnodes;
|
||||
public Set<String> allNames;
|
||||
|
||||
private NodeTest(DiffNode node, List<String> fields, List<String> subnodes) {
|
||||
super(NodeTest.class);
|
||||
this.node = node;
|
||||
this.fields = new HashSet<String>(fields);
|
||||
this.subnodes = new HashSet<String>(subnodes);
|
||||
this.allNames = new HashSet<String>(fields);
|
||||
allNames.addAll(subnodes);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("NodeTest node=%s fields=%s subnodes=%s",
|
||||
node.toOneLineString(), fields, subnodes);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "nodedata")
|
||||
public Object[][] createData1() {
|
||||
new NodeTest(MY_ROOT, Arrays.asList("A", "B"), Arrays.asList("C", "D"));
|
||||
new NodeTest(NODE_C, Arrays.asList("E"), Collections.<String>emptyList());
|
||||
new NodeTest(NODE_D, Arrays.asList("F", "G"), Collections.<String>emptyList());
|
||||
return TestDataProvider.getTests(NodeTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "nodedata")
|
||||
public void testNodeAccessors(NodeTest test) {
|
||||
Assert.assertNotNull(test.node.getElements());
|
||||
|
||||
for ( String name : test.allNames ) {
|
||||
DiffElement elt = test.node.getElement(name);
|
||||
Assert.assertNotNull(elt, "Failed to find field " + elt + " in " + test.node);
|
||||
Assert.assertEquals(elt.getName(), name);
|
||||
Assert.assertEquals(elt.getValue().isAtomic(), test.fields.contains(name), "Failed atomic/compound expectation: " + test.node);
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: add routines are being implicitly tested by the creation of the data structures
|
||||
|
||||
@Test(enabled = true, dataProvider = "nodedata")
|
||||
public void testCounts(NodeTest test) {
|
||||
Assert.assertEquals(test.node.getElements().size(), test.allNames.size());
|
||||
Assert.assertEquals(test.node.getElementNames(), test.allNames);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// fromString testing routines
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class FromStringTest extends TestDataProvider {
|
||||
public String string;
|
||||
public DiffElement expected;
|
||||
|
||||
private FromStringTest(String string, DiffElement expected) {
|
||||
super(FromStringTest.class);
|
||||
this.string = string;
|
||||
this.expected = expected;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("FromStringTest string=%s expected=%s", string, expected.toOneLineString());
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "fromstringdata")
|
||||
public Object[][] createFromData() {
|
||||
new FromStringTest("A=A", Value_A.getBinding());
|
||||
new FromStringTest("B=B", Value_B.getBinding());
|
||||
new FromStringTest("C=(E=E)", NODE_C.getBinding());
|
||||
new FromStringTest("D=(F=F G=G)", NODE_D.getBinding());
|
||||
return TestDataProvider.getTests(FromStringTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "fromstringdata")
|
||||
public void parseFromString(FromStringTest test) {
|
||||
logger.warn("Testing from string: " + test.string);
|
||||
DiffElement elt = DiffNode.fromString(test.string);
|
||||
Assert.assertEquals(elt.toOneLineString(), test.expected.toOneLineString());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Basic unit tests for the DiffableReaders (VCF and BAM) provided by the DiffEngine
|
||||
*/
|
||||
public class DiffableReaderUnitTest extends BaseTest {
|
||||
DiffEngine engine;
|
||||
|
||||
File vcfFile = new File(testDir + "diffTestMaster.vcf");
|
||||
File bamFile = new File(testDir + "exampleBAM.bam");
|
||||
|
||||
@BeforeClass(enabled = true)
|
||||
public void createDiffEngine() {
|
||||
engine = new DiffEngine();
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testPluggableDiffableReaders() {
|
||||
logger.warn("testPluggableDiffableReaders");
|
||||
Map<String, DiffableReader> readers = engine.getReaders();
|
||||
Assert.assertNotNull(readers);
|
||||
Assert.assertTrue(readers.size() > 0);
|
||||
Assert.assertNotNull(readers.get("VCF"));
|
||||
for ( Map.Entry<String, DiffableReader> e : engine.getReaders().entrySet() ) {
|
||||
logger.warn("Found diffable reader: " + e.getKey());
|
||||
Assert.assertEquals(e.getValue().getName(), e.getKey());
|
||||
Assert.assertEquals(e.getValue(), engine.getReader(e.getKey()));
|
||||
}
|
||||
}
|
||||
|
||||
private static void testLeaf(DiffNode rec, String field, Object expected) {
|
||||
DiffElement value = rec.getElement(field);
|
||||
Assert.assertNotNull(value, "Expected to see leaf named " + field + " in rec " + rec);
|
||||
Assert.assertEquals(value.getValue().getValue(), expected, "Expected to leaf named " + field + " to have value " + expected + " in rec " + rec);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders")
|
||||
public void testVCF1() {
|
||||
logger.warn("testVCF1");
|
||||
DiffableReader vcfReader = engine.getReader("VCF");
|
||||
Assert.assertTrue(vcfReader.canRead(vcfFile));
|
||||
Assert.assertFalse(vcfReader.canRead(bamFile));
|
||||
|
||||
DiffElement diff = vcfReader.readFromFile(vcfFile, -1);
|
||||
Assert.assertNotNull(diff);
|
||||
|
||||
Assert.assertEquals(diff.getName(), vcfFile.getName());
|
||||
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
|
||||
|
||||
DiffNode node = diff.getValueAsNode();
|
||||
Assert.assertEquals(node.getElements().size(), 10);
|
||||
|
||||
// chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03
|
||||
DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode();
|
||||
testLeaf(rec1, "CHROM", "chr1");
|
||||
testLeaf(rec1, "POS", 2646);
|
||||
testLeaf(rec1, "ID", "rs62635284");
|
||||
testLeaf(rec1, "REF", Allele.create("G", true));
|
||||
testLeaf(rec1, "ALT", new HashSet<Allele>(Arrays.asList(Allele.create("A"))));
|
||||
testLeaf(rec1, "QUAL", 0.15);
|
||||
testLeaf(rec1, "FILTER", Collections.<Object>emptySet());
|
||||
testLeaf(rec1, "AC", "2");
|
||||
testLeaf(rec1, "AF", "1.00");
|
||||
testLeaf(rec1, "AN", "2");
|
||||
}
|
||||
|
||||
@Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders")
|
||||
public void testBAM() {
|
||||
logger.warn("testBAM");
|
||||
DiffableReader bamReader = engine.getReader("BAM");
|
||||
Assert.assertTrue(bamReader.canRead(bamFile));
|
||||
Assert.assertFalse(bamReader.canRead(vcfFile));
|
||||
|
||||
DiffElement diff = bamReader.readFromFile(bamFile, -1);
|
||||
Assert.assertNotNull(diff);
|
||||
|
||||
Assert.assertEquals(diff.getName(), bamFile.getName());
|
||||
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
|
||||
|
||||
DiffNode node = diff.getValueAsNode();
|
||||
Assert.assertEquals(node.getElements().size(), 33);
|
||||
|
||||
// 30PPJAAXX090125:1:42:512:1817#0 99 chr1 200 0 76M =
|
||||
// 255 -130 ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC
|
||||
// BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3:
|
||||
// PG:Z:0 RG:Z:exampleBAM.bam SM:Z:exampleBAM.bam
|
||||
|
||||
DiffNode rec1 = node.getElement("30PPJAAXX090125:1:42:512:1817#0_1").getValueAsNode();
|
||||
testLeaf(rec1, "NAME", "30PPJAAXX090125:1:42:512:1817#0");
|
||||
testLeaf(rec1, "FLAGS", 99);
|
||||
testLeaf(rec1, "RNAME", "chr1");
|
||||
testLeaf(rec1, "POS", 200);
|
||||
testLeaf(rec1, "MAPQ", 0);
|
||||
testLeaf(rec1, "CIGAR", "76M");
|
||||
testLeaf(rec1, "RNEXT", "chr1");
|
||||
testLeaf(rec1, "PNEXT", 255);
|
||||
testLeaf(rec1, "TLEN", -130);
|
||||
testLeaf(rec1, "SEQ", "ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC");
|
||||
testLeaf(rec1, "QUAL", "BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3:");
|
||||
testLeaf(rec1, "PG", "0");
|
||||
testLeaf(rec1, "RG", "exampleBAM.bam");
|
||||
testLeaf(rec1, "SM", "exampleBAM.bam");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Basic unit tests for the string representation of a SpecificDifference
|
||||
*/
|
||||
public class DifferenceUnitTest extends BaseTest {
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// testing routines
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class DifferenceTest extends TestDataProvider {
|
||||
public DiffElement tree1, tree2;
|
||||
public String difference;
|
||||
|
||||
private DifferenceTest(String tree1, String tree2, String difference) {
|
||||
this(DiffNode.fromString(tree1), DiffNode.fromString(tree2), difference);
|
||||
}
|
||||
|
||||
private DifferenceTest(DiffElement tree1, DiffElement tree2, String difference) {
|
||||
super(DifferenceTest.class);
|
||||
this.tree1 = tree1;
|
||||
this.tree2 = tree2;
|
||||
this.difference = difference;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("tree1=%s tree2=%s diff=%s",
|
||||
tree1 == null ? "null" : tree1.toOneLineString(),
|
||||
tree2 == null ? "null" : tree2.toOneLineString(),
|
||||
difference);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createTrees() {
|
||||
new DifferenceTest("A=X", "A=Y", "A:X!=Y");
|
||||
new DifferenceTest("A=Y", "A=X", "A:Y!=X");
|
||||
new DifferenceTest(DiffNode.fromString("A=X"), null, "A:X!=MISSING");
|
||||
new DifferenceTest(null, DiffNode.fromString("A=X"), "A:MISSING!=X");
|
||||
return DifferenceTest.getTests(DifferenceTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "data")
|
||||
public void testDiffToString(DifferenceTest test) {
|
||||
logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString()));
|
||||
logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString()));
|
||||
logger.warn("Test expected diff : " + test.difference);
|
||||
SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2);
|
||||
logger.warn("Observed diffs : " + diff);
|
||||
Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testNoAction() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347"));
|
||||
Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c"));
|
||||
executeTest("test no action", spec);
|
||||
}
|
||||
|
||||
|
|
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testClusteredSnps() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -window 10 -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("ada5540bb3d9b6eb8f1337ba01e90a94"));
|
||||
Arrays.asList("27b13f179bb4920615dff3a32730d845"));
|
||||
executeTest("test clustered SNPs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -32,17 +32,17 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testMasks() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae"));
|
||||
Arrays.asList("578f9e774784c25871678e6464fd212b"));
|
||||
executeTest("test mask all", spec1);
|
||||
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b64baabe905a5d197cc1ab594147d3d5"));
|
||||
Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f"));
|
||||
executeTest("test mask some", spec2);
|
||||
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("0eff92fe72024d535c44b98e1e9e1993"));
|
||||
Arrays.asList("5939f80d14b32d88587373532d7b90e5"));
|
||||
executeTest("test mask extend", spec3);
|
||||
}
|
||||
|
||||
|
|
@ -50,7 +50,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilter1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("7a40795147cbfa92941489d7239aad92"));
|
||||
Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368"));
|
||||
executeTest("test filter #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilter2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("e9dd4991b1e325847c77d053dfe8ee54"));
|
||||
Arrays.asList("c95845e817da7352b9b72bc9794f18fb"));
|
||||
executeTest("test filter #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilterWithSeparateNames() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("9ded2cce63b8d97550079047051d80a3"));
|
||||
Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530"));
|
||||
executeTest("test filter with separate names #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,12 +74,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeFilters() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("6696e3f65a62ce912230d47cdb0c129b"));
|
||||
Arrays.asList("96b61e4543a73fe725e433f007260039"));
|
||||
executeTest("test genotype filter #1", spec1);
|
||||
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9"));
|
||||
Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e"));
|
||||
executeTest("test genotype filter #2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testDeletions() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo -B:variant,VCF " + validationDataLocation + "twoDeletions.vcf", 1,
|
||||
Arrays.asList("e63b58be33c9126ad6cc55489aac539b"));
|
||||
Arrays.asList("569546fd798afa0e65c5b61b440d07ac"));
|
||||
executeTest("test deletions", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0"));
|
||||
Arrays.asList("c97829259463d04b0159591bb6fb44af"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("edeb1db288a24baff59575ceedd94243"));
|
||||
Arrays.asList("2b69667f4770e8c0c894066b7f27e440"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("581990130d90071b084024f4cd7caf91"));
|
||||
Arrays.asList("b77fe007c2a97fcd59dfd5eef94d8b95"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("d120db27d694a6da32367cc4fb5770fa"));
|
||||
Arrays.asList("ee8a5e63ddd470726a749e69c0c20f60"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "ef31654a2b85b9b2d3bba4f4a75a17b6";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "a29615dd37222a11b8dadd341b53e43c";
|
||||
String md5 = "46868a9c4134651c54535fb46b408aee";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -138,9 +138,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCallingParameters() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" );
|
||||
e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" );
|
||||
e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" );
|
||||
e.put( "--min_base_quality_score 26", "5043c9a101e691602eb7a3f9704bdf20" );
|
||||
e.put( "--min_mapping_quality_score 26", "71a833eb8fd93ee62ae0d5a430f27940" );
|
||||
e.put( "--p_nonref_model GRID_SEARCH", "ddf443e9dcadef367476b26b4d52c134" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -153,9 +153,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testOutputParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" );
|
||||
e.put( "-sites_only", "eaad6ceb71ab94290650a70bea5ab951" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "05bf7db8a3d19ef4a3d14772c90b732f" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "e4b86740468d7369f0156550855586c7" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -169,12 +169,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("64be99183c100caed4aa5f8bad64c7e9"));
|
||||
Arrays.asList("71a833eb8fd93ee62ae0d5a430f27940"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("e76ca54232d02f0d92730e1affeb804e"));
|
||||
Arrays.asList("79968844dc3ddecb97748c1acf2984c7"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -186,8 +186,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" );
|
||||
e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" );
|
||||
e.put( 0.01, "4e878664f61d2d800146d3762303fde1" );
|
||||
e.put( 1.0 / 1850, "9204caec095ff5e63ca21a10b6fab453" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
|
||||
Arrays.asList("1a58ec52df545f946f80cc16c5736a91"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -230,7 +230,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("0919ab7e513c377610e23a67d33608fa"));
|
||||
Arrays.asList("62d0f6d9de344ce68ce121c13b1e78b1"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq OFF",
|
||||
1,
|
||||
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
|
||||
Arrays.asList("1a58ec52df545f946f80cc16c5736a91"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ OFF"), spec);
|
||||
}
|
||||
|
|
@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("cb37348c41b8181be829912730f747e1"));
|
||||
Arrays.asList("631ae1f1eb6bc4c1a4136b8495250536"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2"));
|
||||
Arrays.asList("fd556585c79e2b892a5976668f45aa43"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81"));
|
||||
Arrays.asList("9cd56feedd2787919e571383889fde70"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -301,14 +301,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364"));
|
||||
Arrays.asList("315e1b78d7a403d7fcbcf0caa8c496b8"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("1b9764b783acf7822edc58e6822eef5b"));
|
||||
Arrays.asList("cf89e0c54f14482a23c105b73a333d8a"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("e312b7d3854d5b2834a370659514a813"));
|
||||
Arrays.asList("7f11f7f75d1526077f0173c7ed1fc6c4"));
|
||||
executeTest("Merge MNP sites within genomic distance of 1 [TEST ONE]", spec);
|
||||
}
|
||||
|
||||
|
|
@ -33,7 +33,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("681f50e45f1d697370d2c355df2e18bc"));
|
||||
Arrays.asList("53dd312468296826bdd3c22387390c88"));
|
||||
executeTest("Merge MNP sites within genomic distance of 10 [TEST TWO]", spec);
|
||||
}
|
||||
|
||||
|
|
@ -43,7 +43,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("0bccb0ef928a108418246bec01098083"));
|
||||
Arrays.asList("e26f92d2fb9f4eaeac7f9d8ee27410ee"));
|
||||
executeTest("Merge MNP sites within genomic distance of 100 [TEST THREE]", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest
|
|||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("e16f957d888054ae0518e25660295241"));
|
||||
Arrays.asList("af5e1370822551c0c6f50f23447dc627"));
|
||||
executeTest("Merge sites within genomic distance of 1 [TEST ONE]", spec);
|
||||
}
|
||||
|
||||
|
|
@ -33,7 +33,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest
|
|||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("122a482090677c7619c2105d44e00d11"));
|
||||
Arrays.asList("dd8c44ae1ef059a7fe85399467e102eb"));
|
||||
executeTest("Merge sites within genomic distance of 10 [TEST TWO]", spec);
|
||||
}
|
||||
|
||||
|
|
@ -43,7 +43,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest
|
|||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("bc6a8c8a42bb2601db98e88e9ad74748"));
|
||||
Arrays.asList("f81fd72ecaa57b3215406fcea860bcc5"));
|
||||
executeTest("Merge sites within genomic distance of 100 [TEST THREE]", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("6020a68bbec97fcd87819c10cd4e2470"));
|
||||
Arrays.asList("9568ba0b6624b97ac55a59bdee2d9150"));
|
||||
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:1232503-1332503",
|
||||
1,
|
||||
Arrays.asList("712c2145df4756c9a15758865d8007b5"));
|
||||
Arrays.asList("ce65194c24fe83b0ec90faa6c8e6109a"));
|
||||
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("297e0896e4761529d979f40f5ad694db"));
|
||||
Arrays.asList("02d134fd544613b1e5dd7f7197fc3753"));
|
||||
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("52a17f14692d726d3b726cf0ae7f2a09"));
|
||||
Arrays.asList("2f7ec9904fc054c2ba1a7db05eb29334"));
|
||||
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
|
||||
+ " -L chr20:332341-482503",
|
||||
1,
|
||||
Arrays.asList("af768f7958b8f4599c2374f1cc2fc613"));
|
||||
Arrays.asList("da7a31725f229d1782dd3049848730aa"));
|
||||
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:652810-681757",
|
||||
1,
|
||||
Arrays.asList("3dd886672f59a47908b94136d0427bb0"));
|
||||
Arrays.asList("e9d35cb88089fb0e8ae6678bfaeeac8c"));
|
||||
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,9 +19,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
public void testCountCovariates1() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f4f8a49bb5764d2a8f61e055f64dcce4");
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9c006f8e9fb5752b1c139f5a8cc7ea88");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "570506533f079d738d70934dfe1c02cd" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e6b98af01c5a08e4954b79ec42db6fc3" );
|
||||
|
||||
for ( String parallelism : Arrays.asList("", " -nt 4")) {
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
|
|
@ -53,9 +53,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
public void testTableRecalibrator1() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5");
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "6797d7ffa4ef6c48413719ba32696ccf");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "064c4a7bdd23974c3a9c5f924540df76" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1f9d8944b73169b367cb83b0d22e5432" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -107,7 +107,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibratorMaxQ70() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5" );
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -133,12 +133,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesSolidIndelsRemoveRefBias() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "0a6cdb9611e5880ea6611205080aa267" );
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c9ea5f995e1e2b7a5688533e678dcedc" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -164,7 +162,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibratorSolidIndelsRemoveRefBias() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9bc7e1ad223ba759fe5e8ddb4c07369c" );
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "993fae4270e7e1e15986f270acf247af" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -189,13 +187,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesVCF() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3700eaf567e4937f442fc777a226d6ad");
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -219,7 +214,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesBED() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "6803891a3398821fc8a37e19ea8e5a00");
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b460478d9683e827784e42bc352db8bb");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -243,7 +238,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesVCFPlusDBsnp() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f224c42fbc4026db973ccc91265ab5c7");
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d892bd60d8f679affda3c1e3af96c1");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -268,69 +263,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "c024e03f019aeceaf364fa58c8295ad8" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" -cov ReadGroupCovariate" +
|
||||
" -cov QualityScoreCovariate" +
|
||||
" -cov CycleCovariate" +
|
||||
" -cov DinucCovariate" +
|
||||
" --default_read_group DefaultReadGroup" +
|
||||
" --default_platform illumina" +
|
||||
" --solid_recal_mode SET_Q_ZERO" +
|
||||
" -recalFile %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
List<File> result = executeTest("testCountCovariatesNoReadGroups", spec).getFirst();
|
||||
paramsFilesNoReadGroupTest.put(bam, result.get(0).getAbsolutePath());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTableRecalibratorNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "1eefbe7ac0376fc1ed1392d85242171e" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
String paramsFile = paramsFilesNoReadGroupTest.get(bam);
|
||||
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
|
||||
if ( paramsFile != null ) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -T TableRecalibration" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,100,000-10,300,000" +
|
||||
" -o %s" +
|
||||
" --no_pg_tag" +
|
||||
" --solid_recal_mode SET_Q_ZERO" +
|
||||
" --default_read_group DefaultReadGroup" +
|
||||
" --default_platform illumina" +
|
||||
" -recalFile " + paramsFile,
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testTableRecalibratorNoReadGroups", spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesNoIndex() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "cfc31bb6f51436d1c3b34f62bb801dc8" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "284ccac1f8fe485e52c86333cac7c2d4" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -356,7 +292,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibratorNoIndex() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "83b848a16034c2fb423d1bb0f5be7784" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "c167799c2d9cab815d7c9b23337f162e" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -380,11 +316,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesFailWithoutDBSNP() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "");
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"d33212a84368e821cbedecd4f59756d6", // tranches
|
||||
"4652dca41222bebdf9d9fda343b2a835", // recal file
|
||||
"5350b1a4c1250cf3b77ca45327c04711"); // cut VCF
|
||||
"243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
public Object[][] createData1() {
|
||||
|
|
|
|||
|
|
@ -34,76 +34,76 @@ import java.util.Arrays;
|
|||
* Tests CombineVariants
|
||||
*/
|
||||
public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||
// public static String baseTestString(String args) {
|
||||
// return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
|
||||
// }
|
||||
//
|
||||
// public void test1InOut(String file, String md5, boolean vcf3) {
|
||||
// test1InOut(file, md5, "", vcf3);
|
||||
// }
|
||||
//
|
||||
// public void test1InOut(String file, String md5, String args, boolean vcf3) {
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
|
||||
// 1,
|
||||
// Arrays.asList(md5));
|
||||
// executeTest("testInOut1--" + file, spec);
|
||||
// }
|
||||
//
|
||||
// public void combine2(String file1, String file2, String args, String md5, boolean vcf3) {
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
|
||||
// 1,
|
||||
// Arrays.asList(md5));
|
||||
// executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
// }
|
||||
//
|
||||
// public void combineSites(String args, String md5) {
|
||||
// String file1 = "1000G_omni2.5.b37.sites.vcf";
|
||||
// String file2 = "hapmap_3.3.b37.sites.vcf";
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
|
||||
// + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1
|
||||
// + " -B:hm3,VCF " + validationDataLocation + file2 + args,
|
||||
// 1,
|
||||
// Arrays.asList(md5));
|
||||
// executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
// }
|
||||
//
|
||||
//
|
||||
// @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2117fff6e0d182cd20be508e9661829c", true); }
|
||||
// @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2cfaf7af3dd119df08b8a9c1f72e2f93", " -setKey foo", true); }
|
||||
// @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1474ac0fde2ce42a3c24f1c97eab333e", " -setKey null", true); }
|
||||
// @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "7fc66df048a0ab08cf507906e1d4a308", false); } // official project VCF files in tabix format
|
||||
//
|
||||
// @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ec9715f53dbf4531570557c212822f12", false); }
|
||||
// @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f1072be5f5c6ee810276d9ca6537224d", false); }
|
||||
//
|
||||
// @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "b77a1eec725201d9d8e74ee0c45638d3", false); } // official project VCF files in tabix format
|
||||
// @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "802977fdfd2f4905b501bb06800f60af", false); } // official project VCF files in tabix format
|
||||
// @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a67157287dd2b24b5cdf7ebf8fcbbe9a", false); }
|
||||
//
|
||||
// @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e1f4718a179f1196538a33863da04f53", false); }
|
||||
//
|
||||
// @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "b3783384b7c8e877b971033e90beba48", true); }
|
||||
//
|
||||
// @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "902e541c87caa72134db6293fc46f0ad"); }
|
||||
// @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "f339ad4bb5863b58b9c919ce7d040bb9"); }
|
||||
//
|
||||
// @Test public void threeWayWithRefs() {
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
|
||||
// " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
|
||||
// " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
|
||||
// " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
|
||||
// " -setKey centerSet" +
|
||||
// " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
|
||||
// " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
// " -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
// 1,
|
||||
// Arrays.asList("a07995587b855f3214fb71940bf23c0f"));
|
||||
// executeTest("threeWayWithRefs", spec);
|
||||
// }
|
||||
public static String baseTestString(String args) {
|
||||
return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
|
||||
}
|
||||
|
||||
public void test1InOut(String file, String md5, boolean vcf3) {
|
||||
test1InOut(file, md5, "", vcf3);
|
||||
}
|
||||
|
||||
public void test1InOut(String file, String md5, String args, boolean vcf3) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("testInOut1--" + file, spec);
|
||||
}
|
||||
|
||||
public void combine2(String file1, String file2, String args, String md5, boolean vcf3) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
}
|
||||
|
||||
public void combineSites(String args, String md5) {
|
||||
String file1 = "1000G_omni2.5.b37.sites.vcf";
|
||||
String file2 = "hapmap_3.3.b37.sites.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
|
||||
+ " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1
|
||||
+ " -B:hm3,VCF " + validationDataLocation + file2 + args,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); }
|
||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); }
|
||||
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0", false); } // official project VCF files in tabix format
|
||||
|
||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); }
|
||||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); }
|
||||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
|
||||
|
||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); }
|
||||
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); }
|
||||
|
||||
@Test public void threeWayWithRefs() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
|
||||
" -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
|
||||
" -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
|
||||
" -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
|
||||
" -setKey centerSet" +
|
||||
" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
|
||||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5"));
|
||||
executeTest("threeWayWithRefs", spec);
|
||||
}
|
||||
|
||||
|
||||
// complex examples with filtering, indels, and multiple alleles
|
||||
|
|
@ -119,8 +119,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
@Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); }
|
||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "0db9ef50fe54b60426474273d7c7fa99"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "d20acb3d53ba0a02ce92d540ebeda2a9"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "8d1b3d120515f8b56b5a0d10bc5da713"); }
|
||||
// @Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); }
|
||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
|
||||
}
|
||||
|
|
@ -40,7 +40,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("37e23efd7d6471fc0f807b31ccafe0eb"));
|
||||
Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd"));
|
||||
executeTest("test b36 to hg19", spec);
|
||||
}
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("b6ef4a2f026fd3843aeb9ed764a66921"));
|
||||
Arrays.asList("3fd7ec2dc4064ef410786276b0dc9d08"));
|
||||
executeTest("test b36 to hg19, unsorted samples", spec);
|
||||
}
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + hg18Reference + " -B:variant,vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("3275373b3c44ad14a270b50664b3f8a3"));
|
||||
Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b"));
|
||||
executeTest("test hg18 to hg19, unsorted", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' -B:variant,VCF3 " + testfile + " -NO_HEADER"),
|
||||
1,
|
||||
Arrays.asList("1b9d551298dc048c7d36b60440ff4d50")
|
||||
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
|
||||
);
|
||||
|
||||
executeTest("testComplexSelection--" + testfile, spec);
|
||||
|
|
@ -31,7 +31,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -sn B -sn C -B:variant,VCF3 " + testfile + " -NO_HEADER"),
|
||||
1,
|
||||
Arrays.asList("5ba7536a0819421b330350a160e4261a")
|
||||
Arrays.asList("b74038779fe6485dbb8734ae48178356")
|
||||
);
|
||||
|
||||
executeTest("testRepeatedLineSelection--" + testfile, spec);
|
||||
|
|
@ -44,7 +44,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -disc myvar -L 20:1012700-1020000 -B:variant,VCF " + b37hapmapGenotypes + " -B:myvar,VCF " + testFile + " -o %s -NO_HEADER",
|
||||
1,
|
||||
Arrays.asList("97621ae8f29955eedfc4e0be3515fcb9")
|
||||
Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e")
|
||||
);
|
||||
|
||||
executeTest("testDiscordance--" + testFile, spec);
|
||||
|
|
@ -57,7 +57,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -conc hapmap -L 20:1012700-1020000 -B:hapmap,VCF " + b37hapmapGenotypes + " -B:variant,VCF " + testFile + " -o %s -NO_HEADER",
|
||||
1,
|
||||
Arrays.asList("a0ae016fdffcbe7bfb99fd3dbc311407")
|
||||
Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a")
|
||||
);
|
||||
|
||||
executeTest("testConcordance--" + testFile, spec);
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
" --NO_HEADER" +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("debbbf3e661b6857cc8d99ff7635bb1d")
|
||||
Arrays.asList("658f580f7a294fd334bd897102616fed")
|
||||
);
|
||||
|
||||
executeTest("testSimpleVCFStreaming", spec);
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVariantsToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("bd15d98adc76b5798e3bbeff3f936feb");
|
||||
md5.add("4accae035d271b35ee2ec58f403c68c6");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -38,7 +38,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("acd15d3f85bff5b545bc353e0e23cc6e");
|
||||
md5.add("71e8c98d7c3a73b6287ecc339086fe03");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -56,7 +56,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingHapMapInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("6f34528569f8cf5941cb365fa77288c1");
|
||||
md5.add("f343085305e80c7a2493422e4eaad983");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -73,7 +73,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingVCFInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("d8316fc1b9d8e954a58940354119a32e");
|
||||
md5.add("86f02e2e764ba35854cff2aa05a1fdd8");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broad.tribble.Tribble;
|
|||
import org.broad.tribble.index.*;
|
||||
import org.broad.tribble.iterators.CloseableTribbleIterator;
|
||||
import org.broad.tribble.source.BasicFeatureSource;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -75,7 +76,7 @@ public class IndexFactoryUnitTest {
|
|||
|
||||
// test that the input index is the same as the one created from the identical input file
|
||||
// test that the dynamic index is the same as the output index, which is equal to the input index
|
||||
Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(outputFile, outputFileIndex, new VCFCodec()));
|
||||
WalkerTest.assertOnDiskIndexEqualToNewlyCreatedIndex(outputFileIndex, "unittest", outputFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,28 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class VCFIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void testReadingAndWritingWitHNoChanges() {
|
||||
|
||||
String md5ofInputVCF = "a990ba187a69ca44cb9bc2bb44d00447";
|
||||
String testVCF = validationDataLocation + "vcf4.1.example.vcf";
|
||||
|
||||
String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s ";
|
||||
|
||||
String test1 = baseCommand + "-T VariantAnnotator -BTI variant -B:variant,vcf " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF));
|
||||
List<File> result = executeTest("Test Variant Annotator with no changes", spec1).getFirst();
|
||||
|
||||
String test2 = baseCommand + "-T VariantsToVCF -B:variant,vcf " + result.get(0).getAbsolutePath();
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF));
|
||||
executeTest("Test Variants To VCF from new output", spec2);
|
||||
}
|
||||
}
|
||||
|
|
@ -49,7 +49,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
|
||||
2, // just one output file
|
||||
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "e6673737acbb6bfabfcd92c4b2268241"));
|
||||
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63"));
|
||||
executeTest("testToVCF", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@
|
|||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.QualityScoreCovariate" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate" />
|
||||
</dependencies>
|
||||
</executable>
|
||||
<resources>
|
||||
|
|
|
|||
|
|
@ -3,14 +3,15 @@ package org.broadinstitute.sting.queue.qscripts
|
|||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||
|
||||
import scala.io.Source._
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
||||
import org.broadinstitute.sting.queue.extensions.picard._
|
||||
import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord}
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
||||
import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode
|
||||
|
||||
import collection.JavaConversions._
|
||||
import net.sf.samtools.SAMFileReader
|
||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||
|
||||
import org.broadinstitute.sting.queue.util.QScriptUtils
|
||||
|
||||
class DataProcessingPipeline extends QScript {
|
||||
qscript =>
|
||||
|
|
@ -29,7 +30,8 @@ class DataProcessingPipeline extends QScript {
|
|||
@Input(doc="Reference fasta file", fullName="reference", shortName="R", required=true)
|
||||
var reference: File = _
|
||||
|
||||
|
||||
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
|
||||
var dbSNP: File = _
|
||||
|
||||
/****************************************************************************
|
||||
* Optional Parameters
|
||||
|
|
@ -39,14 +41,12 @@ class DataProcessingPipeline extends QScript {
|
|||
// @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
|
||||
// var sortSamJar: File = _
|
||||
//
|
||||
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
|
||||
var bwaPath: File = _
|
||||
|
||||
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=false)
|
||||
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
||||
|
||||
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
|
||||
var indels: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf")
|
||||
var indels: File = _
|
||||
|
||||
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
|
||||
var bwaPath: File = _
|
||||
|
||||
@Input(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false)
|
||||
var projectName: String = "project"
|
||||
|
|
@ -103,18 +103,6 @@ class DataProcessingPipeline extends QScript {
|
|||
val ds: String)
|
||||
{}
|
||||
|
||||
// Utility function to check if there are multiple samples in a BAM file (currently we can't deal with that)
|
||||
def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
|
||||
var sample: String = ""
|
||||
for (r <- readGroups) {
|
||||
if (sample.isEmpty)
|
||||
sample = r.getSample
|
||||
else if (sample != r.getSample)
|
||||
return true;
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
|
||||
// It uses the sample information on the header of the input BAM files.
|
||||
//
|
||||
|
|
@ -135,7 +123,7 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
// only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
|
||||
// with PrintReads to separate the samples. Tell user to do it himself!
|
||||
assert(!hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
||||
assert(!QScriptUtils.hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
||||
|
||||
// Fill out the sample table with the readgroups in this file
|
||||
for (rg <- readGroups) {
|
||||
|
|
@ -147,20 +135,23 @@ class DataProcessingPipeline extends QScript {
|
|||
}
|
||||
}
|
||||
|
||||
println("\n\n*** DEBUG ***\n")
|
||||
// Creating one file for each sample in the dataset
|
||||
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||
for ((sample, flist) <- sampleTable) {
|
||||
|
||||
println(sample + ":")
|
||||
for (f <- flist)
|
||||
println (f)
|
||||
println()
|
||||
|
||||
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".bam")
|
||||
sampleBamFiles(sample) = sampleFileName
|
||||
add(joinBams(flist, sampleFileName))
|
||||
}
|
||||
return sampleBamFiles.toMap
|
||||
}
|
||||
println("*** DEBUG ***\n\n")
|
||||
|
||||
// Checks how many contigs are in the dataset. Uses the BAM file header information.
|
||||
def getNumberOfContigs(bamFile: File): Int = {
|
||||
val samReader = new SAMFileReader(new File(bamFile))
|
||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
||||
return sampleBamFiles.toMap
|
||||
}
|
||||
|
||||
// Rebuilds the Read Group string to give BWA
|
||||
|
|
@ -206,17 +197,6 @@ class DataProcessingPipeline extends QScript {
|
|||
return realignedBams
|
||||
}
|
||||
|
||||
// Reads a BAM LIST file and creates a scala list with all the files
|
||||
def createListFromFile(in: File):List[File] = {
|
||||
if (in.toString.endsWith("bam"))
|
||||
return List(in)
|
||||
var l: List[File] = List()
|
||||
for (bam <- fromFile(in).getLines)
|
||||
l :+= new File(bam)
|
||||
return l
|
||||
}
|
||||
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Main script
|
||||
|
|
@ -226,17 +206,14 @@ class DataProcessingPipeline extends QScript {
|
|||
def script = {
|
||||
|
||||
// keep a record of the number of contigs in the first bam file in the list
|
||||
val bams = createListFromFile(input)
|
||||
nContigs = getNumberOfContigs(bams(0))
|
||||
val bams = QScriptUtils.createListFromFile(input)
|
||||
nContigs = QScriptUtils.getNumberOfContigs(bams(0))
|
||||
|
||||
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}
|
||||
|
||||
// Generate a BAM file per sample joining all per lane files if necessary
|
||||
val sampleBamFiles: Map[String, File] = createSampleFiles(bams, realignedBams)
|
||||
|
||||
|
||||
println("nContigs: " + nContigs)
|
||||
|
||||
// Final output list of processed bam files
|
||||
var cohortList: List[File] = List()
|
||||
|
||||
|
|
@ -244,6 +221,7 @@ class DataProcessingPipeline extends QScript {
|
|||
println("\nFound the following samples: ")
|
||||
for ((sample, file) <- sampleBamFiles)
|
||||
println("\t" + sample + " -> " + file)
|
||||
println("\n")
|
||||
|
||||
// If this is a 'knowns only' indel realignment run, do it only once for all samples.
|
||||
val globalIntervals = new File(outputDir + projectName + ".intervals")
|
||||
|
|
@ -310,7 +288,8 @@ class DataProcessingPipeline extends QScript {
|
|||
this.out = outIntervals
|
||||
this.mismatchFraction = 0.0
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
this.rodBind :+= RodBind("indels", "VCF", indels)
|
||||
if (!indels.isEmpty)
|
||||
this.rodBind :+= RodBind("indels", "VCF", indels)
|
||||
this.scatterCount = nContigs
|
||||
this.analysisName = queueLogDir + outIntervals + ".target"
|
||||
this.jobName = queueLogDir + outIntervals + ".target"
|
||||
|
|
@ -321,7 +300,8 @@ class DataProcessingPipeline extends QScript {
|
|||
this.targetIntervals = tIntervals
|
||||
this.out = outBam
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
this.rodBind :+= RodBind("indels", "VCF", qscript.indels)
|
||||
if (!indels.isEmpty)
|
||||
this.rodBind :+= RodBind("indels", "VCF", indels)
|
||||
this.consensusDeterminationModel = consensusDeterminationModel
|
||||
this.compress = 0
|
||||
this.scatterCount = nContigs
|
||||
|
|
@ -344,7 +324,7 @@ class DataProcessingPipeline extends QScript {
|
|||
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = inRecalFile
|
||||
this.baq = org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY
|
||||
this.baq = CalculationMode.CALCULATE_AS_NECESSARY
|
||||
this.out = outBam
|
||||
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
|
||||
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.queue.qscripts
|
|||
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import net.sf.samtools.SAMFileReader
|
||||
import org.broadinstitute.sting.queue.util.QScriptUtils
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -32,26 +32,25 @@ class RecalibrateBaseQualities extends QScript {
|
|||
val queueLogDir: String = ".qlog/"
|
||||
var nContigs: Int = 0
|
||||
|
||||
def getNumberOfContigs(bamFile: File): Int = {
|
||||
val samReader = new SAMFileReader(new File(bamFile))
|
||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
||||
}
|
||||
|
||||
def script = {
|
||||
|
||||
nContigs = getNumberOfContigs(input)
|
||||
val bamList = QScriptUtils.createListFromFile(input)
|
||||
nContigs = QScriptUtils.getNumberOfContigs(bamList(0))
|
||||
|
||||
val recalFile1: File = swapExt(input, ".bam", "recal1.csv")
|
||||
val recalFile2: File = swapExt(input, ".bam", "recal2.csv")
|
||||
val recalBam: File = swapExt(input, ".bam", "recal.bam")
|
||||
val path1: String = "before"
|
||||
val path2: String = "after"
|
||||
|
||||
add(cov(input, recalFile1),
|
||||
recal(input, recalFile1, recalBam),
|
||||
cov(recalBam, recalFile2),
|
||||
analyzeCovariates(recalFile1, path1),
|
||||
analyzeCovariates(recalFile2, path2))
|
||||
for (bam <- bamList) {
|
||||
|
||||
val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv")
|
||||
val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv")
|
||||
val recalBam: File = swapExt(bam, ".bam", ".recal.bam")
|
||||
val path1: String = bam + ".before"
|
||||
val path2: String = bam + ".after"
|
||||
|
||||
add(cov(bam, recalFile1),
|
||||
recal(bam, recalFile1, recalBam),
|
||||
cov(recalBam, recalFile2),
|
||||
analyzeCovariates(recalFile1, path1),
|
||||
analyzeCovariates(recalFile2, path2))
|
||||
}
|
||||
}
|
||||
|
||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||
|
|
@ -84,7 +83,7 @@ class RecalibrateBaseQualities extends QScript {
|
|||
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
||||
this.resources = R
|
||||
this.recal_file = inRecalFile
|
||||
this.output_dir = outPath.toString
|
||||
this.output_dir = outPath
|
||||
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,30 +138,32 @@ class QGraph extends Logging {
|
|||
validate()
|
||||
|
||||
if (running && numMissingValues == 0) {
|
||||
logger.info("Generating scatter gather jobs.")
|
||||
val scatterGathers = jobGraph.edgeSet.filter(edge => scatterGatherable(edge))
|
||||
if (!scatterGathers.isEmpty) {
|
||||
logger.info("Generating scatter gather jobs.")
|
||||
|
||||
var addedFunctions = List.empty[QFunction]
|
||||
for (scatterGather <- scatterGathers) {
|
||||
val functions = scatterGather.asInstanceOf[FunctionEdge]
|
||||
.function.asInstanceOf[ScatterGatherableFunction]
|
||||
.generateFunctions()
|
||||
addedFunctions ++= functions
|
||||
var addedFunctions = List.empty[QFunction]
|
||||
for (scatterGather <- scatterGathers) {
|
||||
val functions = scatterGather.asInstanceOf[FunctionEdge]
|
||||
.function.asInstanceOf[ScatterGatherableFunction]
|
||||
.generateFunctions()
|
||||
addedFunctions ++= functions
|
||||
}
|
||||
|
||||
logger.info("Removing original jobs.")
|
||||
this.jobGraph.removeAllEdges(scatterGathers)
|
||||
prune()
|
||||
|
||||
logger.info("Adding scatter gather jobs.")
|
||||
addedFunctions.foreach(function => if (running) this.add(function))
|
||||
|
||||
logger.info("Regenerating graph.")
|
||||
fill
|
||||
val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile
|
||||
if (scatterGatherDotFile != null)
|
||||
renderToDot(scatterGatherDotFile)
|
||||
validate()
|
||||
}
|
||||
|
||||
logger.info("Removing original jobs.")
|
||||
this.jobGraph.removeAllEdges(scatterGathers)
|
||||
prune()
|
||||
|
||||
logger.info("Adding scatter gather jobs.")
|
||||
addedFunctions.foreach(function => if (running) this.add(function))
|
||||
|
||||
logger.info("Regenerating graph.")
|
||||
fill
|
||||
val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile
|
||||
if (scatterGatherDotFile != null)
|
||||
renderToDot(scatterGatherDotFile)
|
||||
validate()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -286,11 +286,11 @@ object Lsf706JobRunner extends Logging {
|
|||
// LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct)
|
||||
// LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist)
|
||||
logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(runner.jobId))
|
||||
val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate)
|
||||
if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) {
|
||||
val unknownStatusMillis = (System.currentTimeMillis - runner.lastStatusUpdate)
|
||||
if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) {
|
||||
// Unknown status has been returned for a while now.
|
||||
runner.updateStatus(RunnerStatus.FAILED)
|
||||
logger.error("Unable to read LSF status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description))
|
||||
// Use "%.2f": the "0" flag requires an explicit width, so "%0.2f" makes java.util.Formatter
// throw MissingFormatWidthException and the failure would never be logged.
logger.error("Unable to read LSF status for %.2f minutes: job id %d: %s".format(unknownStatusMillis/(60 * 1000D), runner.jobId, runner.function.description))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,60 @@
|
|||
package org.broadinstitute.sting.queue.util
|
||||
|
||||
import java.io.File
|
||||
import io.Source._
|
||||
import net.sf.samtools.{SAMReadGroupRecord, SAMFileReader}
|
||||
|
||||
import collection.JavaConversions._
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: carneiro
|
||||
* Date: 7/14/11
|
||||
* Time: 4:57 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
object QScriptUtils {

  /**
   * Takes a bam list file and produces a scala list with each file, allowing the bam list
   * to have empty lines and comment lines (lines starting with #).
   *
   * Each line is trimmed before it is inspected, so whitespace-only lines and indented
   * comment lines are skipped as well. The list file is closed once it has been read.
   *
   * @param in either a single file whose name ends with ".bam" or a text file listing one path per line
   * @return List(in) when in ends with ".bam"; otherwise the listed files, sorted with File.compareTo
   */
  def createListFromFile(in: File):List[File] = {
    // If the file provided ends with .bam, it is not a bam list, we treat it as a single file
    // and return a list with only this file.
    if (in.toString.endsWith(".bam"))
      return List(in)

    val source = fromFile(in)
    try {
      // getLines() is lazy: materialize the list before the source is closed in the finally block.
      source.getLines()
        .map(_.trim)
        .filter(line => !line.isEmpty && !line.startsWith("#"))
        .map(new File(_))
        .toList
        .sortWith(_.compareTo(_) < 0)
    } finally {
      source.close()
    }
  }

  /**
   * Returns the number of contigs in the BAM file header.
   *
   * The reader opened on the file is closed before returning.
   *
   * @param bamFile the BAM file whose header sequence dictionary is counted
   * @return the number of sequences in the header's sequence dictionary
   */
  def getNumberOfContigs(bamFile: File): Int = {
    val samReader = new SAMFileReader(bamFile)
    try {
      samReader.getFileHeader.getSequenceDictionary.getSequences.size()
    } finally {
      samReader.close()
    }
  }

  /**
   * Check if there are multiple samples in a BAM file.
   *
   * An empty sample name is treated as "no sample seen yet": the next read group's sample
   * replaces it instead of being compared against it.
   *
   * @param readGroups the read groups to inspect
   * @return true as soon as a read group's sample differs from the last non-empty sample seen, false otherwise
   */
  def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
    var sample: String = ""
    for (r <- readGroups) {
      if (sample.isEmpty)
        sample = r.getSample
      else if (sample != r.getSample)
        return true
    }
    false
  }

}
|
||||
Binary file not shown.
|
|
@ -1,4 +1,4 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="3"
|
||||
<info organisation="org.broad" module="tribble" revision="15"
|
||||
status="integration" publication="" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue