Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
92bbb9bbdd
|
|
@ -407,7 +407,14 @@ public class BAMSchedule implements CloseableIterator<BAMScheduleEntry> {
|
||||||
position(currentPosition);
|
position(currentPosition);
|
||||||
|
|
||||||
// Read data.
|
// Read data.
|
||||||
read(binHeader);
|
int binHeaderBytesRead = read(binHeader);
|
||||||
|
|
||||||
|
// Make sure we read in a complete bin header:
|
||||||
|
if ( binHeaderBytesRead < INT_SIZE_IN_BYTES * 3 ) {
|
||||||
|
throw new ReviewedStingException(String.format("Unable to read a complete bin header from BAM schedule file %s for BAM file %s. " +
|
||||||
|
"The BAM schedule file is likely incomplete/corrupt.",
|
||||||
|
scheduleFile.getAbsolutePath(), reader.getSamFilePath()));
|
||||||
|
}
|
||||||
|
|
||||||
// Decode contents.
|
// Decode contents.
|
||||||
binHeader.flip();
|
binHeader.flip();
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,8 @@ import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -245,7 +247,14 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
||||||
// This will ensure that if the two sets of contigs don't quite match (b36 male vs female ref, hg19 Epstein-Barr), then
|
// This will ensure that if the two sets of contigs don't quite match (b36 male vs female ref, hg19 Epstein-Barr), then
|
||||||
// we'll be using the correct contig index for the BAMs.
|
// we'll be using the correct contig index for the BAMs.
|
||||||
// TODO: Warning: assumes all BAMs use the same sequence dictionary! Get around this with contig aliasing.
|
// TODO: Warning: assumes all BAMs use the same sequence dictionary! Get around this with contig aliasing.
|
||||||
final int currentContigIndex = dataSource.getHeader().getSequence(currentLocus.getContig()).getSequenceIndex();
|
SAMSequenceRecord currentContigSequenceRecord = dataSource.getHeader().getSequence(currentLocus.getContig());
|
||||||
|
if ( currentContigSequenceRecord == null ) {
|
||||||
|
throw new UserException(String.format("Contig %s not present in sequence dictionary for merged BAM header: %s",
|
||||||
|
currentLocus.getContig(),
|
||||||
|
ReadUtils.prettyPrintSequenceRecords(dataSource.getHeader().getSequenceDictionary())));
|
||||||
|
}
|
||||||
|
|
||||||
|
final int currentContigIndex = currentContigSequenceRecord.getSequenceIndex();
|
||||||
|
|
||||||
// Stale reference sequence or first invocation. (Re)create the binTreeIterator.
|
// Stale reference sequence or first invocation. (Re)create the binTreeIterator.
|
||||||
if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentContigIndex) {
|
if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentContigIndex) {
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
|
@ -349,7 +350,18 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
private void read(final ByteBuffer buffer) {
|
private void read(final ByteBuffer buffer) {
|
||||||
try {
|
try {
|
||||||
fileChannel.read(buffer);
|
int bytesExpected = buffer.limit();
|
||||||
|
int bytesRead = fileChannel.read(buffer);
|
||||||
|
|
||||||
|
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||||
|
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
|
||||||
|
// must be truncated or otherwise corrupt:
|
||||||
|
if ( bytesRead < bytesExpected ) {
|
||||||
|
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
||||||
|
"It's likely that this file is truncated or corrupt -- " +
|
||||||
|
"Please try re-indexing the corresponding BAM file.",
|
||||||
|
mFile));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
||||||
|
|
|
||||||
|
|
@ -296,6 +296,10 @@ public class GATKReportTable {
|
||||||
return primaryKeyColumn.contains(primaryKey);
|
return primaryKeyColumn.contains(primaryKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Collection<Object> getPrimaryKeys() {
|
||||||
|
return Collections.unmodifiableCollection(primaryKeyColumn);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the value for a given position in the table
|
* Set the value for a given position in the table
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.commandline.Advanced;
|
import org.broadinstitute.sting.commandline.Advanced;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -115,6 +116,7 @@ import java.util.*;
|
||||||
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
|
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
@PartitionBy(PartitionType.NONE)
|
@PartitionBy(PartitionType.NONE)
|
||||||
|
@Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE)
|
||||||
public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partition,Map<String,int[]>>, CoveragePartitioner> implements TreeReducible<CoveragePartitioner> {
|
public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partition,Map<String,int[]>>, CoveragePartitioner> implements TreeReducible<CoveragePartitioner> {
|
||||||
@Output
|
@Output
|
||||||
@Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"})
|
@Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"})
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,162 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the read error rate per position in read (in the original 5'->3' orientation that the read had coming off the machine)
|
||||||
|
*
|
||||||
|
* Emits a GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate for each read
|
||||||
|
* group in the input BAMs FOR ONLY THE FIRST OF PAIR READS.
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* Any number of BAM files
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate.
|
||||||
|
*
|
||||||
|
* For example, running this tool on the NA12878 data sets:
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* ##:GATKReport.v0.2 ErrorRatePerCycle : The error rate per sequenced position in the reads
|
||||||
|
* readgroup cycle mismatches counts qual errorrate
|
||||||
|
* 20FUK.1 0 80 23368 25 3.47e-03
|
||||||
|
* 20FUK.1 1 40 23433 28 1.75e-03
|
||||||
|
* 20FUK.1 2 36 23453 28 1.58e-03
|
||||||
|
* 20FUK.1 3 26 23476 29 1.15e-03
|
||||||
|
* 20FUK.1 4 32 23495 29 1.40e-03
|
||||||
|
* up to 101 cycles
|
||||||
|
* 20FUK.2 0 77 20886 24 3.73e-03
|
||||||
|
* 20FUK.2 1 28 20920 29 1.39e-03
|
||||||
|
* 20FUK.2 2 24 20931 29 1.19e-03
|
||||||
|
* 20FUK.2 3 30 20940 28 1.48e-03
|
||||||
|
* 20FUK.2 4 25 20948 29 1.24e-03
|
||||||
|
* up to 101 cycles
|
||||||
|
* 20FUK.3 0 78 22038 24 3.58e-03
|
||||||
|
* 20FUK.3 1 40 22091 27 1.86e-03
|
||||||
|
* 20FUK.3 2 23 22108 30 1.09e-03
|
||||||
|
* 20FUK.3 3 36 22126 28 1.67e-03
|
||||||
|
* </pre>
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java
|
||||||
|
* -jar GenomeAnalysisTK.jar
|
||||||
|
* -T ErrorRatePerCycle
|
||||||
|
* -I bundle/current/b37/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam
|
||||||
|
* -R bundle/current/b37/human_g1k_v37.fasta
|
||||||
|
* -o example.gatkreport.txt
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author Kiran Garimella, Mark DePristo
|
||||||
|
*/
|
||||||
|
public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
|
||||||
|
@Output PrintStream out;
|
||||||
|
@Argument(fullName="min_base_quality_score", shortName="mbq", doc="Minimum base quality required to consider a base for calling", required=false)
|
||||||
|
public Integer MIN_BASE_QUAL = 0;
|
||||||
|
@Argument(fullName="min_mapping_quality_score", shortName="mmq", doc="Minimum read mapping quality required to consider a read for calling", required=false)
|
||||||
|
public Integer MIN_MAPPING_QUAL = 20;
|
||||||
|
|
||||||
|
private GATKReport report;
|
||||||
|
private GATKReportTable table;
|
||||||
|
private final static String reportName = "ErrorRatePerCycle";
|
||||||
|
private final static String reportDescription = "The error rate per sequenced position in the reads";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allows us to use multiple records for the key (read group x cycle)
|
||||||
|
*/
|
||||||
|
private static class TableKey implements Comparable<TableKey> {
|
||||||
|
final String readGroup;
|
||||||
|
final int cycle;
|
||||||
|
|
||||||
|
private TableKey(final String readGroup, final int cycle) {
|
||||||
|
this.readGroup = readGroup;
|
||||||
|
this.cycle = cycle;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(final TableKey tableKey) {
|
||||||
|
final int scmp = readGroup.compareTo(tableKey.readGroup);
|
||||||
|
if ( scmp == 0 )
|
||||||
|
return Integer.valueOf(cycle).compareTo(tableKey.cycle);
|
||||||
|
else
|
||||||
|
return scmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
report = new GATKReport();
|
||||||
|
report.addTable(reportName, reportDescription);
|
||||||
|
table = report.getTable(reportName);
|
||||||
|
table.addPrimaryKey("key", false);
|
||||||
|
table.addColumn("readgroup", 0);
|
||||||
|
table.addColumn("cycle", 0);
|
||||||
|
table.addColumn("mismatches", 0);
|
||||||
|
table.addColumn("counts", 0);
|
||||||
|
table.addColumn("qual", 0);
|
||||||
|
table.addColumn("errorrate", 0.0f, "%.2e");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
for ( final PileupElement p : context.getBasePileup() ) {
|
||||||
|
final GATKSAMRecord read = p.getRead();
|
||||||
|
final int offset = p.getOffset();
|
||||||
|
final boolean firstOfPair = ! read.getReadPairedFlag() || read.getFirstOfPairFlag();
|
||||||
|
|
||||||
|
if ( firstOfPair && read.getMappingQuality() >= MIN_MAPPING_QUAL && p.getQual() >= MIN_BASE_QUAL ) {
|
||||||
|
final byte readBase = p.getBase();
|
||||||
|
final byte refBase = ref.getBase();
|
||||||
|
final int cycle = offset;
|
||||||
|
|
||||||
|
if ( BaseUtils.isRegularBase(readBase) && BaseUtils.isRegularBase(refBase) ) {
|
||||||
|
final TableKey key = new TableKey(read.getReadGroup().getReadGroupId(), cycle);
|
||||||
|
|
||||||
|
if ( ! table.containsKey(key) ) {
|
||||||
|
table.set(key, "cycle", cycle);
|
||||||
|
table.set(key, "readgroup", read.getReadGroup().getReadGroupId());
|
||||||
|
}
|
||||||
|
|
||||||
|
table.increment(key, "counts");
|
||||||
|
if (readBase != refBase)
|
||||||
|
table.increment(key, "mismatches");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() { return null; }
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) { return null; }
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer sum) {
|
||||||
|
for ( final Object key : table.getPrimaryKeys() ) {
|
||||||
|
final int mismatches = (Integer)table.get(key, "mismatches");
|
||||||
|
final int count = (Integer)table.get(key, "counts");
|
||||||
|
final double errorRate = (mismatches + 1) / (1.0*(count + 1));
|
||||||
|
final int qual = QualityUtils.probToQual(1-errorRate, 0.0);
|
||||||
|
table.set(key, "qual", qual);
|
||||||
|
table.set(key, "errorrate", errorRate);
|
||||||
|
}
|
||||||
|
|
||||||
|
report.print(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -94,9 +94,6 @@ import java.util.Map;
|
||||||
*
|
*
|
||||||
* @author Mark DePristo
|
* @author Mark DePristo
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
||||||
@Output
|
@Output
|
||||||
public PrintStream out;
|
public PrintStream out;
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,10 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
import org.apache.commons.jexl2.Expression;
|
||||||
|
import org.apache.commons.jexl2.JexlContext;
|
||||||
|
import org.apache.commons.jexl2.JexlEngine;
|
||||||
|
import org.apache.commons.jexl2.MapContext;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -71,7 +75,7 @@ import java.util.*;
|
||||||
* <p>
|
* <p>
|
||||||
* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferrably MSA cleaned) sequencing
|
* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferrably MSA cleaned) sequencing
|
||||||
* data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
|
* data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
|
||||||
* include additional statistics such as mismtaches and base qualitites around the calls, read strandness (how many
|
* include additional statistics such as mismatches and base qualitites around the calls, read strandness (how many
|
||||||
* forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
|
* forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
|
||||||
* statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
|
* statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
|
||||||
* attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
|
* attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
|
||||||
|
|
@ -88,6 +92,16 @@ import java.util.*;
|
||||||
* bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
|
* bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
|
||||||
* on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
|
* on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
|
||||||
*
|
*
|
||||||
|
* Which (putative) calls will make it into the output file(s) is controlled by an expression/list of expressions passed with -filter
|
||||||
|
* flag: if any of the expressions evaluate to TRUE, the site will be discarded. Otherwise the putative call and all the
|
||||||
|
* associated statistics will be printed into the output. Expressions recognize the following variables(in paired-sample
|
||||||
|
* somatic mode variables are prefixed with T_ and N_ for Tumor and Normal, e.g. N_COV and T_COV are defined instead of COV):
|
||||||
|
* COV for coverage at the site, INDEL_F for fraction of reads supporting consensus indel at the site (wrt total coverage),
|
||||||
|
* INDEL_CF for fraction of reads with consensus indel wrt all reads with an indel at the site, CONS_CNT for the count of
|
||||||
|
* reads supporting the consensus indel at the site. Conventional arithmetic and logical operations are supported. For instance,
|
||||||
|
* N_COV<4||T_COV<6||T_INDEL_F<0.3||T_INDEL_CF<0.7 instructs the tool to only output indel calls with at least 30% observed
|
||||||
|
* allelic fraction and with consensus indel making at least 70% of all indel observations at the site, and only at the sites
|
||||||
|
* where tumor coverage and normal coverage are at least 6 and 4, respectively.
|
||||||
* <h2>Input</h2>
|
* <h2>Input</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
|
* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
|
||||||
|
|
@ -147,30 +161,44 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
|
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
|
||||||
java.io.File bedOutput = null;
|
java.io.File bedOutput = null;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
@Argument(fullName="minCoverage", shortName="minCoverage",
|
@Argument(fullName="minCoverage", shortName="minCoverage",
|
||||||
doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
|
doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
|
||||||
"with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false)
|
"with --unpaired (single sample) option, this value is used for minimum sample coverage. "+
|
||||||
|
"INSTEAD USE: T_COV<cutoff (or COV<cutoff in unpaired mode) in -filter expression (see -filter).",
|
||||||
|
required=false)
|
||||||
int minCoverage = 6;
|
int minCoverage = 6;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
|
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
|
||||||
doc="used only in default (somatic) mode; normal sample must have at least minNormalCoverage "+
|
doc="used only in default (somatic) mode; normal sample must have at least minNormalCoverage "+
|
||||||
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
|
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored. "+
|
||||||
|
"INSTEAD USE: N_COV<cutoff in -filter expression (see -filter).", required=false)
|
||||||
int minNormalCoverage = 4;
|
int minNormalCoverage = 4;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
@Argument(fullName="minFraction", shortName="minFraction",
|
@Argument(fullName="minFraction", shortName="minFraction",
|
||||||
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
|
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, "+
|
||||||
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
|
"required for making a call"+
|
||||||
|
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction). "+
|
||||||
|
"INSTEAD USE: T_INDEL_F<cutoff (or INDEL_F<cutoff in unpaired mode) in -filter expression "+
|
||||||
|
"(see -filter).", required=false)
|
||||||
double minFraction = 0.3;
|
double minFraction = 0.3;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
|
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
|
||||||
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
|
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
|
||||||
"all indel observations at the site exceeds this threshold", required=false)
|
"all indel observations at the site exceeds this threshold. "+
|
||||||
|
"INSTEAD USE: T_INDEL_CF<cutoff (or INDEL_CF<cutoff in unpaired mode) in -filter expression "+
|
||||||
|
"(see -filter).", required=false)
|
||||||
double minConsensusFraction = 0.7;
|
double minConsensusFraction = 0.7;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
@Argument(fullName="minIndelCount", shortName="minCnt",
|
@Argument(fullName="minIndelCount", shortName="minCnt",
|
||||||
doc="Minimum count of reads supporting consensus indel required for making the call. "+
|
doc="Minimum count of reads supporting consensus indel required for making the call. "+
|
||||||
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
|
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
|
||||||
"(minIndelCount not met) will not pass.", required=false)
|
"(minIndelCount not met) will not pass. INSTEAD USE: T_CONS_CNT<cutoff "+
|
||||||
|
"(or CONS_CNT<cutoff in unpaired mode) in -filter expression (see -filter).", required=false)
|
||||||
int minIndelCount = 0;
|
int minIndelCount = 0;
|
||||||
|
|
||||||
@Argument(fullName="refseq", shortName="refseq",
|
@Argument(fullName="refseq", shortName="refseq",
|
||||||
|
|
@ -178,6 +206,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
"GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
|
"GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
|
||||||
String RefseqFileName = null;
|
String RefseqFileName = null;
|
||||||
|
|
||||||
|
|
||||||
|
@Argument(shortName="filter", doc="One or more logical expressions. If any of the expressions is TRUE, " +
|
||||||
|
"putative indel will be discarded and nothing will be printed into the output (unless genotyping "+
|
||||||
|
"at the specific position is explicitly requested, see -genotype). "+
|
||||||
|
"Default: T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7", required=false)
|
||||||
|
public ArrayList<String> FILTER_EXPRESSIONS = new ArrayList<String>();
|
||||||
|
|
||||||
//@Argument(fullName="blacklistedLanes", shortName="BL",
|
//@Argument(fullName="blacklistedLanes", shortName="BL",
|
||||||
// doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
|
// doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
|
||||||
// "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
|
// "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
|
||||||
|
|
@ -221,7 +256,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
private Writer verboseWriter = null;
|
private Writer verboseWriter = null;
|
||||||
|
|
||||||
|
|
||||||
private static String annGenomic = "GENOMIC";
|
private static String annGenomic = "GENOMIC\t";
|
||||||
private static String annIntron = "INTRON";
|
private static String annIntron = "INTRON";
|
||||||
private static String annUTR = "UTR";
|
private static String annUTR = "UTR";
|
||||||
private static String annCoding = "CODING";
|
private static String annCoding = "CODING";
|
||||||
|
|
@ -245,6 +280,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed;
|
private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed;
|
||||||
// can be 1 base before lastGenotyped start
|
// can be 1 base before lastGenotyped start
|
||||||
|
|
||||||
|
private JexlEngine jexlEngine = new JexlEngine();
|
||||||
|
private ArrayList<Expression> jexlExpressions = new ArrayList<Expression>();
|
||||||
|
|
||||||
|
// the following arrays store indel source-specific (normal/tumor) metric names
|
||||||
|
// for fast access when populating JEXL expression contexts (see IndelPrecall.fillContext())
|
||||||
|
private final static String[] normalMetricsCassette = new String[4];
|
||||||
|
private final static String[] tumorMetricsCassette = new String[4];
|
||||||
|
private final static String[] singleMetricsCassette = new String[4];
|
||||||
|
private final static int C_COV=0;
|
||||||
|
private final static int C_CONS_CNT=1;
|
||||||
|
private final static int C_INDEL_F=2;
|
||||||
|
private final static int C_INDEL_CF=3;
|
||||||
|
static {
|
||||||
|
normalMetricsCassette[C_COV] = "N_COV";
|
||||||
|
tumorMetricsCassette[C_COV] = "T_COV";
|
||||||
|
singleMetricsCassette[C_COV] = "COV";
|
||||||
|
normalMetricsCassette[C_CONS_CNT] = "N_CONS_CNT";
|
||||||
|
tumorMetricsCassette[C_CONS_CNT] = "T_CONS_CNT";
|
||||||
|
singleMetricsCassette[C_CONS_CNT] = "CONS_CNT";
|
||||||
|
normalMetricsCassette[C_INDEL_F] = "N_INDEL_F";
|
||||||
|
tumorMetricsCassette[C_INDEL_F] = "T_INDEL_F";
|
||||||
|
singleMetricsCassette[C_INDEL_F] = "INDEL_F";
|
||||||
|
normalMetricsCassette[C_INDEL_CF] = "N_INDEL_CF";
|
||||||
|
tumorMetricsCassette[C_INDEL_CF] = "T_INDEL_CF";
|
||||||
|
singleMetricsCassette[C_INDEL_CF] = "INDEL_CF";
|
||||||
|
}
|
||||||
|
|
||||||
// "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
|
// "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
|
||||||
|
|
||||||
|
|
@ -389,6 +450,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
vcf_writer.writeHeader(new VCFHeader(getVCFHeaderInfo(), SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))) ;
|
vcf_writer.writeHeader(new VCFHeader(getVCFHeaderInfo(), SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))) ;
|
||||||
refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile);
|
refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile);
|
||||||
|
|
||||||
|
// Now initialize JEXL expressions:
|
||||||
|
if ( FILTER_EXPRESSIONS.size() == 0 ) {
|
||||||
|
if ( call_unpaired ) {
|
||||||
|
FILTER_EXPRESSIONS.add("COV<6||INDEL_F<0.3||INDEL_CF<0.7");
|
||||||
|
} else {
|
||||||
|
FILTER_EXPRESSIONS.add("T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for ( String s : FILTER_EXPRESSIONS ) {
|
||||||
|
try {
|
||||||
|
Expression e = jexlEngine.createExpression(s);
|
||||||
|
jexlExpressions.add(e);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new UserException.BadArgumentValue("Filter expression", "Invalid expression used (" + s + "). Please see the JEXL docs for correct syntax.") ;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -661,14 +740,26 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue;
|
if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue;
|
||||||
|
|
||||||
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
|
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
|
||||||
|
JexlContext jc = new MapContext();
|
||||||
|
normalCall.fillContext(jc,singleMetricsCassette);
|
||||||
|
boolean discard_event = false;
|
||||||
|
|
||||||
if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
|
for ( Expression e : jexlExpressions ) {
|
||||||
if ( DEBUG ) {
|
if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; }
|
||||||
System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
|
||||||
}
|
|
||||||
continue; // low coverage
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( discard_event && ! genotype ) {
|
||||||
|
normal_context.indelsAt(pos).clear();
|
||||||
|
continue; //*
|
||||||
|
}
|
||||||
|
|
||||||
|
// if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
|
||||||
|
// if ( DEBUG ) {
|
||||||
|
// System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
||||||
|
// }
|
||||||
|
// continue; // low coverage
|
||||||
|
// }
|
||||||
|
|
||||||
if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos);
|
if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos);
|
||||||
|
|
||||||
long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() );
|
long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() );
|
||||||
|
|
@ -697,24 +788,16 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos);
|
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos);
|
||||||
|
|
||||||
boolean haveCall = normalCall.isCall(); // cache the value
|
if ( ! discard_event ) normalCallsMade++;
|
||||||
|
printVCFLine(vcf_writer,normalCall, discard_event);
|
||||||
if ( haveCall || genotype) {
|
if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
|
||||||
if ( haveCall ) normalCallsMade++;
|
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, discard_event);
|
||||||
printVCFLine(vcf_writer,normalCall);
|
lastGenotypedPosition = pos;
|
||||||
if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
|
|
||||||
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall);
|
|
||||||
lastGenotypedPosition = pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
normal_context.indelsAt(pos).clear();
|
normal_context.indelsAt(pos).clear();
|
||||||
// we dealt with this indel; don't want to see it again
|
// we dealt with this indel; don't want to see it again
|
||||||
// (we might otherwise in the case when 1) there is another indel that follows
|
// (we might otherwise in the case when 1) there is another indel that follows
|
||||||
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
|
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
|
||||||
|
|
||||||
// for ( IndelVariant var : variants ) {
|
|
||||||
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
|
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
|
||||||
|
|
@ -829,18 +912,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH);
|
IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH);
|
||||||
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
|
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
|
||||||
|
|
||||||
if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
|
JexlContext jc = new MapContext();
|
||||||
if ( DEBUG ) {
|
tumorCall.fillContext(jc,tumorMetricsCassette);
|
||||||
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
|
normalCall.fillContext(jc,normalMetricsCassette);
|
||||||
}
|
boolean discard_event = false;
|
||||||
continue; // low coverage
|
|
||||||
|
for ( Expression e : jexlExpressions ) {
|
||||||
|
if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; }
|
||||||
}
|
}
|
||||||
if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
|
|
||||||
if ( DEBUG ) {
|
if ( discard_event && ! genotype ) {
|
||||||
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
tumor_context.indelsAt(pos).clear();
|
||||||
}
|
normal_context.indelsAt(pos).clear();
|
||||||
continue; // low coverage
|
continue; //*
|
||||||
}
|
}
|
||||||
|
// if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
|
||||||
|
// if ( DEBUG ) {
|
||||||
|
// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
|
||||||
|
// }
|
||||||
|
// continue; // low coverage
|
||||||
|
// }
|
||||||
|
// if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
|
||||||
|
// if ( DEBUG ) {
|
||||||
|
// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
||||||
|
// }
|
||||||
|
// continue; // low coverage
|
||||||
|
// }
|
||||||
|
|
||||||
if ( DEBUG ) {
|
if ( DEBUG ) {
|
||||||
System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, ");
|
System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, ");
|
||||||
|
|
@ -868,32 +965,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
|
if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
|
||||||
|
|
||||||
// location = getToolkit().getGenomeLocParser().setStart(location,pos);
|
|
||||||
// location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
|
|
||||||
|
|
||||||
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data
|
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data
|
||||||
|
|
||||||
boolean haveCall = tumorCall.isCall(); // cache the value
|
// boolean haveCall = tumorCall.isCall(); // cache the value
|
||||||
|
|
||||||
if ( haveCall || genotype ) {
|
if ( ! discard_event ) tumorCallsMade++;
|
||||||
if ( haveCall ) tumorCallsMade++;
|
|
||||||
|
|
||||||
printVCFLine(vcf_writer,normalCall,tumorCall);
|
printVCFLine(vcf_writer,normalCall,tumorCall,discard_event);
|
||||||
|
|
||||||
if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
|
if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
|
||||||
|
|
||||||
|
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall, discard_event );
|
||||||
|
lastGenotypedPosition = pos;
|
||||||
|
|
||||||
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall );
|
|
||||||
lastGenotypedPosition = pos;
|
|
||||||
}
|
|
||||||
tumor_context.indelsAt(pos).clear();
|
tumor_context.indelsAt(pos).clear();
|
||||||
normal_context.indelsAt(pos).clear();
|
normal_context.indelsAt(pos).clear();
|
||||||
// we dealt with this indel; don't want to see it again
|
// we dealt with this indel; don't want to see it again
|
||||||
// (we might otherwise in the case when 1) there is another indel that follows
|
// (we might otherwise in the case when 1) there is another indel that follows
|
||||||
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
|
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
|
||||||
|
|
||||||
// for ( IndelVariant var : variants ) {
|
|
||||||
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
|
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
|
||||||
|
|
@ -947,14 +1036,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) {
|
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, boolean discard_event) {
|
||||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
||||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||||
|
|
||||||
StringBuilder fullRecord = new StringBuilder();
|
StringBuilder fullRecord = new StringBuilder();
|
||||||
fullRecord.append(makeFullRecord(normalCall));
|
fullRecord.append(makeFullRecord(normalCall));
|
||||||
fullRecord.append(annotationString);
|
fullRecord.append(annotationString);
|
||||||
if ( ! normalCall.isCall() && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
if ( discard_event && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
||||||
try {
|
try {
|
||||||
verboseWriter.write(fullRecord.toString());
|
verboseWriter.write(fullRecord.toString());
|
||||||
verboseWriter.write('\n');
|
verboseWriter.write('\n');
|
||||||
|
|
@ -965,7 +1054,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) {
|
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall, boolean discard_event) {
|
||||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
||||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||||
|
|
||||||
|
|
@ -1013,7 +1102,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
fullRecord.append('\t');
|
fullRecord.append('\t');
|
||||||
fullRecord.append(annotationString);
|
fullRecord.append(annotationString);
|
||||||
|
|
||||||
if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
if ( discard_event && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
verboseWriter.write(fullRecord.toString());
|
verboseWriter.write(fullRecord.toString());
|
||||||
|
|
@ -1023,7 +1112,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void printVCFLine(VCFWriter vcf, IndelPrecall call) {
|
public void printVCFLine(VCFWriter vcf, IndelPrecall call, boolean discard_event) {
|
||||||
|
|
||||||
long start = call.getPosition()-1;
|
long start = call.getPosition()-1;
|
||||||
// If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
|
// If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
|
||||||
|
|
@ -1060,14 +1149,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
Map<String,Object> attrs = call.makeStatsAttributes(null);
|
Map<String,Object> attrs = call.makeStatsAttributes(null);
|
||||||
|
|
||||||
if ( call.isCall() ) // we made a call - put actual het genotype here:
|
if ( ! discard_event ) // we made a call - put actual het genotype here:
|
||||||
genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||||
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||||
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||||
|
|
||||||
}
|
}
|
||||||
Set<String> filters = null;
|
Set<String> filters = null;
|
||||||
if ( call.getVariant() != null && ! call.isCall() ) {
|
if ( call.getVariant() != null && discard_event ) {
|
||||||
filters = new HashSet<String>();
|
filters = new HashSet<String>();
|
||||||
filters.add("NoCall");
|
filters.add("NoCall");
|
||||||
}
|
}
|
||||||
|
|
@ -1095,7 +1184,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) {
|
public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall, boolean discard_event) {
|
||||||
|
|
||||||
long start = tCall.getPosition()-1;
|
long start = tCall.getPosition()-1;
|
||||||
long stop = start;
|
long stop = start;
|
||||||
|
|
@ -1112,7 +1201,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
Map<String,Object> attrs = new HashMap();
|
Map<String,Object> attrs = new HashMap();
|
||||||
|
|
||||||
boolean isSomatic = false;
|
boolean isSomatic = false;
|
||||||
if ( nCall.getCoverage() >= minNormalCoverage && nCall.getVariant() == null && tCall.getVariant() != null ) {
|
if ( nCall.getVariant() == null && tCall.getVariant() != null ) {
|
||||||
isSomatic = true;
|
isSomatic = true;
|
||||||
attrs.put(VCFConstants.SOMATIC_KEY,true);
|
attrs.put(VCFConstants.SOMATIC_KEY,true);
|
||||||
}
|
}
|
||||||
|
|
@ -1155,7 +1244,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<String> filters = null;
|
Set<String> filters = null;
|
||||||
if ( tCall.getVariant() != null && ! tCall.isCall() ) {
|
if ( tCall.getVariant() != null && discard_event ) {
|
||||||
filters = new HashSet<String>();
|
filters = new HashSet<String>();
|
||||||
filters.add("NoCall");
|
filters.add("NoCall");
|
||||||
}
|
}
|
||||||
|
|
@ -1602,6 +1691,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
public IndelVariant getVariant() { return consensus_indel; }
|
public IndelVariant getVariant() { return consensus_indel; }
|
||||||
|
|
||||||
|
public void fillContext(JexlContext context,String[] cassette) {
|
||||||
|
context.set(cassette[C_INDEL_F],((double)consensus_indel_count)/total_coverage);
|
||||||
|
context.set(cassette[C_INDEL_CF],((double)consensus_indel_count/all_indel_count));
|
||||||
|
context.set(cassette[C_COV],total_coverage);
|
||||||
|
context.set(cassette[C_CONS_CNT],consensus_indel_count);
|
||||||
|
}
|
||||||
|
/*
|
||||||
public boolean isCall() {
|
public boolean isCall() {
|
||||||
boolean ret = ( consensus_indel_count >= minIndelCount &&
|
boolean ret = ( consensus_indel_count >= minIndelCount &&
|
||||||
(double)consensus_indel_count > minFraction * total_coverage &&
|
(double)consensus_indel_count > minFraction * total_coverage &&
|
||||||
|
|
@ -1610,10 +1706,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
" total_count="+all_indel_count+" cov="+total_coverage+
|
" total_count="+all_indel_count+" cov="+total_coverage+
|
||||||
" minConsensusF="+((double)consensus_indel_count)/all_indel_count+
|
" minConsensusF="+((double)consensus_indel_count)/all_indel_count+
|
||||||
" minF="+((double)consensus_indel_count)/total_coverage);
|
" minF="+((double)consensus_indel_count)/total_coverage);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
// return true;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
/** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed
|
/** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed
|
||||||
* variants, and sets the counts of consensus observations and all observations of any indels (including non-consensus)
|
* variants, and sets the counts of consensus observations and all observations of any indels (including non-consensus)
|
||||||
* @param variants
|
* @param variants
|
||||||
|
|
|
||||||
|
|
@ -134,6 +134,10 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
@Argument(doc="Use Sequenom output format instead of regular FASTA",fullName="sqnm",required=false)
|
@Argument(doc="Use Sequenom output format instead of regular FASTA",fullName="sqnm",required=false)
|
||||||
boolean sequenomOutput = false;
|
boolean sequenomOutput = false;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(doc="Use ILMN output format instead of regular FASTA",fullName="ilmn",required=false)
|
||||||
|
boolean ilmnOutput = false;
|
||||||
|
|
||||||
|
|
||||||
GenomeLoc prevInterval;
|
GenomeLoc prevInterval;
|
||||||
GenomeLoc allelePos;
|
GenomeLoc allelePos;
|
||||||
|
|
@ -141,6 +145,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
StringBuilder sequence;
|
StringBuilder sequence;
|
||||||
StringBuilder rawSequence;
|
StringBuilder rawSequence;
|
||||||
boolean sequenceInvalid;
|
boolean sequenceInvalid;
|
||||||
|
boolean isSiteSNP;
|
||||||
List<String> invReason;
|
List<String> invReason;
|
||||||
int indelCounter;
|
int indelCounter;
|
||||||
|
|
||||||
|
|
@ -169,6 +174,9 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
header.setSequenceDictionary(referenceDictionary);
|
header.setSequenceDictionary(referenceDictionary);
|
||||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ilmnOutput)
|
||||||
|
out.println("Locus_Name,Target_Type,Sequence,Chromosome,Coordinate,Genome_Build_Version,Source,Source_Version,Sequence_Orientation,Plus_Minus,Force_Infinium_I");
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer reduceInit() {
|
public Integer reduceInit() {
|
||||||
|
|
@ -234,6 +242,8 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
rawSequence.append(Character.toUpperCase((char) ref.getBase()));
|
rawSequence.append(Character.toUpperCase((char) ref.getBase()));
|
||||||
} else if ( validate != null ) {
|
} else if ( validate != null ) {
|
||||||
|
// record variant type in case it's needed in output format
|
||||||
|
isSiteSNP = (validate.isSNP());
|
||||||
// doesn't matter if there's a mask here too -- this is what we want to validate
|
// doesn't matter if there's a mask here too -- this is what we want to validate
|
||||||
if ( validate.isFiltered() ) {
|
if ( validate.isFiltered() ) {
|
||||||
logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site.");
|
logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site.");
|
||||||
|
|
@ -496,13 +506,19 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
|
|
||||||
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
|
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
|
||||||
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
||||||
if (!sequenomOutput)
|
if (sequenomOutput) {
|
||||||
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
|
||||||
else {
|
|
||||||
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
||||||
probeName = probeName.replace("amplicon_","a");
|
probeName = probeName.replace("amplicon_","a");
|
||||||
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
||||||
}
|
}
|
||||||
|
else if (ilmnOutput) {
|
||||||
|
String type = isSiteSNP?"SNP":"INDEL";
|
||||||
|
seqIdentity = seqIdentity.replace("*",""); // no * in ref allele
|
||||||
|
out.printf("%s,%s,%s,%s,%d,37,1000G,ExomePhase1,Forward,Plus,FALSE%n",probeName,type,seqIdentity,allelePos.getContig(),allelePos.getStart());
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||||
|
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import net.sf.picard.util.IntervalTree;
|
import net.sf.picard.util.IntervalTree;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
@ -19,11 +20,8 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
|
@ -32,7 +30,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||||
|
|
@ -389,9 +386,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
nec.apply(tracker, ref, context, comp, eval);
|
nec.apply(tracker, ref, context, comp, eval);
|
||||||
}
|
}
|
||||||
|
|
||||||
// eval=null against all comps of different type
|
// eval=null against all comps of different type that aren't bound to another eval
|
||||||
for ( VariantContext otherComp : compSet ) {
|
for ( VariantContext otherComp : compSet ) {
|
||||||
if ( otherComp != comp ) {
|
if ( otherComp != comp && ! compHasMatchingEval(otherComp, evalSetBySample) ) {
|
||||||
synchronized (nec) {
|
synchronized (nec) {
|
||||||
nec.apply(tracker, ref, context, otherComp, null);
|
nec.apply(tracker, ref, context, otherComp, null);
|
||||||
}
|
}
|
||||||
|
|
@ -409,6 +406,35 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"comp != null", "evals != null"})
|
||||||
|
private boolean compHasMatchingEval(final VariantContext comp, final Collection<VariantContext> evals) {
|
||||||
|
// find all of the matching comps
|
||||||
|
for ( final VariantContext eval : evals ) {
|
||||||
|
if ( eval != null && doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) != EvalCompMatchType.NO_MATCH )
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// nothing matched
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private enum EvalCompMatchType { NO_MATCH, STRICT, LENIENT }
|
||||||
|
|
||||||
|
@Requires({"eval != null", "comp != null"})
|
||||||
|
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
|
||||||
|
// find all of the matching comps
|
||||||
|
if ( comp.getType() != eval.getType() )
|
||||||
|
return EvalCompMatchType.NO_MATCH;
|
||||||
|
|
||||||
|
// find the comp which matches both the reference allele and alternate allele from eval
|
||||||
|
final Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
|
||||||
|
final Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
|
||||||
|
if ((altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())))
|
||||||
|
return EvalCompMatchType.STRICT;
|
||||||
|
else
|
||||||
|
return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT;
|
||||||
|
}
|
||||||
|
|
||||||
private VariantContext findMatchingComp(final VariantContext eval, final Collection<VariantContext> comps) {
|
private VariantContext findMatchingComp(final VariantContext eval, final Collection<VariantContext> comps) {
|
||||||
// if no comps, return null
|
// if no comps, return null
|
||||||
if ( comps == null || comps.isEmpty() )
|
if ( comps == null || comps.isEmpty() )
|
||||||
|
|
@ -419,26 +445,21 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
return comps.iterator().next();
|
return comps.iterator().next();
|
||||||
|
|
||||||
// find all of the matching comps
|
// find all of the matching comps
|
||||||
List<VariantContext> matchingComps = new ArrayList<VariantContext>(comps.size());
|
VariantContext lenientMatch = null;
|
||||||
for ( VariantContext comp : comps ) {
|
for ( final VariantContext comp : comps ) {
|
||||||
if ( comp.getType() == eval.getType() )
|
switch ( doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) ) {
|
||||||
matchingComps.add(comp);
|
case STRICT:
|
||||||
|
return comp;
|
||||||
|
case LENIENT:
|
||||||
|
if ( lenientMatch == null ) lenientMatch = comp;
|
||||||
|
break;
|
||||||
|
case NO_MATCH:
|
||||||
|
;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if no matching comp, return null
|
// nothing matched, just return lenientMatch, which might be null
|
||||||
if ( matchingComps.size() == 0 )
|
return lenientMatch;
|
||||||
return null;
|
|
||||||
|
|
||||||
// find the comp which matches both the reference allele and alternate allele from eval
|
|
||||||
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
|
|
||||||
for ( VariantContext comp : matchingComps ) {
|
|
||||||
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
|
|
||||||
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
|
|
||||||
return comp;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if none match, just return the first one unless we require a strict match
|
|
||||||
return (requireStrictAlleleMatch ? null : matchingComps.get(0));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }
|
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }
|
||||||
|
|
|
||||||
|
|
@ -417,8 +417,6 @@ public class VariantEvalUtils {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
stateKeys.add(stateKey);
|
stateKeys.add(stateKey);
|
||||||
|
|
||||||
return stateKeys;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return stateKeys;
|
return stateKeys;
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@ import net.sf.samtools.SAMUtils;
|
||||||
*/
|
*/
|
||||||
public class QualityUtils {
|
public class QualityUtils {
|
||||||
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||||
|
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
|
||||||
|
|
||||||
public final static double MIN_REASONABLE_ERROR = 0.0001;
|
public final static double MIN_REASONABLE_ERROR = 0.0001;
|
||||||
public final static byte MAX_REASONABLE_Q_SCORE = 40;
|
public final static byte MAX_REASONABLE_Q_SCORE = 40;
|
||||||
public final static byte MIN_USABLE_Q_SCORE = 6;
|
public final static byte MIN_USABLE_Q_SCORE = 6;
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -273,7 +274,7 @@ public class UserException extends ReviewedStingException {
|
||||||
public static class IncompatibleSequenceDictionaries extends UserException {
|
public static class IncompatibleSequenceDictionaries extends UserException {
|
||||||
public IncompatibleSequenceDictionaries(String message, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
|
public IncompatibleSequenceDictionaries(String message, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
|
||||||
super(String.format("Input files %s and %s have incompatible contigs: %s.\n %s contigs = %s\n %s contigs = %s",
|
super(String.format("Input files %s and %s have incompatible contigs: %s.\n %s contigs = %s\n %s contigs = %s",
|
||||||
name1, name2, message, name1, prettyPrintSequenceRecords(dict1), name2, prettyPrintSequenceRecords(dict2)));
|
name1, name2, message, name1, ReadUtils.prettyPrintSequenceRecords(dict1), name2, ReadUtils.prettyPrintSequenceRecords(dict2)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -284,17 +285,11 @@ public class UserException extends ReviewedStingException {
|
||||||
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
|
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
|
||||||
+ "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam"
|
+ "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam"
|
||||||
+ "\n %s contigs = %s",
|
+ "\n %s contigs = %s",
|
||||||
name, name, prettyPrintSequenceRecords(dict)));
|
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String prettyPrintSequenceRecords(SAMSequenceDictionary sequenceDictionary) {
|
|
||||||
String[] sequenceRecordNames = new String[sequenceDictionary.size()];
|
|
||||||
int sequenceRecordIndex = 0;
|
|
||||||
for (SAMSequenceRecord sequenceRecord : sequenceDictionary.getSequences())
|
|
||||||
sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName();
|
|
||||||
return Arrays.deepToString(sequenceRecordNames);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class MissingWalker extends UserException {
|
public static class MissingWalker extends UserException {
|
||||||
public MissingWalker(String walkerName, String message) {
|
public MissingWalker(String walkerName, String message) {
|
||||||
|
|
|
||||||
|
|
@ -648,4 +648,12 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
return new Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>>(locusToReadMap, readToLocusMap);
|
return new Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>>(locusToReadMap, readToLocusMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) {
|
||||||
|
String[] sequenceRecordNames = new String[sequenceDictionary.size()];
|
||||||
|
int sequenceRecordIndex = 0;
|
||||||
|
for (SAMSequenceRecord sequenceRecord : sequenceDictionary.getSequences())
|
||||||
|
sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName();
|
||||||
|
return Arrays.deepToString(sequenceRecordNames);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,8 @@ public abstract class BaseTest {
|
||||||
public static final String annotationDataLocation = GATKDataLocation + "Annotations/";
|
public static final String annotationDataLocation = GATKDataLocation + "Annotations/";
|
||||||
|
|
||||||
public static final String b37GoodBAM = validationDataLocation + "/CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
public static final String b37GoodBAM = validationDataLocation + "/CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||||
|
public static final String b37GoodNA12878BAM = validationDataLocation + "/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||||
|
public static final String b37_NA12878_OMNI = validationDataLocation + "/NA12878.omni.vcf";
|
||||||
|
|
||||||
public static final String refseqAnnotationLocation = annotationDataLocation + "refseq/";
|
public static final String refseqAnnotationLocation = annotationDataLocation + "refseq/";
|
||||||
public static final String hg18Refseq = refseqAnnotationLocation + "refGene-big-table-hg18.txt";
|
public static final String hg18Refseq = refseqAnnotationLocation + "refGene-big-table-hg18.txt";
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
@ -91,4 +92,16 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(bamIndex.getLevelSize(5),37448-4681+1);
|
Assert.assertEquals(bamIndex.getLevelSize(5),37448-4681+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||||
|
public void testDetectTruncatedBamIndexWordBoundary() {
|
||||||
|
GATKBAMIndex index = new GATKBAMIndex(new File(validationDataLocation + "truncated_at_word_boundary.bai"));
|
||||||
|
index.readReferenceSequence(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||||
|
public void testDetectTruncatedBamIndexNonWordBoundary() {
|
||||||
|
GATKBAMIndex index = new GATKBAMIndex(new File(validationDataLocation + "truncated_at_non_word_boundary.bai"));
|
||||||
|
index.readReferenceSequence(0);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class ErrorRatePerCycleIntegrationTest extends WalkerTest {
|
||||||
|
@Test
|
||||||
|
public void basicTest() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s",
|
||||||
|
1,
|
||||||
|
Arrays.asList("0cc212ecb6df300e321784039ff29f13"));
|
||||||
|
executeTest("ErrorRatePerCycle:", spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
"-o %s"
|
"-o %s"
|
||||||
),
|
),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("f909fd8374f663e983b9b3fda4cf5cf1")
|
Arrays.asList("c8d8bffa5c572df9dec7364f71a1b943")
|
||||||
);
|
);
|
||||||
executeTest("testFunctionClassWithSnpeff", spec);
|
executeTest("testFunctionClassWithSnpeff", spec);
|
||||||
}
|
}
|
||||||
|
|
@ -277,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
|
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
|
||||||
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
||||||
1, Arrays.asList("4f60acc8a4b21c4b4ccb51ad9071449c"));
|
1, Arrays.asList("c49e239292704447a36e01ee9a71e729"));
|
||||||
executeTestParallel("testSelect1", spec);
|
executeTestParallel("testSelect1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -335,7 +335,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" --dbsnp " + b37dbSNP132 +
|
" --dbsnp " + b37dbSNP132 +
|
||||||
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||||
" -noST -ST Novelty -o %s";
|
" -noST -ST Novelty -o %s";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("190e1a171132832bf92fbca56a9c40bb"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e42cda858649a35eaa9d14ea2d70a956"));
|
||||||
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -347,7 +347,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||||
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
||||||
" -noST -ST Novelty -o %s";
|
" -noST -ST Novelty -o %s";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("08586d443fdcf3b7f63b8f9e3a943c62"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9561cb4c7aa36dcf30ba253385299859"));
|
||||||
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -463,7 +463,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
"-o %s"
|
"-o %s"
|
||||||
),
|
),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
|
Arrays.asList("397b0e77459b9b69d2e0dd1dac320c3c")
|
||||||
);
|
);
|
||||||
executeTest("testModernVCFWithLargeIndels", spec);
|
executeTest("testModernVCFWithLargeIndels", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.crypt;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.testng.SkipException;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
|
@ -64,11 +65,21 @@ public class CryptUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGATKMasterKeyPairMutualDecryption() {
|
public void testGATKMasterKeyPairMutualDecryption() {
|
||||||
|
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||||
|
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||||
|
"testGATKMasterKeyPairMutualDecryption"));
|
||||||
|
}
|
||||||
|
|
||||||
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey()));
|
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() {
|
public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() {
|
||||||
|
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||||
|
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||||
|
"testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption"));
|
||||||
|
}
|
||||||
|
|
||||||
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey()));
|
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -156,6 +167,11 @@ public class CryptUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLoadGATKMasterPrivateKey() {
|
public void testLoadGATKMasterPrivateKey() {
|
||||||
|
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||||
|
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||||
|
"testLoadGATKMasterPrivateKey"));
|
||||||
|
}
|
||||||
|
|
||||||
PrivateKey gatkMasterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
PrivateKey gatkMasterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -174,4 +190,9 @@ public class CryptUtilsUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm());
|
Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm());
|
||||||
Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat());
|
Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean gatkPrivateKeyExistsButReadPermissionDenied() {
|
||||||
|
File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE);
|
||||||
|
return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.crypt;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.testng.SkipException;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
|
||||||
|
|
@ -39,6 +40,11 @@ public class GATKKeyUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCreateGATKKeyUsingMasterKeyPair() {
|
public void testCreateGATKKeyUsingMasterKeyPair() {
|
||||||
|
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||||
|
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||||
|
"testCreateGATKKeyUsingMasterKeyPair"));
|
||||||
|
}
|
||||||
|
|
||||||
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||||
PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey();
|
PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey();
|
||||||
|
|
||||||
|
|
@ -49,6 +55,11 @@ public class GATKKeyUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() {
|
public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() {
|
||||||
|
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||||
|
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||||
|
"testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey"));
|
||||||
|
}
|
||||||
|
|
||||||
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||||
PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
||||||
|
|
||||||
|
|
@ -82,8 +93,7 @@ public class GATKKeyUnitTest extends BaseTest {
|
||||||
KeyPair keyPair = CryptUtils.generateKeyPair();
|
KeyPair keyPair = CryptUtils.generateKeyPair();
|
||||||
|
|
||||||
// Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file:
|
// Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file:
|
||||||
GATKKey key = new GATKKey(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey(),
|
GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), emailAddressWithNulByte);
|
||||||
emailAddressWithNulByte);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -110,4 +120,9 @@ public class GATKKeyUnitTest extends BaseTest {
|
||||||
|
|
||||||
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile);
|
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean gatkPrivateKeyExistsButReadPermissionDenied() {
|
||||||
|
File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE);
|
||||||
|
return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue