Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
92bbb9bbdd
|
|
@ -407,7 +407,14 @@ public class BAMSchedule implements CloseableIterator<BAMScheduleEntry> {
|
|||
position(currentPosition);
|
||||
|
||||
// Read data.
|
||||
read(binHeader);
|
||||
int binHeaderBytesRead = read(binHeader);
|
||||
|
||||
// Make sure we read in a complete bin header:
|
||||
if ( binHeaderBytesRead < INT_SIZE_IN_BYTES * 3 ) {
|
||||
throw new ReviewedStingException(String.format("Unable to read a complete bin header from BAM schedule file %s for BAM file %s. " +
|
||||
"The BAM schedule file is likely incomplete/corrupt.",
|
||||
scheduleFile.getAbsolutePath(), reader.getSamFilePath()));
|
||||
}
|
||||
|
||||
// Decode contents.
|
||||
binHeader.flip();
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@ import net.sf.samtools.SAMSequenceRecord;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -245,7 +247,14 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
// This will ensure that if the two sets of contigs don't quite match (b36 male vs female ref, hg19 Epstein-Barr), then
|
||||
// we'll be using the correct contig index for the BAMs.
|
||||
// TODO: Warning: assumes all BAMs use the same sequence dictionary! Get around this with contig aliasing.
|
||||
final int currentContigIndex = dataSource.getHeader().getSequence(currentLocus.getContig()).getSequenceIndex();
|
||||
SAMSequenceRecord currentContigSequenceRecord = dataSource.getHeader().getSequence(currentLocus.getContig());
|
||||
if ( currentContigSequenceRecord == null ) {
|
||||
throw new UserException(String.format("Contig %s not present in sequence dictionary for merged BAM header: %s",
|
||||
currentLocus.getContig(),
|
||||
ReadUtils.prettyPrintSequenceRecords(dataSource.getHeader().getSequenceDictionary())));
|
||||
}
|
||||
|
||||
final int currentContigIndex = currentContigSequenceRecord.getSequenceIndex();
|
||||
|
||||
// Stale reference sequence or first invocation. (Re)create the binTreeIterator.
|
||||
if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentContigIndex) {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
|||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
|
@ -349,7 +350,18 @@ public class GATKBAMIndex {
|
|||
|
||||
private void read(final ByteBuffer buffer) {
|
||||
try {
|
||||
fileChannel.read(buffer);
|
||||
int bytesExpected = buffer.limit();
|
||||
int bytesRead = fileChannel.read(buffer);
|
||||
|
||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
|
||||
// must be truncated or otherwise corrupt:
|
||||
if ( bytesRead < bytesExpected ) {
|
||||
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
||||
"It's likely that this file is truncated or corrupt -- " +
|
||||
"Please try re-indexing the corresponding BAM file.",
|
||||
mFile));
|
||||
}
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
||||
|
|
|
|||
|
|
@ -296,6 +296,10 @@ public class GATKReportTable {
|
|||
return primaryKeyColumn.contains(primaryKey);
|
||||
}
|
||||
|
||||
public Collection<Object> getPrimaryKeys() {
|
||||
return Collections.unmodifiableCollection(primaryKeyColumn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value for a given position in the table
|
||||
*
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import net.sf.samtools.SAMReadGroupRecord;
|
|||
import org.broadinstitute.sting.commandline.Advanced;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -115,6 +116,7 @@ import java.util.*;
|
|||
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
|
||||
@By(DataSource.REFERENCE)
|
||||
@PartitionBy(PartitionType.NONE)
|
||||
@Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE)
|
||||
public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partition,Map<String,int[]>>, CoveragePartitioner> implements TreeReducible<CoveragePartitioner> {
|
||||
@Output
|
||||
@Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"})
|
||||
|
|
|
|||
|
|
@ -0,0 +1,162 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Computes the read error rate per position in read (in the original 5'->3' orientation that the read had coming off the machine)
|
||||
*
|
||||
* Emits a GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate for each read
|
||||
* group in the input BAMs FOR ONLY THE FIRST OF PAIR READS.
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* Any number of BAM files
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate.
|
||||
*
|
||||
* For example, running this tool on the NA12878 data sets:
|
||||
*
|
||||
* <pre>
|
||||
* ##:GATKReport.v0.2 ErrorRatePerCycle : The error rate per sequenced position in the reads
|
||||
* readgroup cycle mismatches counts qual errorrate
|
||||
* 20FUK.1 0 80 23368 25 3.47e-03
|
||||
* 20FUK.1 1 40 23433 28 1.75e-03
|
||||
* 20FUK.1 2 36 23453 28 1.58e-03
|
||||
* 20FUK.1 3 26 23476 29 1.15e-03
|
||||
* 20FUK.1 4 32 23495 29 1.40e-03
|
||||
* up to 101 cycles
|
||||
* 20FUK.2 0 77 20886 24 3.73e-03
|
||||
* 20FUK.2 1 28 20920 29 1.39e-03
|
||||
* 20FUK.2 2 24 20931 29 1.19e-03
|
||||
* 20FUK.2 3 30 20940 28 1.48e-03
|
||||
* 20FUK.2 4 25 20948 29 1.24e-03
|
||||
* up to 101 cycles
|
||||
* 20FUK.3 0 78 22038 24 3.58e-03
|
||||
* 20FUK.3 1 40 22091 27 1.86e-03
|
||||
* 20FUK.3 2 23 22108 30 1.09e-03
|
||||
* 20FUK.3 3 36 22126 28 1.67e-03
|
||||
* </pre>
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T ErrorRatePerCycle
|
||||
* -I bundle/current/b37/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam
|
||||
* -R bundle/current/b37/human_g1k_v37.fasta
|
||||
* -o example.gatkreport.txt
|
||||
* </pre>
|
||||
*
|
||||
* @author Kiran Garimella, Mark DePristo
|
||||
*/
|
||||
public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
|
||||
@Output PrintStream out;
|
||||
@Argument(fullName="min_base_quality_score", shortName="mbq", doc="Minimum base quality required to consider a base for calling", required=false)
|
||||
public Integer MIN_BASE_QUAL = 0;
|
||||
@Argument(fullName="min_mapping_quality_score", shortName="mmq", doc="Minimum read mapping quality required to consider a read for calling", required=false)
|
||||
public Integer MIN_MAPPING_QUAL = 20;
|
||||
|
||||
private GATKReport report;
|
||||
private GATKReportTable table;
|
||||
private final static String reportName = "ErrorRatePerCycle";
|
||||
private final static String reportDescription = "The error rate per sequenced position in the reads";
|
||||
|
||||
/**
|
||||
* Allows us to use multiple records for the key (read group x cycle)
|
||||
*/
|
||||
private static class TableKey implements Comparable<TableKey> {
|
||||
final String readGroup;
|
||||
final int cycle;
|
||||
|
||||
private TableKey(final String readGroup, final int cycle) {
|
||||
this.readGroup = readGroup;
|
||||
this.cycle = cycle;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final TableKey tableKey) {
|
||||
final int scmp = readGroup.compareTo(tableKey.readGroup);
|
||||
if ( scmp == 0 )
|
||||
return Integer.valueOf(cycle).compareTo(tableKey.cycle);
|
||||
else
|
||||
return scmp;
|
||||
}
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
report = new GATKReport();
|
||||
report.addTable(reportName, reportDescription);
|
||||
table = report.getTable(reportName);
|
||||
table.addPrimaryKey("key", false);
|
||||
table.addColumn("readgroup", 0);
|
||||
table.addColumn("cycle", 0);
|
||||
table.addColumn("mismatches", 0);
|
||||
table.addColumn("counts", 0);
|
||||
table.addColumn("qual", 0);
|
||||
table.addColumn("errorrate", 0.0f, "%.2e");
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
for ( final PileupElement p : context.getBasePileup() ) {
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
final int offset = p.getOffset();
|
||||
final boolean firstOfPair = ! read.getReadPairedFlag() || read.getFirstOfPairFlag();
|
||||
|
||||
if ( firstOfPair && read.getMappingQuality() >= MIN_MAPPING_QUAL && p.getQual() >= MIN_BASE_QUAL ) {
|
||||
final byte readBase = p.getBase();
|
||||
final byte refBase = ref.getBase();
|
||||
final int cycle = offset;
|
||||
|
||||
if ( BaseUtils.isRegularBase(readBase) && BaseUtils.isRegularBase(refBase) ) {
|
||||
final TableKey key = new TableKey(read.getReadGroup().getReadGroupId(), cycle);
|
||||
|
||||
if ( ! table.containsKey(key) ) {
|
||||
table.set(key, "cycle", cycle);
|
||||
table.set(key, "readgroup", read.getReadGroup().getReadGroupId());
|
||||
}
|
||||
|
||||
table.increment(key, "counts");
|
||||
if (readBase != refBase)
|
||||
table.increment(key, "mismatches");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return null; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) { return null; }
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
for ( final Object key : table.getPrimaryKeys() ) {
|
||||
final int mismatches = (Integer)table.get(key, "mismatches");
|
||||
final int count = (Integer)table.get(key, "counts");
|
||||
final double errorRate = (mismatches + 1) / (1.0*(count + 1));
|
||||
final int qual = QualityUtils.probToQual(1-errorRate, 0.0);
|
||||
table.set(key, "qual", qual);
|
||||
table.set(key, "errorrate", errorRate);
|
||||
}
|
||||
|
||||
report.print(out);
|
||||
}
|
||||
}
|
||||
|
|
@ -94,9 +94,6 @@ import java.util.Map;
|
|||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
|
||||
|
||||
|
||||
public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
||||
@Output
|
||||
public PrintStream out;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.apache.commons.jexl2.Expression;
|
||||
import org.apache.commons.jexl2.JexlContext;
|
||||
import org.apache.commons.jexl2.JexlEngine;
|
||||
import org.apache.commons.jexl2.MapContext;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -71,7 +75,7 @@ import java.util.*;
|
|||
* <p>
|
||||
* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferrably MSA cleaned) sequencing
|
||||
* data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
|
||||
* include additional statistics such as mismtaches and base qualitites around the calls, read strandness (how many
|
||||
* include additional statistics such as mismatches and base qualitites around the calls, read strandness (how many
|
||||
* forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
|
||||
* statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
|
||||
* attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
|
||||
|
|
@ -88,6 +92,16 @@ import java.util.*;
|
|||
* bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
|
||||
* on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
|
||||
*
|
||||
* Which (putative) calls will make it into the output file(s) is controlled by an expression/list of expressions passed with -filter
|
||||
* flag: if any of the expressions evaluate to TRUE, the site will be discarded. Otherwise the putative call and all the
|
||||
* associated statistics will be printed into the output. Expressions recognize the following variables(in paired-sample
|
||||
* somatic mode variables are prefixed with T_ and N_ for Tumor and Normal, e.g. N_COV and T_COV are defined instead of COV):
|
||||
* COV for coverage at the site, INDEL_F for fraction of reads supporting consensus indel at the site (wrt total coverage),
|
||||
* INDEL_CF for fraction of reads with consensus indel wrt all reads with an indel at the site, CONS_CNT for the count of
|
||||
* reads supporting the consensus indel at the site. Conventional arithmetic and logical operations are supported. For instance,
|
||||
* N_COV<4||T_COV<6||T_INDEL_F<0.3||T_INDEL_CF<0.7 instructs the tool to only output indel calls with at least 30% observed
|
||||
* allelic fraction and with consensus indel making at least 70% of all indel observations at the site, and only at the sites
|
||||
* where tumor coverage and normal coverage are at least 6 and 4, respectively.
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
|
||||
|
|
@ -147,30 +161,44 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
|
||||
java.io.File bedOutput = null;
|
||||
|
||||
@Deprecated
|
||||
@Argument(fullName="minCoverage", shortName="minCoverage",
|
||||
doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
|
||||
"with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false)
|
||||
"with --unpaired (single sample) option, this value is used for minimum sample coverage. "+
|
||||
"INSTEAD USE: T_COV<cutoff (or COV<cutoff in unpaired mode) in -filter expression (see -filter).",
|
||||
required=false)
|
||||
int minCoverage = 6;
|
||||
|
||||
@Deprecated
|
||||
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
|
||||
doc="used only in default (somatic) mode; normal sample must have at least minNormalCoverage "+
|
||||
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
|
||||
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored. "+
|
||||
"INSTEAD USE: N_COV<cutoff in -filter expression (see -filter).", required=false)
|
||||
int minNormalCoverage = 4;
|
||||
|
||||
@Deprecated
|
||||
@Argument(fullName="minFraction", shortName="minFraction",
|
||||
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
|
||||
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
|
||||
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, "+
|
||||
"required for making a call"+
|
||||
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction). "+
|
||||
"INSTEAD USE: T_INDEL_F<cutoff (or INDEL_F<cutoff in unpaired mode) in -filter expression "+
|
||||
"(see -filter).", required=false)
|
||||
double minFraction = 0.3;
|
||||
|
||||
@Deprecated
|
||||
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
|
||||
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
|
||||
"all indel observations at the site exceeds this threshold", required=false)
|
||||
"all indel observations at the site exceeds this threshold. "+
|
||||
"INSTEAD USE: T_INDEL_CF<cutoff (or INDEL_CF<cutoff in unpaired mode) in -filter expression "+
|
||||
"(see -filter).", required=false)
|
||||
double minConsensusFraction = 0.7;
|
||||
|
||||
@Deprecated
|
||||
@Argument(fullName="minIndelCount", shortName="minCnt",
|
||||
doc="Minimum count of reads supporting consensus indel required for making the call. "+
|
||||
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
|
||||
"(minIndelCount not met) will not pass.", required=false)
|
||||
"(minIndelCount not met) will not pass. INSTEAD USE: T_CONS_CNT<cutoff "+
|
||||
"(or CONS_CNT<cutoff in unpaired mode) in -filter expression (see -filter).", required=false)
|
||||
int minIndelCount = 0;
|
||||
|
||||
@Argument(fullName="refseq", shortName="refseq",
|
||||
|
|
@ -178,6 +206,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
"GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
|
||||
String RefseqFileName = null;
|
||||
|
||||
|
||||
@Argument(shortName="filter", doc="One or more logical expressions. If any of the expressions is TRUE, " +
|
||||
"putative indel will be discarded and nothing will be printed into the output (unless genotyping "+
|
||||
"at the specific position is explicitly requested, see -genotype). "+
|
||||
"Default: T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7", required=false)
|
||||
public ArrayList<String> FILTER_EXPRESSIONS = new ArrayList<String>();
|
||||
|
||||
//@Argument(fullName="blacklistedLanes", shortName="BL",
|
||||
// doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
|
||||
// "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
|
||||
|
|
@ -221,7 +256,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
private Writer verboseWriter = null;
|
||||
|
||||
|
||||
private static String annGenomic = "GENOMIC";
|
||||
private static String annGenomic = "GENOMIC\t";
|
||||
private static String annIntron = "INTRON";
|
||||
private static String annUTR = "UTR";
|
||||
private static String annCoding = "CODING";
|
||||
|
|
@ -245,6 +280,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed;
|
||||
// can be 1 base before lastGenotyped start
|
||||
|
||||
private JexlEngine jexlEngine = new JexlEngine();
|
||||
private ArrayList<Expression> jexlExpressions = new ArrayList<Expression>();
|
||||
|
||||
// the following arrays store indel source-specific (normal/tumor) metric names
|
||||
// for fast access when populating JEXL expression contexts (see IndelPrecall.fillContext())
|
||||
private final static String[] normalMetricsCassette = new String[4];
|
||||
private final static String[] tumorMetricsCassette = new String[4];
|
||||
private final static String[] singleMetricsCassette = new String[4];
|
||||
private final static int C_COV=0;
|
||||
private final static int C_CONS_CNT=1;
|
||||
private final static int C_INDEL_F=2;
|
||||
private final static int C_INDEL_CF=3;
|
||||
static {
|
||||
normalMetricsCassette[C_COV] = "N_COV";
|
||||
tumorMetricsCassette[C_COV] = "T_COV";
|
||||
singleMetricsCassette[C_COV] = "COV";
|
||||
normalMetricsCassette[C_CONS_CNT] = "N_CONS_CNT";
|
||||
tumorMetricsCassette[C_CONS_CNT] = "T_CONS_CNT";
|
||||
singleMetricsCassette[C_CONS_CNT] = "CONS_CNT";
|
||||
normalMetricsCassette[C_INDEL_F] = "N_INDEL_F";
|
||||
tumorMetricsCassette[C_INDEL_F] = "T_INDEL_F";
|
||||
singleMetricsCassette[C_INDEL_F] = "INDEL_F";
|
||||
normalMetricsCassette[C_INDEL_CF] = "N_INDEL_CF";
|
||||
tumorMetricsCassette[C_INDEL_CF] = "T_INDEL_CF";
|
||||
singleMetricsCassette[C_INDEL_CF] = "INDEL_CF";
|
||||
}
|
||||
|
||||
// "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
|
||||
|
||||
|
|
@ -389,6 +450,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
vcf_writer.writeHeader(new VCFHeader(getVCFHeaderInfo(), SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))) ;
|
||||
refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile);
|
||||
|
||||
// Now initialize JEXL expressions:
|
||||
if ( FILTER_EXPRESSIONS.size() == 0 ) {
|
||||
if ( call_unpaired ) {
|
||||
FILTER_EXPRESSIONS.add("COV<6||INDEL_F<0.3||INDEL_CF<0.7");
|
||||
} else {
|
||||
FILTER_EXPRESSIONS.add("T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7");
|
||||
}
|
||||
}
|
||||
for ( String s : FILTER_EXPRESSIONS ) {
|
||||
try {
|
||||
Expression e = jexlEngine.createExpression(s);
|
||||
jexlExpressions.add(e);
|
||||
} catch (Exception e) {
|
||||
throw new UserException.BadArgumentValue("Filter expression", "Invalid expression used (" + s + "). Please see the JEXL docs for correct syntax.") ;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -661,14 +740,26 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue;
|
||||
|
||||
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
|
||||
JexlContext jc = new MapContext();
|
||||
normalCall.fillContext(jc,singleMetricsCassette);
|
||||
boolean discard_event = false;
|
||||
|
||||
if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
|
||||
if ( DEBUG ) {
|
||||
System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
||||
}
|
||||
continue; // low coverage
|
||||
for ( Expression e : jexlExpressions ) {
|
||||
if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; }
|
||||
}
|
||||
|
||||
if ( discard_event && ! genotype ) {
|
||||
normal_context.indelsAt(pos).clear();
|
||||
continue; //*
|
||||
}
|
||||
|
||||
// if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
|
||||
// if ( DEBUG ) {
|
||||
// System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
||||
// }
|
||||
// continue; // low coverage
|
||||
// }
|
||||
|
||||
if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos);
|
||||
|
||||
long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() );
|
||||
|
|
@ -697,24 +788,16 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos);
|
||||
|
||||
boolean haveCall = normalCall.isCall(); // cache the value
|
||||
|
||||
if ( haveCall || genotype) {
|
||||
if ( haveCall ) normalCallsMade++;
|
||||
printVCFLine(vcf_writer,normalCall);
|
||||
if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
|
||||
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall);
|
||||
lastGenotypedPosition = pos;
|
||||
}
|
||||
if ( ! discard_event ) normalCallsMade++;
|
||||
printVCFLine(vcf_writer,normalCall, discard_event);
|
||||
if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
|
||||
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, discard_event);
|
||||
lastGenotypedPosition = pos;
|
||||
|
||||
normal_context.indelsAt(pos).clear();
|
||||
// we dealt with this indel; don't want to see it again
|
||||
// (we might otherwise in the case when 1) there is another indel that follows
|
||||
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
|
||||
|
||||
// for ( IndelVariant var : variants ) {
|
||||
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
|
||||
// }
|
||||
}
|
||||
|
||||
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
|
||||
|
|
@ -829,18 +912,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH);
|
||||
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
|
||||
|
||||
if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
|
||||
if ( DEBUG ) {
|
||||
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
|
||||
}
|
||||
continue; // low coverage
|
||||
JexlContext jc = new MapContext();
|
||||
tumorCall.fillContext(jc,tumorMetricsCassette);
|
||||
normalCall.fillContext(jc,normalMetricsCassette);
|
||||
boolean discard_event = false;
|
||||
|
||||
for ( Expression e : jexlExpressions ) {
|
||||
if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; }
|
||||
}
|
||||
if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
|
||||
if ( DEBUG ) {
|
||||
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
||||
}
|
||||
continue; // low coverage
|
||||
|
||||
if ( discard_event && ! genotype ) {
|
||||
tumor_context.indelsAt(pos).clear();
|
||||
normal_context.indelsAt(pos).clear();
|
||||
continue; //*
|
||||
}
|
||||
// if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
|
||||
// if ( DEBUG ) {
|
||||
// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
|
||||
// }
|
||||
// continue; // low coverage
|
||||
// }
|
||||
// if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
|
||||
// if ( DEBUG ) {
|
||||
// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
||||
// }
|
||||
// continue; // low coverage
|
||||
// }
|
||||
|
||||
if ( DEBUG ) {
|
||||
System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, ");
|
||||
|
|
@ -868,32 +965,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
|
||||
|
||||
// location = getToolkit().getGenomeLocParser().setStart(location,pos);
|
||||
// location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
|
||||
|
||||
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data
|
||||
|
||||
boolean haveCall = tumorCall.isCall(); // cache the value
|
||||
// boolean haveCall = tumorCall.isCall(); // cache the value
|
||||
|
||||
if ( haveCall || genotype ) {
|
||||
if ( haveCall ) tumorCallsMade++;
|
||||
if ( ! discard_event ) tumorCallsMade++;
|
||||
|
||||
printVCFLine(vcf_writer,normalCall,tumorCall);
|
||||
printVCFLine(vcf_writer,normalCall,tumorCall,discard_event);
|
||||
|
||||
if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
|
||||
if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
|
||||
|
||||
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall, discard_event );
|
||||
lastGenotypedPosition = pos;
|
||||
|
||||
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall );
|
||||
lastGenotypedPosition = pos;
|
||||
}
|
||||
tumor_context.indelsAt(pos).clear();
|
||||
normal_context.indelsAt(pos).clear();
|
||||
// we dealt with this indel; don't want to see it again
|
||||
// (we might otherwise in the case when 1) there is another indel that follows
|
||||
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
|
||||
|
||||
// for ( IndelVariant var : variants ) {
|
||||
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
|
||||
// }
|
||||
}
|
||||
|
||||
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
|
||||
|
|
@ -947,14 +1036,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
}
|
||||
|
||||
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) {
|
||||
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, boolean discard_event) {
|
||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||
|
||||
StringBuilder fullRecord = new StringBuilder();
|
||||
fullRecord.append(makeFullRecord(normalCall));
|
||||
fullRecord.append(annotationString);
|
||||
if ( ! normalCall.isCall() && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
||||
if ( discard_event && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
||||
try {
|
||||
verboseWriter.write(fullRecord.toString());
|
||||
verboseWriter.write('\n');
|
||||
|
|
@ -965,7 +1054,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
}
|
||||
|
||||
|
||||
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) {
|
||||
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall, boolean discard_event) {
|
||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||
|
||||
|
|
@ -1013,7 +1102,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
fullRecord.append('\t');
|
||||
fullRecord.append(annotationString);
|
||||
|
||||
if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
||||
if ( discard_event && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
||||
|
||||
try {
|
||||
verboseWriter.write(fullRecord.toString());
|
||||
|
|
@ -1023,7 +1112,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
public void printVCFLine(VCFWriter vcf, IndelPrecall call) {
|
||||
public void printVCFLine(VCFWriter vcf, IndelPrecall call, boolean discard_event) {
|
||||
|
||||
long start = call.getPosition()-1;
|
||||
// If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
|
||||
|
|
@ -1060,14 +1149,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
Map<String,Object> attrs = call.makeStatsAttributes(null);
|
||||
|
||||
if ( call.isCall() ) // we made a call - put actual het genotype here:
|
||||
if ( ! discard_event ) // we made a call - put actual het genotype here:
|
||||
genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||
|
||||
}
|
||||
Set<String> filters = null;
|
||||
if ( call.getVariant() != null && ! call.isCall() ) {
|
||||
if ( call.getVariant() != null && discard_event ) {
|
||||
filters = new HashSet<String>();
|
||||
filters.add("NoCall");
|
||||
}
|
||||
|
|
@ -1095,7 +1184,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) {
|
||||
public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall, boolean discard_event) {
|
||||
|
||||
long start = tCall.getPosition()-1;
|
||||
long stop = start;
|
||||
|
|
@ -1112,7 +1201,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
Map<String,Object> attrs = new HashMap();
|
||||
|
||||
boolean isSomatic = false;
|
||||
if ( nCall.getCoverage() >= minNormalCoverage && nCall.getVariant() == null && tCall.getVariant() != null ) {
|
||||
if ( nCall.getVariant() == null && tCall.getVariant() != null ) {
|
||||
isSomatic = true;
|
||||
attrs.put(VCFConstants.SOMATIC_KEY,true);
|
||||
}
|
||||
|
|
@ -1155,7 +1244,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
}
|
||||
|
||||
Set<String> filters = null;
|
||||
if ( tCall.getVariant() != null && ! tCall.isCall() ) {
|
||||
if ( tCall.getVariant() != null && discard_event ) {
|
||||
filters = new HashSet<String>();
|
||||
filters.add("NoCall");
|
||||
}
|
||||
|
|
@ -1602,6 +1691,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
public IndelVariant getVariant() { return consensus_indel; }
|
||||
|
||||
public void fillContext(JexlContext context,String[] cassette) {
|
||||
context.set(cassette[C_INDEL_F],((double)consensus_indel_count)/total_coverage);
|
||||
context.set(cassette[C_INDEL_CF],((double)consensus_indel_count/all_indel_count));
|
||||
context.set(cassette[C_COV],total_coverage);
|
||||
context.set(cassette[C_CONS_CNT],consensus_indel_count);
|
||||
}
|
||||
/*
|
||||
public boolean isCall() {
|
||||
boolean ret = ( consensus_indel_count >= minIndelCount &&
|
||||
(double)consensus_indel_count > minFraction * total_coverage &&
|
||||
|
|
@ -1610,10 +1706,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
" total_count="+all_indel_count+" cov="+total_coverage+
|
||||
" minConsensusF="+((double)consensus_indel_count)/all_indel_count+
|
||||
" minF="+((double)consensus_indel_count)/total_coverage);
|
||||
|
||||
return ret;
|
||||
|
||||
// return true;
|
||||
}
|
||||
|
||||
*/
|
||||
/** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed
|
||||
* variants, and sets the counts of consensus observations and all observations of any indels (including non-consensus)
|
||||
* @param variants
|
||||
|
|
|
|||
|
|
@ -134,6 +134,10 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
@Argument(doc="Use Sequenom output format instead of regular FASTA",fullName="sqnm",required=false)
|
||||
boolean sequenomOutput = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(doc="Use ILMN output format instead of regular FASTA",fullName="ilmn",required=false)
|
||||
boolean ilmnOutput = false;
|
||||
|
||||
|
||||
GenomeLoc prevInterval;
|
||||
GenomeLoc allelePos;
|
||||
|
|
@ -141,6 +145,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
StringBuilder sequence;
|
||||
StringBuilder rawSequence;
|
||||
boolean sequenceInvalid;
|
||||
boolean isSiteSNP;
|
||||
List<String> invReason;
|
||||
int indelCounter;
|
||||
|
||||
|
|
@ -169,6 +174,9 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
header.setSequenceDictionary(referenceDictionary);
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||
}
|
||||
|
||||
if (ilmnOutput)
|
||||
out.println("Locus_Name,Target_Type,Sequence,Chromosome,Coordinate,Genome_Build_Version,Source,Source_Version,Sequence_Orientation,Plus_Minus,Force_Infinium_I");
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
|
|
@ -234,6 +242,8 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
}
|
||||
rawSequence.append(Character.toUpperCase((char) ref.getBase()));
|
||||
} else if ( validate != null ) {
|
||||
// record variant type in case it's needed in output format
|
||||
isSiteSNP = (validate.isSNP());
|
||||
// doesn't matter if there's a mask here too -- this is what we want to validate
|
||||
if ( validate.isFiltered() ) {
|
||||
logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site.");
|
||||
|
|
@ -496,13 +506,19 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
|
||||
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
|
||||
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
||||
if (!sequenomOutput)
|
||||
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
||||
else {
|
||||
if (sequenomOutput) {
|
||||
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
||||
probeName = probeName.replace("amplicon_","a");
|
||||
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
||||
}
|
||||
else if (ilmnOutput) {
|
||||
String type = isSiteSNP?"SNP":"INDEL";
|
||||
seqIdentity = seqIdentity.replace("*",""); // no * in ref allele
|
||||
out.printf("%s,%s,%s,%s,%d,37,1000G,ExomePhase1,Forward,Plus,FALSE%n",probeName,type,seqIdentity,allelePos.getContig(),allelePos.getStart());
|
||||
}
|
||||
else{
|
||||
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.util.IntervalTree;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
|
@ -19,11 +20,8 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
|||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
|
@ -32,7 +30,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
|
@ -389,9 +386,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
nec.apply(tracker, ref, context, comp, eval);
|
||||
}
|
||||
|
||||
// eval=null against all comps of different type
|
||||
// eval=null against all comps of different type that aren't bound to another eval
|
||||
for ( VariantContext otherComp : compSet ) {
|
||||
if ( otherComp != comp ) {
|
||||
if ( otherComp != comp && ! compHasMatchingEval(otherComp, evalSetBySample) ) {
|
||||
synchronized (nec) {
|
||||
nec.apply(tracker, ref, context, otherComp, null);
|
||||
}
|
||||
|
|
@ -409,6 +406,35 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
return null;
|
||||
}
|
||||
|
||||
@Requires({"comp != null", "evals != null"})
|
||||
private boolean compHasMatchingEval(final VariantContext comp, final Collection<VariantContext> evals) {
|
||||
// find all of the matching comps
|
||||
for ( final VariantContext eval : evals ) {
|
||||
if ( eval != null && doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) != EvalCompMatchType.NO_MATCH )
|
||||
return true;
|
||||
}
|
||||
|
||||
// nothing matched
|
||||
return false;
|
||||
}
|
||||
|
||||
private enum EvalCompMatchType { NO_MATCH, STRICT, LENIENT }
|
||||
|
||||
@Requires({"eval != null", "comp != null"})
|
||||
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
|
||||
// find all of the matching comps
|
||||
if ( comp.getType() != eval.getType() )
|
||||
return EvalCompMatchType.NO_MATCH;
|
||||
|
||||
// find the comp which matches both the reference allele and alternate allele from eval
|
||||
final Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
|
||||
final Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
|
||||
if ((altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())))
|
||||
return EvalCompMatchType.STRICT;
|
||||
else
|
||||
return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT;
|
||||
}
|
||||
|
||||
private VariantContext findMatchingComp(final VariantContext eval, final Collection<VariantContext> comps) {
|
||||
// if no comps, return null
|
||||
if ( comps == null || comps.isEmpty() )
|
||||
|
|
@ -419,26 +445,21 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
return comps.iterator().next();
|
||||
|
||||
// find all of the matching comps
|
||||
List<VariantContext> matchingComps = new ArrayList<VariantContext>(comps.size());
|
||||
for ( VariantContext comp : comps ) {
|
||||
if ( comp.getType() == eval.getType() )
|
||||
matchingComps.add(comp);
|
||||
VariantContext lenientMatch = null;
|
||||
for ( final VariantContext comp : comps ) {
|
||||
switch ( doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) ) {
|
||||
case STRICT:
|
||||
return comp;
|
||||
case LENIENT:
|
||||
if ( lenientMatch == null ) lenientMatch = comp;
|
||||
break;
|
||||
case NO_MATCH:
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
// if no matching comp, return null
|
||||
if ( matchingComps.size() == 0 )
|
||||
return null;
|
||||
|
||||
// find the comp which matches both the reference allele and alternate allele from eval
|
||||
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
|
||||
for ( VariantContext comp : matchingComps ) {
|
||||
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
|
||||
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
|
||||
return comp;
|
||||
}
|
||||
|
||||
// if none match, just return the first one unless we require a strict match
|
||||
return (requireStrictAlleleMatch ? null : matchingComps.get(0));
|
||||
// nothing matched, just return lenientMatch, which might be null
|
||||
return lenientMatch;
|
||||
}
|
||||
|
||||
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }
|
||||
|
|
|
|||
|
|
@ -417,8 +417,6 @@ public class VariantEvalUtils {
|
|||
}
|
||||
} else {
|
||||
stateKeys.add(stateKey);
|
||||
|
||||
return stateKeys;
|
||||
}
|
||||
|
||||
return stateKeys;
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@ import net.sf.samtools.SAMUtils;
|
|||
*/
|
||||
public class QualityUtils {
|
||||
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
|
||||
|
||||
public final static double MIN_REASONABLE_ERROR = 0.0001;
|
||||
public final static byte MAX_REASONABLE_Q_SCORE = 40;
|
||||
public final static byte MIN_USABLE_Q_SCORE = 6;
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
|
|||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -273,7 +274,7 @@ public class UserException extends ReviewedStingException {
|
|||
public static class IncompatibleSequenceDictionaries extends UserException {
|
||||
public IncompatibleSequenceDictionaries(String message, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
|
||||
super(String.format("Input files %s and %s have incompatible contigs: %s.\n %s contigs = %s\n %s contigs = %s",
|
||||
name1, name2, message, name1, prettyPrintSequenceRecords(dict1), name2, prettyPrintSequenceRecords(dict2)));
|
||||
name1, name2, message, name1, ReadUtils.prettyPrintSequenceRecords(dict1), name2, ReadUtils.prettyPrintSequenceRecords(dict2)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -284,17 +285,11 @@ public class UserException extends ReviewedStingException {
|
|||
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
|
||||
+ "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam"
|
||||
+ "\n %s contigs = %s",
|
||||
name, name, prettyPrintSequenceRecords(dict)));
|
||||
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
|
||||
}
|
||||
}
|
||||
|
||||
private static String prettyPrintSequenceRecords(SAMSequenceDictionary sequenceDictionary) {
|
||||
String[] sequenceRecordNames = new String[sequenceDictionary.size()];
|
||||
int sequenceRecordIndex = 0;
|
||||
for (SAMSequenceRecord sequenceRecord : sequenceDictionary.getSequences())
|
||||
sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName();
|
||||
return Arrays.deepToString(sequenceRecordNames);
|
||||
}
|
||||
|
||||
|
||||
public static class MissingWalker extends UserException {
|
||||
public MissingWalker(String walkerName, String message) {
|
||||
|
|
|
|||
|
|
@ -648,4 +648,12 @@ public class ReadUtils {
|
|||
}
|
||||
return new Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>>(locusToReadMap, readToLocusMap);
|
||||
}
|
||||
|
||||
public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) {
|
||||
String[] sequenceRecordNames = new String[sequenceDictionary.size()];
|
||||
int sequenceRecordIndex = 0;
|
||||
for (SAMSequenceRecord sequenceRecord : sequenceDictionary.getSequences())
|
||||
sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName();
|
||||
return Arrays.deepToString(sequenceRecordNames);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,6 +61,8 @@ public abstract class BaseTest {
|
|||
public static final String annotationDataLocation = GATKDataLocation + "Annotations/";
|
||||
|
||||
public static final String b37GoodBAM = validationDataLocation + "/CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||
public static final String b37GoodNA12878BAM = validationDataLocation + "/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||
public static final String b37_NA12878_OMNI = validationDataLocation + "/NA12878.omni.vcf";
|
||||
|
||||
public static final String refseqAnnotationLocation = annotationDataLocation + "refseq/";
|
||||
public static final String hg18Refseq = refseqAnnotationLocation + "refGene-big-table-hg18.txt";
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
|||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -91,4 +92,16 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
|||
Assert.assertEquals(bamIndex.getLevelSize(5),37448-4681+1);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||
public void testDetectTruncatedBamIndexWordBoundary() {
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(validationDataLocation + "truncated_at_word_boundary.bai"));
|
||||
index.readReferenceSequence(0);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||
public void testDetectTruncatedBamIndexNonWordBoundary() {
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(validationDataLocation + "truncated_at_non_word_boundary.bai"));
|
||||
index.readReferenceSequence(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class ErrorRatePerCycleIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void basicTest() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s",
|
||||
1,
|
||||
Arrays.asList("0cc212ecb6df300e321784039ff29f13"));
|
||||
executeTest("ErrorRatePerCycle:", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("f909fd8374f663e983b9b3fda4cf5cf1")
|
||||
Arrays.asList("c8d8bffa5c572df9dec7364f71a1b943")
|
||||
);
|
||||
executeTest("testFunctionClassWithSnpeff", spec);
|
||||
}
|
||||
|
|
@ -277,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
|
||||
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
||||
1, Arrays.asList("4f60acc8a4b21c4b4ccb51ad9071449c"));
|
||||
1, Arrays.asList("c49e239292704447a36e01ee9a71e729"));
|
||||
executeTestParallel("testSelect1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -335,7 +335,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" --dbsnp " + b37dbSNP132 +
|
||||
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("190e1a171132832bf92fbca56a9c40bb"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e42cda858649a35eaa9d14ea2d70a956"));
|
||||
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
|
|
@ -347,7 +347,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("08586d443fdcf3b7f63b8f9e3a943c62"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9561cb4c7aa36dcf30ba253385299859"));
|
||||
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
|
|
@ -463,7 +463,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
|
||||
Arrays.asList("397b0e77459b9b69d2e0dd1dac320c3c")
|
||||
);
|
||||
executeTest("testModernVCFWithLargeIndels", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.crypt;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.SkipException;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -64,11 +65,21 @@ public class CryptUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testGATKMasterKeyPairMutualDecryption() {
|
||||
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||
"testGATKMasterKeyPairMutualDecryption"));
|
||||
}
|
||||
|
||||
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() {
|
||||
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||
"testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption"));
|
||||
}
|
||||
|
||||
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey()));
|
||||
}
|
||||
|
||||
|
|
@ -156,6 +167,11 @@ public class CryptUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testLoadGATKMasterPrivateKey() {
|
||||
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||
"testLoadGATKMasterPrivateKey"));
|
||||
}
|
||||
|
||||
PrivateKey gatkMasterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||
}
|
||||
|
||||
|
|
@ -174,4 +190,9 @@ public class CryptUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm());
|
||||
Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat());
|
||||
}
|
||||
|
||||
private boolean gatkPrivateKeyExistsButReadPermissionDenied() {
|
||||
File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE);
|
||||
return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.crypt;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.SkipException;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
|
||||
|
|
@ -39,6 +40,11 @@ public class GATKKeyUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCreateGATKKeyUsingMasterKeyPair() {
|
||||
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||
"testCreateGATKKeyUsingMasterKeyPair"));
|
||||
}
|
||||
|
||||
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||
PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey();
|
||||
|
||||
|
|
@ -49,6 +55,11 @@ public class GATKKeyUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() {
|
||||
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
|
||||
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
|
||||
"testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey"));
|
||||
}
|
||||
|
||||
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||
PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
||||
|
||||
|
|
@ -82,8 +93,7 @@ public class GATKKeyUnitTest extends BaseTest {
|
|||
KeyPair keyPair = CryptUtils.generateKeyPair();
|
||||
|
||||
// Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file:
|
||||
GATKKey key = new GATKKey(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey(),
|
||||
emailAddressWithNulByte);
|
||||
GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), emailAddressWithNulByte);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -110,4 +120,9 @@ public class GATKKeyUnitTest extends BaseTest {
|
|||
|
||||
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile);
|
||||
}
|
||||
|
||||
private boolean gatkPrivateKeyExistsButReadPermissionDenied() {
|
||||
File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE);
|
||||
return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue