Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-03-10 10:09:57 -05:00
commit 92bbb9bbdd
20 changed files with 536 additions and 114 deletions

View File

@ -407,7 +407,14 @@ public class BAMSchedule implements CloseableIterator<BAMScheduleEntry> {
position(currentPosition);
// Read data.
read(binHeader);
int binHeaderBytesRead = read(binHeader);
// Make sure we read in a complete bin header:
if ( binHeaderBytesRead < INT_SIZE_IN_BYTES * 3 ) {
throw new ReviewedStingException(String.format("Unable to read a complete bin header from BAM schedule file %s for BAM file %s. " +
"The BAM schedule file is likely incomplete/corrupt.",
scheduleFile.getAbsolutePath(), reader.getSamFilePath()));
}
// Decode contents.
binHeader.flip();

View File

@ -34,6 +34,8 @@ import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*;
@ -245,7 +247,14 @@ public class BAMScheduler implements Iterator<FilePointer> {
// This will ensure that if the two sets of contigs don't quite match (b36 male vs female ref, hg19 Epstein-Barr), then
// we'll be using the correct contig index for the BAMs.
// TODO: Warning: assumes all BAMs use the same sequence dictionary! Get around this with contig aliasing.
final int currentContigIndex = dataSource.getHeader().getSequence(currentLocus.getContig()).getSequenceIndex();
SAMSequenceRecord currentContigSequenceRecord = dataSource.getHeader().getSequence(currentLocus.getContig());
if ( currentContigSequenceRecord == null ) {
throw new UserException(String.format("Contig %s not present in sequence dictionary for merged BAM header: %s",
currentLocus.getContig(),
ReadUtils.prettyPrintSequenceRecords(dataSource.getHeader().getSequenceDictionary())));
}
final int currentContigIndex = currentContigSequenceRecord.getSequenceIndex();
// Stale reference sequence or first invocation. (Re)create the binTreeIterator.
if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentContigIndex) {

View File

@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.FileInputStream;
@ -349,7 +350,18 @@ public class GATKBAMIndex {
private void read(final ByteBuffer buffer) {
try {
fileChannel.read(buffer);
int bytesExpected = buffer.limit();
int bytesRead = fileChannel.read(buffer);
// We have a rigid expectation here to read in exactly the number of bytes we've limited
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
// must be truncated or otherwise corrupt:
if ( bytesRead < bytesExpected ) {
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
"It's likely that this file is truncated or corrupt -- " +
"Please try re-indexing the corresponding BAM file.",
mFile));
}
}
catch(IOException ex) {
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);

View File

@ -296,6 +296,10 @@ public class GATKReportTable {
return primaryKeyColumn.contains(primaryKey);
}
/**
 * Returns the primary keys of this table, in the order they are stored in the
 * underlying primary-key column.
 *
 * @return an unmodifiable view of the primary-key collection; callers cannot
 *         add or remove keys through the returned collection, but it reflects
 *         later changes to the table (it is a view, not a snapshot)
 */
public Collection<Object> getPrimaryKeys() {
return Collections.unmodifiableCollection(primaryKeyColumn);
}
/**
* Set the value for a given position in the table
*

View File

@ -29,6 +29,7 @@ import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -115,6 +116,7 @@ import java.util.*;
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
@By(DataSource.REFERENCE)
@PartitionBy(PartitionType.NONE)
@Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE)
public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partition,Map<String,int[]>>, CoveragePartitioner> implements TreeReducible<CoveragePartitioner> {
@Output
@Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"})

View File

@ -0,0 +1,162 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
/**
* Computes the read error rate per position in the read (in the original 5'->3' orientation that the read had coming off the machine)
*
* Emits a GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate for each read
* group in the input BAMs FOR ONLY THE FIRST OF PAIR READS.
*
* <h2>Input</h2>
* <p>
* Any number of BAM files
* </p>
*
* <h2>Output</h2>
* <p>
* GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate.
*
* For example, running this tool on the NA12878 data sets:
*
* <pre>
* ##:GATKReport.v0.2 ErrorRatePerCycle : The error rate per sequenced position in the reads
* readgroup cycle mismatches counts qual errorrate
* 20FUK.1 0 80 23368 25 3.47e-03
* 20FUK.1 1 40 23433 28 1.75e-03
* 20FUK.1 2 36 23453 28 1.58e-03
* 20FUK.1 3 26 23476 29 1.15e-03
* 20FUK.1 4 32 23495 29 1.40e-03
* up to 101 cycles
* 20FUK.2 0 77 20886 24 3.73e-03
* 20FUK.2 1 28 20920 29 1.39e-03
* 20FUK.2 2 24 20931 29 1.19e-03
* 20FUK.2 3 30 20940 28 1.48e-03
* 20FUK.2 4 25 20948 29 1.24e-03
* up to 101 cycles
* 20FUK.3 0 78 22038 24 3.58e-03
* 20FUK.3 1 40 22091 27 1.86e-03
* 20FUK.3 2 23 22108 30 1.09e-03
* 20FUK.3 3 36 22126 28 1.67e-03
* </pre>
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T ErrorRatePerCycle
* -I bundle/current/b37/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam
* -R bundle/current/b37/human_g1k_v37.fasta
* -o example.gatkreport.txt
* </pre>
*
* @author Kiran Garimella, Mark DePristo
*/
public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
@Output PrintStream out;
@Argument(fullName="min_base_quality_score", shortName="mbq", doc="Minimum base quality required to consider a base for calling", required=false)
public Integer MIN_BASE_QUAL = 0;
@Argument(fullName="min_mapping_quality_score", shortName="mmq", doc="Minimum read mapping quality required to consider a read for calling", required=false)
public Integer MIN_MAPPING_QUAL = 20;
// Report and table are created in initialize() and emitted in onTraversalDone().
private GATKReport report;
private GATKReportTable table;
private final static String reportName = "ErrorRatePerCycle";
private final static String reportDescription = "The error rate per sequenced position in the reads";
/**
 * Allows us to use multiple records for the key (read group x cycle).
 * Ordering is by read group first, then by cycle, so the report rows come out
 * grouped per read group.
 */
private static class TableKey implements Comparable<TableKey> {
final String readGroup;
final int cycle;
private TableKey(final String readGroup, final int cycle) {
this.readGroup = readGroup;
this.cycle = cycle;
}
@Override
public int compareTo(final TableKey tableKey) {
final int scmp = readGroup.compareTo(tableKey.readGroup);
if ( scmp == 0 )
return Integer.valueOf(cycle).compareTo(tableKey.cycle);
else
return scmp;
}
}
// Sets up the GATKReport table with one row per (read group, cycle) key and
// integer default 0 for the count columns; errorrate is rendered in
// scientific notation ("%.2e") to match the example output in the class javadoc.
public void initialize() {
report = new GATKReport();
report.addTable(reportName, reportDescription);
table = report.getTable(reportName);
table.addPrimaryKey("key", false);
table.addColumn("readgroup", 0);
table.addColumn("cycle", 0);
table.addColumn("mismatches", 0);
table.addColumn("counts", 0);
table.addColumn("qual", 0);
table.addColumn("errorrate", 0.0f, "%.2e");
}
// Per-locus accumulation: for each pileup element from a first-of-pair read that
// passes the mapping- and base-quality thresholds, bump the total count for its
// (read group, cycle) cell, and bump the mismatch count when the read base
// disagrees with the reference base. The Integer return value is unused (null).
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
for ( final PileupElement p : context.getBasePileup() ) {
final GATKSAMRecord read = p.getRead();
final int offset = p.getOffset();
// Unpaired reads are treated as "first of pair" so they are still counted.
final boolean firstOfPair = ! read.getReadPairedFlag() || read.getFirstOfPairFlag();
if ( firstOfPair && read.getMappingQuality() >= MIN_MAPPING_QUAL && p.getQual() >= MIN_BASE_QUAL ) {
final byte readBase = p.getBase();
final byte refBase = ref.getBase();
// NOTE(review): offset is the pileup offset in aligned-read coordinates;
// for reverse-strand reads this may not equal the machine cycle described
// in the class javadoc ("original 5'->3' orientation") -- confirm.
final int cycle = offset;
// Only count unambiguous bases on both the read and the reference.
if ( BaseUtils.isRegularBase(readBase) && BaseUtils.isRegularBase(refBase) ) {
final TableKey key = new TableKey(read.getReadGroup().getReadGroupId(), cycle);
// First time we see this (read group, cycle): fill in the display columns.
if ( ! table.containsKey(key) ) {
table.set(key, "cycle", cycle);
table.set(key, "readgroup", read.getReadGroup().getReadGroupId());
}
table.increment(key, "counts");
if (readBase != refBase)
table.increment(key, "mismatches");
}
}
}
return null;
}
// Reduce is a no-op; all state lives in the GATKReport table.
public Integer reduceInit() { return null; }
public Integer reduce(Integer value, Integer sum) { return null; }
// Finalize: convert the raw mismatch/count tallies into an error rate and a
// phred-scaled quality per (read group, cycle), then print the report.
public void onTraversalDone(Integer sum) {
for ( final Object key : table.getPrimaryKeys() ) {
final int mismatches = (Integer)table.get(key, "mismatches");
final int count = (Integer)table.get(key, "counts");
// +1 pseudocounts: avoids a zero error rate (and division by zero when a
// cell somehow has no observations).
final double errorRate = (mismatches + 1) / (1.0*(count + 1));
// probToQual(1-errorRate, ...) converts the accuracy to a phred score;
// presumably the 0.0 argument is the epsilon/cap parameter -- verify
// against QualityUtils.
final int qual = QualityUtils.probToQual(1-errorRate, 0.0);
table.set(key, "qual", qual);
table.set(key, "errorrate", errorRate);
}
report.print(out);
}
}

View File

@ -94,9 +94,6 @@ import java.util.Map;
*
* @author Mark DePristo
*/
public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
@Output
public PrintStream out;

View File

@ -26,6 +26,10 @@
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.*;
import org.apache.commons.jexl2.Expression;
import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.JexlEngine;
import org.apache.commons.jexl2.MapContext;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -71,7 +75,7 @@ import java.util.*;
* <p>
* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferably MSA cleaned) sequencing
* data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
* include additional statistics such as mismtaches and base qualitites around the calls, read strandness (how many
* include additional statistics such as mismatches and base qualities around the calls, read strandedness (how many
* forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
* statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
* attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
@ -88,6 +92,16 @@ import java.util.*;
* bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
* on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
*
* Which (putative) calls will make it into the output file(s) is controlled by an expression/list of expressions passed with -filter
* flag: if any of the expressions evaluate to TRUE, the site will be discarded. Otherwise the putative call and all the
* associated statistics will be printed into the output. Expressions recognize the following variables(in paired-sample
* somatic mode variables are prefixed with T_ and N_ for Tumor and Normal, e.g. N_COV and T_COV are defined instead of COV):
* COV for coverage at the site, INDEL_F for fraction of reads supporting consensus indel at the site (wrt total coverage),
* INDEL_CF for fraction of reads with consensus indel wrt all reads with an indel at the site, CONS_CNT for the count of
* reads supporting the consensus indel at the site. Conventional arithmetic and logical operations are supported. For instance,
* N_COV<4||T_COV<6||T_INDEL_F<0.3||T_INDEL_CF<0.7 instructs the tool to only output indel calls with at least 30% observed
* allelic fraction and with consensus indel making at least 70% of all indel observations at the site, and only at the sites
* where tumor coverage and normal coverage are at least 6 and 4, respectively.
* <h2>Input</h2>
* <p>
* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
@ -147,30 +161,44 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
java.io.File bedOutput = null;
@Deprecated
@Argument(fullName="minCoverage", shortName="minCoverage",
doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
"with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false)
"with --unpaired (single sample) option, this value is used for minimum sample coverage. "+
"INSTEAD USE: T_COV<cutoff (or COV<cutoff in unpaired mode) in -filter expression (see -filter).",
required=false)
int minCoverage = 6;
@Deprecated
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
doc="used only in default (somatic) mode; normal sample must have at least minNormalCoverage "+
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored. "+
"INSTEAD USE: N_COV<cutoff in -filter expression (see -filter).", required=false)
int minNormalCoverage = 4;
@Deprecated
@Argument(fullName="minFraction", shortName="minFraction",
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, "+
"required for making a call"+
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction). "+
"INSTEAD USE: T_INDEL_F<cutoff (or INDEL_F<cutoff in unpaired mode) in -filter expression "+
"(see -filter).", required=false)
double minFraction = 0.3;
@Deprecated
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
"all indel observations at the site exceeds this threshold", required=false)
"all indel observations at the site exceeds this threshold. "+
"INSTEAD USE: T_INDEL_CF<cutoff (or INDEL_CF<cutoff in unpaired mode) in -filter expression "+
"(see -filter).", required=false)
double minConsensusFraction = 0.7;
@Deprecated
@Argument(fullName="minIndelCount", shortName="minCnt",
doc="Minimum count of reads supporting consensus indel required for making the call. "+
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
"(minIndelCount not met) will not pass.", required=false)
"(minIndelCount not met) will not pass. INSTEAD USE: T_CONS_CNT<cutoff "+
"(or CONS_CNT<cutoff in unpaired mode) in -filter expression (see -filter).", required=false)
int minIndelCount = 0;
@Argument(fullName="refseq", shortName="refseq",
@ -178,6 +206,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
"GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
String RefseqFileName = null;
@Argument(shortName="filter", doc="One or more logical expressions. If any of the expressions is TRUE, " +
"putative indel will be discarded and nothing will be printed into the output (unless genotyping "+
"at the specific position is explicitly requested, see -genotype). "+
"Default: T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7", required=false)
public ArrayList<String> FILTER_EXPRESSIONS = new ArrayList<String>();
//@Argument(fullName="blacklistedLanes", shortName="BL",
// doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
// "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
@ -221,7 +256,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
private Writer verboseWriter = null;
private static String annGenomic = "GENOMIC";
private static String annGenomic = "GENOMIC\t";
private static String annIntron = "INTRON";
private static String annUTR = "UTR";
private static String annCoding = "CODING";
@ -245,6 +280,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed;
// can be 1 base before lastGenotyped start
private JexlEngine jexlEngine = new JexlEngine();
private ArrayList<Expression> jexlExpressions = new ArrayList<Expression>();
// the following arrays store indel source-specific (normal/tumor) metric names
// for fast access when populating JEXL expression contexts (see IndelPrecall.fillContext())
private final static String[] normalMetricsCassette = new String[4];
private final static String[] tumorMetricsCassette = new String[4];
private final static String[] singleMetricsCassette = new String[4];
private final static int C_COV=0;
private final static int C_CONS_CNT=1;
private final static int C_INDEL_F=2;
private final static int C_INDEL_CF=3;
static {
normalMetricsCassette[C_COV] = "N_COV";
tumorMetricsCassette[C_COV] = "T_COV";
singleMetricsCassette[C_COV] = "COV";
normalMetricsCassette[C_CONS_CNT] = "N_CONS_CNT";
tumorMetricsCassette[C_CONS_CNT] = "T_CONS_CNT";
singleMetricsCassette[C_CONS_CNT] = "CONS_CNT";
normalMetricsCassette[C_INDEL_F] = "N_INDEL_F";
tumorMetricsCassette[C_INDEL_F] = "T_INDEL_F";
singleMetricsCassette[C_INDEL_F] = "INDEL_F";
normalMetricsCassette[C_INDEL_CF] = "N_INDEL_CF";
tumorMetricsCassette[C_INDEL_CF] = "T_INDEL_CF";
singleMetricsCassette[C_INDEL_CF] = "INDEL_CF";
}
// "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
@ -389,6 +450,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
vcf_writer.writeHeader(new VCFHeader(getVCFHeaderInfo(), SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))) ;
refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile);
// Now initialize JEXL expressions:
if ( FILTER_EXPRESSIONS.size() == 0 ) {
if ( call_unpaired ) {
FILTER_EXPRESSIONS.add("COV<6||INDEL_F<0.3||INDEL_CF<0.7");
} else {
FILTER_EXPRESSIONS.add("T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7");
}
}
for ( String s : FILTER_EXPRESSIONS ) {
try {
Expression e = jexlEngine.createExpression(s);
jexlExpressions.add(e);
} catch (Exception e) {
throw new UserException.BadArgumentValue("Filter expression", "Invalid expression used (" + s + "). Please see the JEXL docs for correct syntax.") ;
}
}
}
@ -661,14 +740,26 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue;
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
JexlContext jc = new MapContext();
normalCall.fillContext(jc,singleMetricsCassette);
boolean discard_event = false;
if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
if ( DEBUG ) {
System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
}
continue; // low coverage
for ( Expression e : jexlExpressions ) {
if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; }
}
if ( discard_event && ! genotype ) {
normal_context.indelsAt(pos).clear();
continue; //*
}
// if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
// if ( DEBUG ) {
// System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
// }
// continue; // low coverage
// }
if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos);
long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() );
@ -697,24 +788,16 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos);
boolean haveCall = normalCall.isCall(); // cache the value
if ( haveCall || genotype) {
if ( haveCall ) normalCallsMade++;
printVCFLine(vcf_writer,normalCall);
if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall);
lastGenotypedPosition = pos;
}
if ( ! discard_event ) normalCallsMade++;
printVCFLine(vcf_writer,normalCall, discard_event);
if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, discard_event);
lastGenotypedPosition = pos;
normal_context.indelsAt(pos).clear();
// we dealt with this indel; don't want to see it again
// (we might otherwise in the case when 1) there is another indel that follows
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
// for ( IndelVariant var : variants ) {
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
// }
}
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
@ -829,18 +912,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH);
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
if ( DEBUG ) {
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
}
continue; // low coverage
JexlContext jc = new MapContext();
tumorCall.fillContext(jc,tumorMetricsCassette);
normalCall.fillContext(jc,normalMetricsCassette);
boolean discard_event = false;
for ( Expression e : jexlExpressions ) {
if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; }
}
if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
if ( DEBUG ) {
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
}
continue; // low coverage
if ( discard_event && ! genotype ) {
tumor_context.indelsAt(pos).clear();
normal_context.indelsAt(pos).clear();
continue; //*
}
// if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
// if ( DEBUG ) {
// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
// }
// continue; // low coverage
// }
// if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
// if ( DEBUG ) {
// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
// }
// continue; // low coverage
// }
if ( DEBUG ) {
System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, ");
@ -868,32 +965,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
// location = getToolkit().getGenomeLocParser().setStart(location,pos);
// location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data
boolean haveCall = tumorCall.isCall(); // cache the value
// boolean haveCall = tumorCall.isCall(); // cache the value
if ( haveCall || genotype ) {
if ( haveCall ) tumorCallsMade++;
if ( ! discard_event ) tumorCallsMade++;
printVCFLine(vcf_writer,normalCall,tumorCall);
printVCFLine(vcf_writer,normalCall,tumorCall,discard_event);
if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall, discard_event );
lastGenotypedPosition = pos;
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall );
lastGenotypedPosition = pos;
}
tumor_context.indelsAt(pos).clear();
normal_context.indelsAt(pos).clear();
// we dealt with this indel; don't want to see it again
// (we might otherwise in the case when 1) there is another indel that follows
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
// for ( IndelVariant var : variants ) {
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
// }
}
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
@ -947,14 +1036,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) {
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, boolean discard_event) {
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
StringBuilder fullRecord = new StringBuilder();
fullRecord.append(makeFullRecord(normalCall));
fullRecord.append(annotationString);
if ( ! normalCall.isCall() && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
if ( discard_event && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
try {
verboseWriter.write(fullRecord.toString());
verboseWriter.write('\n');
@ -965,7 +1054,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) {
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall, boolean discard_event) {
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
@ -1013,7 +1102,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
fullRecord.append('\t');
fullRecord.append(annotationString);
if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
if ( discard_event && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
try {
verboseWriter.write(fullRecord.toString());
@ -1023,7 +1112,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
}
public void printVCFLine(VCFWriter vcf, IndelPrecall call) {
public void printVCFLine(VCFWriter vcf, IndelPrecall call, boolean discard_event) {
long start = call.getPosition()-1;
// If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
@ -1060,14 +1149,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
Map<String,Object> attrs = call.makeStatsAttributes(null);
if ( call.isCall() ) // we made a call - put actual het genotype here:
if ( ! discard_event ) // we made a call - put actual het genotype here:
genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
}
Set<String> filters = null;
if ( call.getVariant() != null && ! call.isCall() ) {
if ( call.getVariant() != null && discard_event ) {
filters = new HashSet<String>();
filters.add("NoCall");
}
@ -1095,7 +1184,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
}
public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) {
public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall, boolean discard_event) {
long start = tCall.getPosition()-1;
long stop = start;
@ -1112,7 +1201,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
Map<String,Object> attrs = new HashMap();
boolean isSomatic = false;
if ( nCall.getCoverage() >= minNormalCoverage && nCall.getVariant() == null && tCall.getVariant() != null ) {
if ( nCall.getVariant() == null && tCall.getVariant() != null ) {
isSomatic = true;
attrs.put(VCFConstants.SOMATIC_KEY,true);
}
@ -1155,7 +1244,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
Set<String> filters = null;
if ( tCall.getVariant() != null && ! tCall.isCall() ) {
if ( tCall.getVariant() != null && discard_event ) {
filters = new HashSet<String>();
filters.add("NoCall");
}
@ -1602,6 +1691,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
public IndelVariant getVariant() { return consensus_indel; }
/**
 * Populates a JEXL context with this precall's metrics under the variable names
 * given by the cassette (e.g. the N_/T_/unprefixed name sets declared in the
 * walker), so that user-supplied -filter expressions can be evaluated against
 * this call.
 *
 * @param context  JEXL context to receive the metric bindings
 * @param cassette metric-name array indexed by the C_* constants
 *                 (C_COV, C_CONS_CNT, C_INDEL_F, C_INDEL_CF)
 */
public void fillContext(JexlContext context,String[] cassette) {
// Fraction of all reads at the site that support the consensus indel.
// NOTE(review): divides by total_coverage / all_indel_count without a zero
// guard -- presumably callers only invoke this where coverage and at least
// one indel observation exist; confirm.
context.set(cassette[C_INDEL_F],((double)consensus_indel_count)/total_coverage);
// Fraction of indel-bearing reads that support the consensus indel.
context.set(cassette[C_INDEL_CF],((double)consensus_indel_count/all_indel_count));
context.set(cassette[C_COV],total_coverage);
context.set(cassette[C_CONS_CNT],consensus_indel_count);
}
/*
public boolean isCall() {
boolean ret = ( consensus_indel_count >= minIndelCount &&
(double)consensus_indel_count > minFraction * total_coverage &&
@ -1610,10 +1706,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
" total_count="+all_indel_count+" cov="+total_coverage+
" minConsensusF="+((double)consensus_indel_count)/all_indel_count+
" minF="+((double)consensus_indel_count)/total_coverage);
return ret;
// return true;
}
*/
/** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed
* variants, and sets the counts of consensus observations and all observations of any indels (including non-consensus)
* @param variants

View File

@ -134,6 +134,10 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
@Argument(doc="Use Sequenom output format instead of regular FASTA",fullName="sqnm",required=false)
boolean sequenomOutput = false;
@Hidden
@Argument(doc="Use ILMN output format instead of regular FASTA",fullName="ilmn",required=false)
boolean ilmnOutput = false;
GenomeLoc prevInterval;
GenomeLoc allelePos;
@ -141,6 +145,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
StringBuilder sequence;
StringBuilder rawSequence;
boolean sequenceInvalid;
boolean isSiteSNP;
List<String> invReason;
int indelCounter;
@ -169,6 +174,9 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
header.setSequenceDictionary(referenceDictionary);
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
}
if (ilmnOutput)
out.println("Locus_Name,Target_Type,Sequence,Chromosome,Coordinate,Genome_Build_Version,Source,Source_Version,Sequence_Orientation,Plus_Minus,Force_Infinium_I");
}
public Integer reduceInit() {
@ -234,6 +242,8 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
}
rawSequence.append(Character.toUpperCase((char) ref.getBase()));
} else if ( validate != null ) {
// record variant type in case it's needed in output format
isSiteSNP = (validate.isSNP());
// doesn't matter if there's a mask here too -- this is what we want to validate
if ( validate.isFiltered() ) {
logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site.");
@ -496,13 +506,19 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
if (!sequenomOutput)
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
else {
if (sequenomOutput) {
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
probeName = probeName.replace("amplicon_","a");
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
}
else if (ilmnOutput) {
String type = isSiteSNP?"SNP":"INDEL";
seqIdentity = seqIdentity.replace("*",""); // no * in ref allele
out.printf("%s,%s,%s,%s,%d,37,1000G,ExomePhase1,Forward,Plus,FALSE%n",probeName,type,seqIdentity,allelePos.getContig(),allelePos.getStart());
}
else{
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
}
}
}
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import com.google.java.contract.Requires;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.util.IntervalTree;
import net.sf.samtools.SAMSequenceRecord;
@ -19,11 +20,8 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
@ -32,7 +30,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -389,9 +386,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
nec.apply(tracker, ref, context, comp, eval);
}
// eval=null against all comps of different type
// eval=null against all comps of different type that aren't bound to another eval
for ( VariantContext otherComp : compSet ) {
if ( otherComp != comp ) {
if ( otherComp != comp && ! compHasMatchingEval(otherComp, evalSetBySample) ) {
synchronized (nec) {
nec.apply(tracker, ref, context, otherComp, null);
}
@ -409,6 +406,35 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
return null;
}
@Requires({"comp != null", "evals != null"})
private boolean compHasMatchingEval(final VariantContext comp, final Collection<VariantContext> evals) {
    // A comp is considered "bound" to an eval when at least one non-null eval
    // matches it at least leniently (i.e. anything other than NO_MATCH).
    for ( final VariantContext candidateEval : evals ) {
        if ( candidateEval == null )
            continue;
        if ( doEvalAndCompMatch(comp, candidateEval, requireStrictAlleleMatch) != EvalCompMatchType.NO_MATCH )
            return true;
    }

    // No eval matched this comp.
    return false;
}
private enum EvalCompMatchType { NO_MATCH, STRICT, LENIENT }
@Requires({"eval != null", "comp != null"})
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
    // Variants of different types (e.g. SNP vs. indel) can never match.
    if ( comp.getType() != eval.getType() )
        return EvalCompMatchType.NO_MATCH;

    // Only the first alternate allele of each context participates in the comparison.
    final Allele altEval = eval.getAlternateAlleles().isEmpty() ? null : eval.getAlternateAllele(0);
    final Allele altComp = comp.getAlternateAlleles().isEmpty() ? null : comp.getAlternateAllele(0);

    final boolean bothLackAltAllele = altEval == null && altComp == null;
    final boolean allelesAgree = altEval != null
            && altEval.equals(altComp)
            && eval.getReference().equals(comp.getReference());

    if ( bothLackAltAllele || allelesAgree )
        return EvalCompMatchType.STRICT;

    // Types matched but alleles did not: lenient unless strict matching was requested.
    return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT;
}
private VariantContext findMatchingComp(final VariantContext eval, final Collection<VariantContext> comps) {
// if no comps, return null
if ( comps == null || comps.isEmpty() )
@ -419,26 +445,21 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
return comps.iterator().next();
// find all of the matching comps
List<VariantContext> matchingComps = new ArrayList<VariantContext>(comps.size());
for ( VariantContext comp : comps ) {
if ( comp.getType() == eval.getType() )
matchingComps.add(comp);
VariantContext lenientMatch = null;
for ( final VariantContext comp : comps ) {
switch ( doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) ) {
case STRICT:
return comp;
case LENIENT:
if ( lenientMatch == null ) lenientMatch = comp;
break;
case NO_MATCH:
;
}
}
// if no matching comp, return null
if ( matchingComps.size() == 0 )
return null;
// find the comp which matches both the reference allele and alternate allele from eval
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
for ( VariantContext comp : matchingComps ) {
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
return comp;
}
// if none match, just return the first one unless we require a strict match
return (requireStrictAlleleMatch ? null : matchingComps.get(0));
// nothing matched, just return lenientMatch, which might be null
return lenientMatch;
}
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }

View File

@ -417,8 +417,6 @@ public class VariantEvalUtils {
}
} else {
stateKeys.add(stateKey);
return stateKeys;
}
return stateKeys;

View File

@ -10,6 +10,8 @@ import net.sf.samtools.SAMUtils;
*/
public class QualityUtils {
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
public final static double MIN_REASONABLE_ERROR = 0.0001;
public final static byte MAX_REASONABLE_Q_SCORE = 40;
public final static byte MIN_USABLE_Q_SCORE = 6;

View File

@ -30,6 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
@ -273,7 +274,7 @@ public class UserException extends ReviewedStingException {
public static class IncompatibleSequenceDictionaries extends UserException {
public IncompatibleSequenceDictionaries(String message, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
super(String.format("Input files %s and %s have incompatible contigs: %s.\n %s contigs = %s\n %s contigs = %s",
name1, name2, message, name1, prettyPrintSequenceRecords(dict1), name2, prettyPrintSequenceRecords(dict2)));
name1, name2, message, name1, ReadUtils.prettyPrintSequenceRecords(dict1), name2, ReadUtils.prettyPrintSequenceRecords(dict2)));
}
}
@ -284,17 +285,11 @@ public class UserException extends ReviewedStingException {
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
+ "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam"
+ "\n %s contigs = %s",
name, name, prettyPrintSequenceRecords(dict)));
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
}
}
/**
 * Summarizes a sequence dictionary as a bracketed list of its contig names,
 * suitable for inclusion in an error message.
 */
private static String prettyPrintSequenceRecords(SAMSequenceDictionary sequenceDictionary) {
    final String[] contigNames = new String[sequenceDictionary.size()];
    int nextSlot = 0;
    // Collect just the contig names, preserving dictionary order.
    for ( final SAMSequenceRecord record : sequenceDictionary.getSequences() ) {
        contigNames[nextSlot] = record.getSequenceName();
        nextSlot++;
    }
    return Arrays.deepToString(contigNames);
}
public static class MissingWalker extends UserException {
public MissingWalker(String walkerName, String message) {

View File

@ -648,4 +648,12 @@ public class ReadUtils {
}
return new Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>>(locusToReadMap, readToLocusMap);
}
/**
 * Renders a sequence dictionary as "[name1, name2, ...]" using only the contig
 * names, in dictionary order.  Shared by error-reporting code that needs to
 * show which contigs a dictionary contains.
 */
public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) {
    final String[] names = new String[sequenceDictionary.size()];
    int filled = 0;
    for ( final SAMSequenceRecord sequenceRecord : sequenceDictionary.getSequences() ) {
        names[filled] = sequenceRecord.getSequenceName();
        filled++;
    }
    return Arrays.deepToString(names);
}
}

View File

@ -61,6 +61,8 @@ public abstract class BaseTest {
public static final String annotationDataLocation = GATKDataLocation + "Annotations/";
public static final String b37GoodBAM = validationDataLocation + "/CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
public static final String b37GoodNA12878BAM = validationDataLocation + "/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
public static final String b37_NA12878_OMNI = validationDataLocation + "/NA12878.omni.vcf";
public static final String refseqAnnotationLocation = annotationDataLocation + "refseq/";
public static final String hg18Refseq = refseqAnnotationLocation + "refGene-big-table-hg18.txt";

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -91,4 +92,16 @@ public class GATKBAMIndexUnitTest extends BaseTest {
Assert.assertEquals(bamIndex.getLevelSize(5),37448-4681+1);
}
@Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexWordBoundary() {
    // An index file cut off exactly on a word boundary must still be detected
    // as malformed when we try to read the first reference sequence.
    final File truncatedIndex = new File(validationDataLocation + "truncated_at_word_boundary.bai");
    final GATKBAMIndex index = new GATKBAMIndex(truncatedIndex);
    index.readReferenceSequence(0);
}
@Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexNonWordBoundary() {
    // Truncation mid-word (not on a word boundary) must likewise be flagged
    // as a malformed index file.
    final File truncatedIndex = new File(validationDataLocation + "truncated_at_non_word_boundary.bai");
    final GATKBAMIndex index = new GATKBAMIndex(truncatedIndex);
    index.readReferenceSequence(0);
}
}

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diagnostics;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
 * Integration test for the ErrorRatePerCycle walker: runs it over a small
 * slice of chromosome 20 and pins the output to a known MD5.
 */
public class ErrorRatePerCycleIntegrationTest extends WalkerTest {
    @Test
    public void basicTest() {
        // Build the command line via format; the trailing %%s survives as the
        // literal "%s" placeholder that WalkerTestSpec fills with its output file.
        final String command = String.format(
                "-T ErrorRatePerCycle -R %s -I %s -L 20:10,000,000-10,100,000 -o %%s",
                b37KGReference, b37GoodBAM);
        final WalkerTestSpec spec = new WalkerTestSpec(
                command,
                1,
                Arrays.asList("0cc212ecb6df300e321784039ff29f13"));
        executeTest("ErrorRatePerCycle:", spec);
    }
}

View File

@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("f909fd8374f663e983b9b3fda4cf5cf1")
Arrays.asList("c8d8bffa5c572df9dec7364f71a1b943")
);
executeTest("testFunctionClassWithSnpeff", spec);
}
@ -277,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("4f60acc8a4b21c4b4ccb51ad9071449c"));
1, Arrays.asList("c49e239292704447a36e01ee9a71e729"));
executeTestParallel("testSelect1", spec);
}
@ -335,7 +335,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --dbsnp " + b37dbSNP132 +
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("190e1a171132832bf92fbca56a9c40bb"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e42cda858649a35eaa9d14ea2d70a956"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -347,7 +347,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("08586d443fdcf3b7f63b8f9e3a943c62"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9561cb4c7aa36dcf30ba253385299859"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -463,7 +463,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
Arrays.asList("397b0e77459b9b69d2e0dd1dac320c3c")
);
executeTest("testModernVCFWithLargeIndels", spec);
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
@ -64,11 +65,21 @@ public class CryptUtilsUnitTest extends BaseTest {
@Test
public void testGATKMasterKeyPairMutualDecryption() {
    // This test needs read access to the GATK master private key; skip
    // gracefully (rather than fail) when the key exists but is unreadable.
    if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
        throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
                                              "testGATKMasterKeyPairMutualDecryption"));
    }

    final boolean mutualDecryption = CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(),
                                                                     CryptUtils.loadGATKMasterPublicKey());
    Assert.assertTrue(mutualDecryption);
}
@Test
public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() {
    // Requires read access to the GATK master private key; skip gracefully
    // when the key file is present but unreadable.
    if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
        throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
                                              "testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption"));
    }

    final boolean mutualDecryption = CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(),
                                                                     CryptUtils.loadGATKDistributedPublicKey());
    Assert.assertTrue(mutualDecryption);
}
@ -156,6 +167,11 @@ public class CryptUtilsUnitTest extends BaseTest {
@Test
public void testLoadGATKMasterPrivateKey() {
    // Requires read access to the GATK master private key; skip gracefully
    // when the key file is present but unreadable.
    if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
        throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
                                              "testLoadGATKMasterPrivateKey"));
    }

    // The test's assertion is simply that loading does not throw; the returned
    // key is never inspected, so the previously unused local has been removed.
    CryptUtils.loadGATKMasterPrivateKey();
}
@ -174,4 +190,9 @@ public class CryptUtilsUnitTest extends BaseTest {
Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm());
Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat());
}
/**
 * Returns true only when the GATK master private key file is present on disk
 * but this process lacks permission to read it — the condition under which
 * the key-dependent tests skip themselves.
 */
private boolean gatkPrivateKeyExistsButReadPermissionDenied() {
    final File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE);
    if ( ! gatkPrivateKey.exists() )
        return false;
    return ! gatkPrivateKey.canRead();
}
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.SkipException;
import org.testng.annotations.Test;
import org.testng.Assert;
@ -39,6 +40,11 @@ public class GATKKeyUnitTest extends BaseTest {
@Test
public void testCreateGATKKeyUsingMasterKeyPair() {
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
"testCreateGATKKeyUsingMasterKeyPair"));
}
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey();
@ -49,6 +55,11 @@ public class GATKKeyUnitTest extends BaseTest {
@Test
public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() {
if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
"testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey"));
}
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
@ -82,8 +93,7 @@ public class GATKKeyUnitTest extends BaseTest {
KeyPair keyPair = CryptUtils.generateKeyPair();
// Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file:
GATKKey key = new GATKKey(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey(),
emailAddressWithNulByte);
GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), emailAddressWithNulByte);
}
@Test
@ -110,4 +120,9 @@ public class GATKKeyUnitTest extends BaseTest {
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile);
}
/**
 * True when the GATK master private key exists but cannot be read by this
 * process; used by the key-dependent tests to decide whether to skip.
 */
private boolean gatkPrivateKeyExistsButReadPermissionDenied() {
    final File keyFile = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE);
    return keyFile.exists() && ! keyFile.canRead();
}
}