Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-03-08 12:30:04 -05:00
commit c04853eae6
39 changed files with 2425 additions and 312 deletions

View File

@ -47,6 +47,7 @@
<property name="R.package.path" value="org/broadinstitute/sting/utils/R" />
<property name="resource.file" value="StingText.properties" />
<property name="resource.path" value="${java.classes}/StingText.properties" />
<property name="key.dir" value="${public.dir}/keys" />
<property name="scala.public.source.dir" value="${public.dir}/scala/src" />
<property name="scala.private.source.dir" value="${private.dir}/scala/src" />
@ -567,6 +568,7 @@
</fileset>
<fileset dir="${java.classes}" includes="**/commandline/**/*.class"/>
<fileset dir="${java.classes}" includes="**/sting/pipeline/**/*.class"/>
<fileset dir="${java.classes}" includes="**/sting/tools/**/*.class"/>
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
@ -615,6 +617,9 @@
<include name="**/gatk/**/*.R"/>
<include name="**/alignment/**/*.R"/>
</fileset>
<fileset dir="${key.dir}">
<include name="**/*.key"/>
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" />
</manifest>
@ -879,6 +884,7 @@
<pathelement location="${R.tar.dir}" />
<pathelement location="${R.public.scripts.dir}" />
<pathelement location="${R.private.scripts.dir}" />
<pathelement location="${key.dir}" />
<path refid="external.dependencies" />
</path>

View File

@ -35,9 +35,12 @@ import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.crypt.CryptUtils;
import org.broadinstitute.sting.utils.crypt.GATKKey;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.ListFileUtils;
import java.security.PublicKey;
import java.util.*;
/**
@ -78,6 +81,9 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
Walker<?,?> walker = engine.getWalkerByName(getAnalysisName());
try {
// Make sure a valid GATK user key is present, if required.
authorizeGATKRun();
engine.setArguments(getArgumentCollection());
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
@ -130,6 +136,28 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
return 0;
}
/**
 * Authorizes this run of the GATK by checking for a valid GATK user key, if required.
 * Currently, a key is required only if running with the -et NO_ET or -et STDOUT options.
 */
private void authorizeGATKRun() {
    final GATKRunReport.PhoneHomeOption phoneHome = getArgumentCollection().phoneHomeType;

    // A key is only needed when the user opts out of (or redirects) phone-home reporting.
    final boolean keyRequired = phoneHome == GATKRunReport.PhoneHomeOption.NO_ET ||
                                phoneHome == GATKRunReport.PhoneHomeOption.STDOUT;
    if ( ! keyRequired )
        return;

    if ( getArgumentCollection().gatkKeyFile == null ) {
        throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
                                "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
                                "for more information and instructions on how to obtain a key.");
    }

    // Verify the user's key against the distributed GATK public key.
    final PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();
    final GATKKey gatkUserKey = new GATKKey(gatkPublicKey, getArgumentCollection().gatkKeyFile);
    if ( ! gatkUserKey.isValid() ) {
        throw new UserException.KeySignatureVerificationException(getArgumentCollection().gatkKeyFile);
    }
}
/**
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.

View File

@ -65,9 +65,12 @@ public class GATKArgumentCollection {
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null;
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
public File gatkKeyFile = null;
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
public List<String> readFilters = new ArrayList<String>();

View File

@ -154,9 +154,7 @@ public class GATKRunReport {
/** Standard option. Writes to local repository if it can be found, or S3 otherwise */
STANDARD,
/** Force output to STDOUT. For debugging only */
STDOUT,
/** Force output to S3. For debugging only */
AWS_S3 // todo -- remove me -- really just for testing purposes
STDOUT
}
private static final DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss");
@ -239,11 +237,8 @@ public class GATKRunReport {
case STDOUT:
postReportToStream(System.out);
break;
case AWS_S3:
postReportToAWSS3();
break;
default:
exceptDuringRunReport("BUG: unexcepted PhoneHomeOption ");
exceptDuringRunReport("BUG: unexpected PhoneHomeOption ");
break;
}
}

View File

@ -296,6 +296,10 @@ public class GATKReportTable {
return primaryKeyColumn.contains(primaryKey);
}
/**
 * Returns the primary keys of all rows currently stored in this table.
 *
 * @return an unmodifiable view (not a copy) of the underlying primary key column;
 *         callers cannot mutate it, but it will reflect rows added to the table later
 */
public Collection<Object> getPrimaryKeys() {
return Collections.unmodifiableCollection(primaryKeyColumn);
}
/**
* Set the value for a given position in the table
*

View File

@ -25,8 +25,14 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -39,6 +45,63 @@ import org.broadinstitute.sting.commandline.Hidden;
public class RecalibrationArgumentCollection {
/**
* This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference,
* so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites. This tool accepts any number of RodBindings (VCF, Bed, etc.)
* for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
* Please note however that the statistics reported by the tool will not accurately reflect those sites skipped by the -XL argument.
*/
@Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
protected List<RodBinding<Feature>> knownSites = Collections.emptyList();
/**
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
*/
@Gather(CountCovariatesGatherer.class)
@Output
protected PrintStream RECAL_FILE;
/**
* List all implemented covariates.
*/
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
protected boolean LIST_ONLY = false;
/**
* Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list.
*/
@Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false)
protected String[] COVARIATES = null;
/**
* Use the standard set of covariates in addition to the ones listed using the -cov argument
*/
@Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false)
protected boolean USE_STANDARD_COVARIATES = true;
/////////////////////////////
// Debugging-only Arguments
/////////////////////////////
/**
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
*/
@Hidden
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
protected boolean RUN_WITHOUT_DBSNP = false;
/////////////////////////////
// protected Member Variables
/////////////////////////////
protected final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables)
protected final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();// A list to hold the covariate objects that were requested
protected final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
/**
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
* reads which have had the reference inserted because of color space inconsistencies.

View File

@ -29,6 +29,7 @@ import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -115,6 +116,7 @@ import java.util.*;
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
@By(DataSource.REFERENCE)
@PartitionBy(PartitionType.NONE)
@Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE)
public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partition,Map<String,int[]>>, CoveragePartitioner> implements TreeReducible<CoveragePartitioner> {
@Output
@Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"})

View File

@ -0,0 +1,162 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
/**
* Computes the read error rate per position in the read (in the original 5'->3' orientation that the read had coming off the machine)
*
* Emits a GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate for each read
* group in the input BAMs FOR ONLY THE FIRST OF PAIR READS.
*
* <h2>Input</h2>
* <p>
* Any number of BAM files
* </p>
*
* <h2>Output</h2>
* <p>
* GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate.
*
* For example, running this tool on the NA12878 data sets:
*
* <pre>
* ##:GATKReport.v0.2 ErrorRatePerCycle : The error rate per sequenced position in the reads
* readgroup cycle mismatches counts qual errorrate
* 20FUK.1 0 80 23368 25 3.47e-03
* 20FUK.1 1 40 23433 28 1.75e-03
* 20FUK.1 2 36 23453 28 1.58e-03
* 20FUK.1 3 26 23476 29 1.15e-03
* 20FUK.1 4 32 23495 29 1.40e-03
* up to 101 cycles
* 20FUK.2 0 77 20886 24 3.73e-03
* 20FUK.2 1 28 20920 29 1.39e-03
* 20FUK.2 2 24 20931 29 1.19e-03
* 20FUK.2 3 30 20940 28 1.48e-03
* 20FUK.2 4 25 20948 29 1.24e-03
* up to 101 cycles
* 20FUK.3 0 78 22038 24 3.58e-03
* 20FUK.3 1 40 22091 27 1.86e-03
* 20FUK.3 2 23 22108 30 1.09e-03
* 20FUK.3 3 36 22126 28 1.67e-03
* </pre>
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T ErrorRatePerCycle
* -I bundle/current/b37/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam
* -R bundle/current/b37/human_g1k_v37.fasta
* -o example.gatkreport.txt
* </pre>
*
* @author Kiran Garimella, Mark DePristo
*/
public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
    // Destination for the final GATKReport, written once in onTraversalDone().
    @Output PrintStream out;

    @Argument(fullName="min_base_quality_score", shortName="mbq", doc="Minimum base quality required to consider a base for calling", required=false)
    public Integer MIN_BASE_QUAL = 0;

    @Argument(fullName="min_mapping_quality_score", shortName="mmq", doc="Minimum read mapping quality required to consider a read for calling", required=false)
    public Integer MIN_MAPPING_QUAL = 20;

    // One table row per (read group, cycle): counters accumulate in map(), and the
    // derived qual/errorrate columns are filled in once in onTraversalDone().
    private GATKReport report;
    private GATKReportTable table;
    private final static String reportName = "ErrorRatePerCycle";
    private final static String reportDescription = "The error rate per sequenced position in the reads";

    /**
     * Allows us to use multiple records for the key (read group x cycle)
     */
    private static class TableKey implements Comparable<TableKey> {
        final String readGroup;
        final int cycle;

        private TableKey(final String readGroup, final int cycle) {
            this.readGroup = readGroup;
            this.cycle = cycle;
        }

        @Override
        public int compareTo(final TableKey tableKey) {
            // Order rows by read group name first, then by cycle within a read group.
            final int scmp = readGroup.compareTo(tableKey.readGroup);
            if ( scmp == 0 )
                return Integer.valueOf(cycle).compareTo(tableKey.cycle);
            else
                return scmp;
        }

        // equals/hashCode must be defined consistently with compareTo:
        // GATKReportTable.containsKey delegates to Collection.contains, which
        // compares keys with equals().  Without this override a freshly-built
        // key would never match an existing row (identity equality), producing
        // duplicate rows instead of incrementing the existing one.
        @Override
        public boolean equals(final Object o) {
            if ( this == o ) return true;
            if ( ! (o instanceof TableKey) ) return false;
            final TableKey that = (TableKey)o;
            return cycle == that.cycle && readGroup.equals(that.readGroup);
        }

        @Override
        public int hashCode() {
            return 31 * readGroup.hashCode() + cycle;
        }
    }

    /**
     * Builds the empty report table.  Column defaults give every new row zeroed
     * counters; the errorrate column is printed in scientific notation (%.2e).
     */
    public void initialize() {
        report = new GATKReport();
        report.addTable(reportName, reportDescription);
        table = report.getTable(reportName);
        table.addPrimaryKey("key", false);
        table.addColumn("readgroup", 0);
        table.addColumn("cycle", 0);
        table.addColumn("mismatches", 0);
        table.addColumn("counts", 0);
        table.addColumn("qual", 0);
        table.addColumn("errorrate", 0.0f, "%.2e");
    }

    /**
     * Tallies, for every passing pileup base, one observation (and one mismatch when
     * the read base differs from the reference base) into the (read group, cycle) row.
     *
     * @return always null; all state lives in the report table
     */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        for ( final PileupElement p : context.getBasePileup() ) {
            final GATKSAMRecord read = p.getRead();
            final int offset = p.getOffset();
            // Unpaired reads count as "first of pair" so they are not silently dropped.
            final boolean firstOfPair = ! read.getReadPairedFlag() || read.getFirstOfPairFlag();

            if ( firstOfPair && read.getMappingQuality() >= MIN_MAPPING_QUAL && p.getQual() >= MIN_BASE_QUAL ) {
                final byte readBase = p.getBase();
                final byte refBase = ref.getBase();
                // NOTE(review): cycle is taken directly as the aligned read offset; for
                // negative-strand reads this is the reverse of the machine cycle unless
                // reads are re-oriented upstream -- confirm against the class docs.
                final int cycle = offset;

                // Only count unambiguous (A/C/G/T) read and reference bases.
                if ( BaseUtils.isRegularBase(readBase) && BaseUtils.isRegularBase(refBase) ) {
                    final TableKey key = new TableKey(read.getReadGroup().getReadGroupId(), cycle);

                    if ( ! table.containsKey(key) ) {
                        table.set(key, "cycle", cycle);
                        table.set(key, "readgroup", read.getReadGroup().getReadGroupId());
                    }

                    table.increment(key, "counts");
                    if (readBase != refBase)
                        table.increment(key, "mismatches");
                }
            }
        }

        return null;
    }

    // All accumulation happens in the report table, so the traversal carries no reduce value.
    public Integer reduceInit() { return null; }
    public Integer reduce(Integer value, Integer sum) { return null; }

    /**
     * Fills in the derived qual and errorrate columns for every row, then prints
     * the report to the output stream.
     */
    public void onTraversalDone(Integer sum) {
        for ( final Object key : table.getPrimaryKeys() ) {
            final int mismatches = (Integer)table.get(key, "mismatches");
            final int count = (Integer)table.get(key, "counts");
            // +1 pseudocounts keep the rate defined (and non-zero) for sparse cells.
            final double errorRate = (mismatches + 1) / (1.0*(count + 1));
            final int qual = QualityUtils.probToQual(1-errorRate, 0.0);
            table.set(key, "qual", qual);
            table.set(key, "errorrate", errorRate);
        }
        report.print(out);
    }
}

View File

@ -94,9 +94,6 @@ import java.util.Map;
*
* @author Mark DePristo
*/
public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
@Output
public PrintStream out;

View File

@ -454,8 +454,7 @@ public class HaplotypeIndelErrorModel {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
// Second term is a constant added to both likelihoods so will be ignored
haplotypeLikehoodMatrix[i][j] += MathUtils.softMax(readLikelihood[0],
readLikelihood[1]);
haplotypeLikehoodMatrix[i][j] += MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]);
}

View File

@ -166,18 +166,17 @@ public class PairHMMIndelErrorModel {
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
YMetricArray[im1][jm1] + pBaseRead);
matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]});
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
// update Y array
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
}
}
@ -316,9 +315,7 @@ public class PairHMMIndelErrorModel {
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
XMetricArray[bestI][bestJ],
YMetricArray[bestI][bestJ]);
final double bestMetric = MathUtils.approximateLog10SumLog10(new double[]{ matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ] });
/*
if (DEBUG) {
@ -651,7 +648,7 @@ public class PairHMMIndelErrorModel {
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplied to just a single loop without the intermediate NxN matrix
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix
for (int i=0; i < numHaplotypes; i++) {
for (int j=i; j < numHaplotypes; j++){
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
@ -665,7 +662,7 @@ public class PairHMMIndelErrorModel {
final double li = readLikelihoods[readIdx][i];
final double lj = readLikelihoods[readIdx][j];
final int readCount = readCounts[readIdx];
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF);
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.approximateLog10SumLog10(li, lj) + LOG_ONE_HALF);
}
}
}
@ -678,7 +675,7 @@ public class PairHMMIndelErrorModel {
}
}
// renormalize so that max element is zero.
// renormalize so that max element is zero.
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
}
}

View File

@ -26,6 +26,10 @@
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.*;
import org.apache.commons.jexl2.Expression;
import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.JexlEngine;
import org.apache.commons.jexl2.MapContext;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -71,7 +75,7 @@ import java.util.*;
* <p>
* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferably MSA cleaned) sequencing
* data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
* include additional statistics such as mismtaches and base qualitites around the calls, read strandness (how many
include additional statistics such as mismatches and base qualities around the calls, read strandness (how many
* forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
* statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
* attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
@ -88,6 +92,16 @@ import java.util.*;
* bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
* on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
*
* Which (putative) calls will make it into the output file(s) is controlled by an expression/list of expressions passed with -filter
* flag: if any of the expressions evaluate to TRUE, the site will be discarded. Otherwise the putative call and all the
* associated statistics will be printed into the output. Expressions recognize the following variables(in paired-sample
* somatic mode variables are prefixed with T_ and N_ for Tumor and Normal, e.g. N_COV and T_COV are defined instead of COV):
* COV for coverage at the site, INDEL_F for fraction of reads supporting consensus indel at the site (wrt total coverage),
* INDEL_CF for fraction of reads with consensus indel wrt all reads with an indel at the site, CONS_CNT for the count of
* reads supporting the consensus indel at the site. Conventional arithmetic and logical operations are supported. For instance,
* N_COV<4||T_COV<6||T_INDEL_F<0.3||T_INDEL_CF<0.7 instructs the tool to only output indel calls with at least 30% observed
* allelic fraction and with consensus indel making at least 70% of all indel observations at the site, and only at the sites
* where tumor coverage and normal coverage are at least 6 and 4, respectively.
* <h2>Input</h2>
* <p>
* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
@ -147,30 +161,44 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
java.io.File bedOutput = null;
@Deprecated
@Argument(fullName="minCoverage", shortName="minCoverage",
doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
"with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false)
"with --unpaired (single sample) option, this value is used for minimum sample coverage. "+
"INSTEAD USE: T_COV<cutoff (or COV<cutoff in unpaired mode) in -filter expression (see -filter).",
required=false)
int minCoverage = 6;
@Deprecated
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
doc="used only in default (somatic) mode; normal sample must have at least minNormalCoverage "+
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored. "+
"INSTEAD USE: N_COV<cutoff in -filter expression (see -filter).", required=false)
int minNormalCoverage = 4;
@Deprecated
@Argument(fullName="minFraction", shortName="minFraction",
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, "+
"required for making a call"+
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction). "+
"INSTEAD USE: T_INDEL_F<cutoff (or INDEL_F<cutoff in unpaired mode) in -filter expression "+
"(see -filter).", required=false)
double minFraction = 0.3;
@Deprecated
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
"all indel observations at the site exceeds this threshold", required=false)
"all indel observations at the site exceeds this threshold. "+
"INSTEAD USE: T_INDEL_CF<cutoff (or INDEL_CF<cutoff in unpaired mode) in -filter expression "+
"(see -filter).", required=false)
double minConsensusFraction = 0.7;
@Deprecated
@Argument(fullName="minIndelCount", shortName="minCnt",
doc="Minimum count of reads supporting consensus indel required for making the call. "+
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
"(minIndelCount not met) will not pass.", required=false)
"(minIndelCount not met) will not pass. INSTEAD USE: T_CONS_CNT<cutoff "+
"(or CONS_CNT<cutoff in unpaired mode) in -filter expression (see -filter).", required=false)
int minIndelCount = 0;
@Argument(fullName="refseq", shortName="refseq",
@ -178,6 +206,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
"GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
String RefseqFileName = null;
@Argument(shortName="filter", doc="One or more logical expressions. If any of the expressions is TRUE, " +
"putative indel will be discarded and nothing will be printed into the output (unless genotyping "+
"at the specific position is explicitly requested, see -genotype). "+
"Default: T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7", required=false)
public ArrayList<String> FILTER_EXPRESSIONS = new ArrayList<String>();
//@Argument(fullName="blacklistedLanes", shortName="BL",
// doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
// "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
@ -221,7 +256,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
private Writer verboseWriter = null;
private static String annGenomic = "GENOMIC";
private static String annGenomic = "GENOMIC\t";
private static String annIntron = "INTRON";
private static String annUTR = "UTR";
private static String annCoding = "CODING";
@ -245,6 +280,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed;
// can be 1 base before lastGenotyped start
private JexlEngine jexlEngine = new JexlEngine();
private ArrayList<Expression> jexlExpressions = new ArrayList<Expression>();
// the following arrays store indel source-specific (normal/tumor) metric names
// for fast access when populating JEXL expression contexts (see IndelPrecall.fillContext())
private final static String[] normalMetricsCassette = new String[4];
private final static String[] tumorMetricsCassette = new String[4];
private final static String[] singleMetricsCassette = new String[4];
private final static int C_COV=0;
private final static int C_CONS_CNT=1;
private final static int C_INDEL_F=2;
private final static int C_INDEL_CF=3;
static {
normalMetricsCassette[C_COV] = "N_COV";
tumorMetricsCassette[C_COV] = "T_COV";
singleMetricsCassette[C_COV] = "COV";
normalMetricsCassette[C_CONS_CNT] = "N_CONS_CNT";
tumorMetricsCassette[C_CONS_CNT] = "T_CONS_CNT";
singleMetricsCassette[C_CONS_CNT] = "CONS_CNT";
normalMetricsCassette[C_INDEL_F] = "N_INDEL_F";
tumorMetricsCassette[C_INDEL_F] = "T_INDEL_F";
singleMetricsCassette[C_INDEL_F] = "INDEL_F";
normalMetricsCassette[C_INDEL_CF] = "N_INDEL_CF";
tumorMetricsCassette[C_INDEL_CF] = "T_INDEL_CF";
singleMetricsCassette[C_INDEL_CF] = "INDEL_CF";
}
// "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
@ -389,6 +450,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
vcf_writer.writeHeader(new VCFHeader(getVCFHeaderInfo(), SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))) ;
refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile);
// Now initialize JEXL expressions:
if ( FILTER_EXPRESSIONS.size() == 0 ) {
if ( call_unpaired ) {
FILTER_EXPRESSIONS.add("COV<6||INDEL_F<0.3||INDEL_CF<0.7");
} else {
FILTER_EXPRESSIONS.add("T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7");
}
}
for ( String s : FILTER_EXPRESSIONS ) {
try {
Expression e = jexlEngine.createExpression(s);
jexlExpressions.add(e);
} catch (Exception e) {
throw new UserException.BadArgumentValue("Filter expression", "Invalid expression used (" + s + "). Please see the JEXL docs for correct syntax.") ;
}
}
}
@ -661,14 +740,26 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue;
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
JexlContext jc = new MapContext();
normalCall.fillContext(jc,singleMetricsCassette);
boolean discard_event = false;
if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
if ( DEBUG ) {
System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
}
continue; // low coverage
for ( Expression e : jexlExpressions ) {
if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; }
}
if ( discard_event && ! genotype ) {
normal_context.indelsAt(pos).clear();
continue; //*
}
// if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
// if ( DEBUG ) {
// System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
// }
// continue; // low coverage
// }
if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos);
long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() );
@ -697,24 +788,16 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos);
boolean haveCall = normalCall.isCall(); // cache the value
if ( haveCall || genotype) {
if ( haveCall ) normalCallsMade++;
printVCFLine(vcf_writer,normalCall);
if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall);
lastGenotypedPosition = pos;
}
if ( ! discard_event ) normalCallsMade++;
printVCFLine(vcf_writer,normalCall, discard_event);
if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, discard_event);
lastGenotypedPosition = pos;
normal_context.indelsAt(pos).clear();
// we dealt with this indel; don't want to see it again
// (we might otherwise in the case when 1) there is another indel that follows
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
// for ( IndelVariant var : variants ) {
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
// }
}
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
@ -829,18 +912,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH);
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
if ( DEBUG ) {
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
}
continue; // low coverage
JexlContext jc = new MapContext();
tumorCall.fillContext(jc,tumorMetricsCassette);
normalCall.fillContext(jc,normalMetricsCassette);
boolean discard_event = false;
for ( Expression e : jexlExpressions ) {
if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; }
}
if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
if ( DEBUG ) {
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
}
continue; // low coverage
if ( discard_event && ! genotype ) {
tumor_context.indelsAt(pos).clear();
normal_context.indelsAt(pos).clear();
continue; //*
}
// if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
// if ( DEBUG ) {
// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
// }
// continue; // low coverage
// }
// if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
// if ( DEBUG ) {
// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
// }
// continue; // low coverage
// }
if ( DEBUG ) {
System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, ");
@ -868,32 +965,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
// location = getToolkit().getGenomeLocParser().setStart(location,pos);
// location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data
boolean haveCall = tumorCall.isCall(); // cache the value
// boolean haveCall = tumorCall.isCall(); // cache the value
if ( haveCall || genotype ) {
if ( haveCall ) tumorCallsMade++;
if ( ! discard_event ) tumorCallsMade++;
printVCFLine(vcf_writer,normalCall,tumorCall);
printVCFLine(vcf_writer,normalCall,tumorCall,discard_event);
if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall, discard_event );
lastGenotypedPosition = pos;
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall );
lastGenotypedPosition = pos;
}
tumor_context.indelsAt(pos).clear();
normal_context.indelsAt(pos).clear();
// we dealt with this indel; don't want to see it again
// (we might otherwise in the case when 1) there is another indel that follows
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
// for ( IndelVariant var : variants ) {
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
// }
}
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
@ -947,14 +1036,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) {
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, boolean discard_event) {
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
StringBuilder fullRecord = new StringBuilder();
fullRecord.append(makeFullRecord(normalCall));
fullRecord.append(annotationString);
if ( ! normalCall.isCall() && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
if ( discard_event && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
try {
verboseWriter.write(fullRecord.toString());
verboseWriter.write('\n');
@ -965,7 +1054,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) {
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall, boolean discard_event) {
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
@ -1013,7 +1102,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
fullRecord.append('\t');
fullRecord.append(annotationString);
if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
if ( discard_event && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
try {
verboseWriter.write(fullRecord.toString());
@ -1023,7 +1112,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
}
public void printVCFLine(VCFWriter vcf, IndelPrecall call) {
public void printVCFLine(VCFWriter vcf, IndelPrecall call, boolean discard_event) {
long start = call.getPosition()-1;
// If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
@ -1060,14 +1149,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
Map<String,Object> attrs = call.makeStatsAttributes(null);
if ( call.isCall() ) // we made a call - put actual het genotype here:
if ( ! discard_event ) // we made a call - put actual het genotype here:
genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
}
Set<String> filters = null;
if ( call.getVariant() != null && ! call.isCall() ) {
if ( call.getVariant() != null && discard_event ) {
filters = new HashSet<String>();
filters.add("NoCall");
}
@ -1095,7 +1184,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
}
public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) {
public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall, boolean discard_event) {
long start = tCall.getPosition()-1;
long stop = start;
@ -1112,7 +1201,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
Map<String,Object> attrs = new HashMap();
boolean isSomatic = false;
if ( nCall.getCoverage() >= minNormalCoverage && nCall.getVariant() == null && tCall.getVariant() != null ) {
if ( nCall.getVariant() == null && tCall.getVariant() != null ) {
isSomatic = true;
attrs.put(VCFConstants.SOMATIC_KEY,true);
}
@ -1155,7 +1244,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
}
Set<String> filters = null;
if ( tCall.getVariant() != null && ! tCall.isCall() ) {
if ( tCall.getVariant() != null && discard_event ) {
filters = new HashSet<String>();
filters.add("NoCall");
}
@ -1602,6 +1691,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
public IndelVariant getVariant() { return consensus_indel; }
/**
 * Publishes this pre-call's filtering metrics into a JEXL context, binding each
 * value to the variable name supplied by the cassette (indexed by the C_* constants).
 *
 * @param context  JEXL context to populate
 * @param cassette variable names to bind: cassette[C_INDEL_F] (indel fraction of
 *                 total coverage), cassette[C_INDEL_CF] (consensus fraction of all
 *                 indel observations), cassette[C_COV], cassette[C_CONS_CNT]
 */
public void fillContext(JexlContext context, String[] cassette) {
    // NOTE(review): assumes total_coverage > 0 and all_indel_count > 0 at all call sites — confirm
    final double indelFraction = ((double) consensus_indel_count) / total_coverage;
    final double consensusFraction = ((double) consensus_indel_count) / all_indel_count;
    context.set(cassette[C_INDEL_F], indelFraction);
    context.set(cassette[C_INDEL_CF], consensusFraction);
    context.set(cassette[C_COV], total_coverage);
    context.set(cassette[C_CONS_CNT], consensus_indel_count);
}
/*
public boolean isCall() {
boolean ret = ( consensus_indel_count >= minIndelCount &&
(double)consensus_indel_count > minFraction * total_coverage &&
@ -1610,10 +1706,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
" total_count="+all_indel_count+" cov="+total_coverage+
" minConsensusF="+((double)consensus_indel_count)/all_indel_count+
" minF="+((double)consensus_indel_count)/total_coverage);
return ret;
// return true;
}
*/
/** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed
* variants, and sets the counts of consensus observations and all observations of any indels (including non-consensus)
* @param variants

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import com.google.java.contract.Requires;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.util.IntervalTree;
import net.sf.samtools.SAMSequenceRecord;
@ -19,11 +20,8 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
@ -32,7 +30,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -389,9 +386,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
nec.apply(tracker, ref, context, comp, eval);
}
// eval=null against all comps of different type
// eval=null against all comps of different type that aren't bound to another eval
for ( VariantContext otherComp : compSet ) {
if ( otherComp != comp ) {
if ( otherComp != comp && ! compHasMatchingEval(otherComp, evalSetBySample) ) {
synchronized (nec) {
nec.apply(tracker, ref, context, otherComp, null);
}
@ -409,6 +406,35 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
return null;
}
/**
 * Determines whether any of the provided eval contexts matches the given comp record,
 * under the walker-level strictness setting (requireStrictAlleleMatch).
 *
 * @param comp  the comp variant to test
 * @param evals the eval variants to search; null entries are ignored
 * @return true if at least one non-null eval matches comp (strictly or leniently)
 */
@Requires({"comp != null", "evals != null"})
private boolean compHasMatchingEval(final VariantContext comp, final Collection<VariantContext> evals) {
    for ( final VariantContext candidate : evals ) {
        if ( candidate == null )
            continue;
        if ( doEvalAndCompMatch(comp, candidate, requireStrictAlleleMatch) != EvalCompMatchType.NO_MATCH )
            return true;
    }

    // no eval matched this comp
    return false;
}
/** How an eval record relates to a comp record: exact ref/alt allele match (STRICT), same-type but differing alleles (LENIENT), or no match at all (NO_MATCH). */
private enum EvalCompMatchType { NO_MATCH, STRICT, LENIENT }
/**
 * Classifies how closely an eval variant matches a comp variant.
 *
 * A STRICT match requires identical variant types plus identical reference and first
 * alternate alleles (or both records lacking an alternate allele). Records of the same
 * type whose alleles disagree are LENIENT matches, unless strict matching was requested,
 * in which case they do not match at all.
 *
 * @param eval                    the eval variant
 * @param comp                    the comp variant
 * @param requireStrictAlleleMatch if true, allele disagreement downgrades LENIENT to NO_MATCH
 * @return the match classification
 */
@Requires({"eval != null", "comp != null"})
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
    // different variant types can never match
    if ( eval.getType() != comp.getType() )
        return EvalCompMatchType.NO_MATCH;

    // compare the first alternate allele (or its absence) on each side
    final Allele evalAlt = eval.getAlternateAlleles().isEmpty() ? null : eval.getAlternateAllele(0);
    final Allele compAlt = comp.getAlternateAlleles().isEmpty() ? null : comp.getAlternateAllele(0);

    final boolean bothLackAlt = (evalAlt == null && compAlt == null);
    final boolean allelesAgree = evalAlt != null
            && evalAlt.equals(compAlt)
            && eval.getReference().equals(comp.getReference());

    if ( bothLackAlt || allelesAgree )
        return EvalCompMatchType.STRICT;

    // same type, differing alleles
    return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT;
}
private VariantContext findMatchingComp(final VariantContext eval, final Collection<VariantContext> comps) {
// if no comps, return null
if ( comps == null || comps.isEmpty() )
@ -419,26 +445,21 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
return comps.iterator().next();
// find all of the matching comps
List<VariantContext> matchingComps = new ArrayList<VariantContext>(comps.size());
for ( VariantContext comp : comps ) {
if ( comp.getType() == eval.getType() )
matchingComps.add(comp);
VariantContext lenientMatch = null;
for ( final VariantContext comp : comps ) {
switch ( doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) ) {
case STRICT:
return comp;
case LENIENT:
if ( lenientMatch == null ) lenientMatch = comp;
break;
case NO_MATCH:
;
}
}
// if no matching comp, return null
if ( matchingComps.size() == 0 )
return null;
// find the comp which matches both the reference allele and alternate allele from eval
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
for ( VariantContext comp : matchingComps ) {
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
return comp;
}
// if none match, just return the first one unless we require a strict match
return (requireStrictAlleleMatch ? null : matchingComps.get(0));
// nothing matched, just return lenientMatch, which might be null
return lenientMatch;
}
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }

View File

@ -417,8 +417,6 @@ public class VariantEvalUtils {
}
} else {
stateKeys.add(stateKey);
return stateKeys;
}
return stateKeys;

View File

@ -140,15 +140,16 @@ public class GaussianMixtureModel {
}
for( final VariantDatum datum : data ) {
final ArrayList<Double> pVarInGaussianLog10 = new ArrayList<Double>( gaussians.size() );
final double[] pVarInGaussianLog10 = new double[gaussians.size()];
int gaussianIndex = 0;
for( final MultivariateGaussian gaussian : gaussians ) {
final double pVarLog10 = gaussian.evaluateDatumLog10( datum );
pVarInGaussianLog10.add( pVarLog10 );
pVarInGaussianLog10[gaussianIndex++] = pVarLog10;
}
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10 );
int iii = 0;
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10, false );
gaussianIndex = 0;
for( final MultivariateGaussian gaussian : gaussians ) {
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[iii++] ); //BUGBUG: to clean up
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[gaussianIndex++] );
}
}
}

View File

@ -372,6 +372,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
stream.println("library(ggplot2)");
// For compactPDF in R 2.13+
stream.println("library(tools)");
// For graphical functions R 2.14.2+
stream.println("library(grid)");
createArrangeFunction( stream );

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
@ -113,24 +114,28 @@ public class Haplotype {
return isReference;
}
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final byte[] paddedRef, final int refStart,
final Cigar haplotypeCigar, final int numBasesAddedToStartOfHaplotype, final int refHaplotypeLength ) {
@Requires({"refInsertLocation >= 0", "hapStartInRefCoords >= 0"})
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) {
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation);
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(hapStartInRefCoords, haplotypeCigar, refInsertLocation);
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
return getBases().clone();
return bases.clone();
}
haplotypeInsertLocation += numBasesAddedToStartOfHaplotype;
final byte[] newHaplotype = getBases().clone();
byte[] newHaplotype;
try {
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
newHaplotype = bases.clone();
for( int iii = 0; iii < altAllele.length(); iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else if( refAllele.length() < altAllele.length() ) { // insertion
} else if( refAllele.length() < altAllele.length() ) { // insertion
final int altAlleleLength = altAllele.length();
newHaplotype = new byte[bases.length + altAlleleLength];
for( int iii = 0; iii < bases.length; iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
}
@ -138,24 +143,17 @@ public class Haplotype {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else { // deletion
int refHaplotypeOffset = 0;
for( final CigarElement ce : haplotypeCigar.getCigarElements()) {
if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); }
else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); }
}
for( int iii = 0; iii < altAllele.length(); iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
final int shift = refAllele.length() - altAllele.length();
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) {
newHaplotype[iii] = newHaplotype[iii+shift];
newHaplotype = new byte[bases.length - shift];
for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = 0; iii < shift; iii++ ) {
newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii];
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) {
newHaplotype[iii] = bases[iii+shift];
}
}
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
return getBases().clone();
return bases.clone();
}
return newHaplotype;

View File

@ -41,14 +41,6 @@ import java.util.*;
* @author Kiran Garimella
*/
public class MathUtils {
/** Public constants - used for the Lanczos approximation to the factorial function
* (for the calculation of the binomial/multinomial probability in logspace)
* @param LANC_SEQ[] - an array holding the constants which correspond to the product
* of Chebyshev Polynomial coefficients, and points on the Gamma function (for interpolation)
* [see A Precision Approximation of the Gamma Function J. SIAM Numer. Anal. Ser. B, Vol. 1 1964. pp. 86-96]
* @param LANC_G - a value for the Lanczos approximation to the gamma function that works to
* high precision
*/
/**
* Private constructor. No instantiating this class!
@ -56,6 +48,28 @@ public class MathUtils {
private MathUtils() {
}
// log10Cache[k] == Math.log10(k) for 0 <= k < LOG10_CACHE_SIZE; entry 0 is -Infinity.
public static final double[] log10Cache;
// jacobianLogTable[k] == log10(1 + 10^(-k * JACOBIAN_LOG_TABLE_STEP)) — the correction
// term used when summing two log10-space values via the Jacobian logarithm identity.
private static final double[] jacobianLogTable;
// quantization step (in log10 units) between adjacent entries of jacobianLogTable
private static final double JACOBIAN_LOG_TABLE_STEP = 0.001;
// beyond this absolute log10 difference the correction term is treated as negligible
private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1;
private static final int MAXN = 11000;
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients

// Populate both lookup tables once at class-load time.
static {
    log10Cache = new double[LOG10_CACHE_SIZE];
    jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];

    // log10(0) is undefined; represent it as negative infinity
    log10Cache[0] = Double.NEGATIVE_INFINITY;
    for (int k = 1; k < LOG10_CACHE_SIZE; k++)
        log10Cache[k] = Math.log10(k);

    for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
        jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
    }
}
// A fast implementation of the Math.round() method. This method does not perform
// under/overflow checking, so this shouldn't be used in the general case (but is fine
// if one is already make those checks before calling in to the rounding).
@ -536,7 +550,7 @@ public class MathUtils {
// all negative) the largest value; also, we need to convert to normal-space.
double maxValue = Utils.findMaxEntry(array);
// we may decide to just normalize in log space with converting to linear space
// we may decide to just normalize in log space without converting to linear space
if (keepInLogSpace) {
for (int i = 0; i < array.length; i++)
array[i] -= maxValue;
@ -563,29 +577,6 @@ public class MathUtils {
return normalized;
}
public static double[] normalizeFromLog10(List<Double> array, boolean takeLog10OfOutput) {
double[] normalized = new double[array.size()];
// for precision purposes, we need to add (or really subtract, since they're
// all negative) the largest value; also, we need to convert to normal-space.
double maxValue = MathUtils.arrayMaxDouble(array);
for (int i = 0; i < array.size(); i++)
normalized[i] = Math.pow(10, array.get(i) - maxValue);
// normalize
double sum = 0.0;
for (int i = 0; i < array.size(); i++)
sum += normalized[i];
for (int i = 0; i < array.size(); i++) {
double x = normalized[i] / sum;
if (takeLog10OfOutput)
x = Math.log10(x);
normalized[i] = x;
}
return normalized;
}
/**
* normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
*
@ -596,10 +587,6 @@ public class MathUtils {
return normalizeFromLog10(array, false);
}
public static double[] normalizeFromLog10(List<Double> array) {
return normalizeFromLog10(array, false);
}
public static int maxElementIndex(final double[] array) {
return maxElementIndex(array, array.length);
}
@ -1207,78 +1194,11 @@ public class MathUtils {
return ((double) num) / (Math.max(denom, 1));
}
public static final double[] log10Cache;
public static final double[] jacobianLogTable;
public static final int JACOBIAN_LOG_TABLE_SIZE = 101;
public static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP;
public static final double MAX_JACOBIAN_TOLERANCE = 10.0;
private static final int MAXN = 11000;
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients
static {
log10Cache = new double[LOG10_CACHE_SIZE];
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
log10Cache[0] = Double.NEGATIVE_INFINITY;
for (int k = 1; k < LOG10_CACHE_SIZE; k++)
log10Cache[k] = Math.log10(k);
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
}
}
static public double softMax(final double[] vec) {
double acc = vec[0];
for (int k = 1; k < vec.length; k++)
acc = softMax(acc, vec[k]);
return acc;
}
static public double max(double x0, double x1, double x2) {
double a = Math.max(x0, x1);
return Math.max(a, x2);
}
static public double softMax(final double x0, final double x1, final double x2) {
// compute naively log10(10^x[0] + 10^x[1]+...)
// return Math.log10(MathUtils.sumLog10(vec));
// better approximation: do Jacobian logarithm function on data pairs
double a = softMax(x0, x1);
return softMax(a, x2);
}
static public double softMax(final double x, final double y) {
// we need to compute log10(10^x + 10^y)
// By Jacobian logarithm identity, this is equal to
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup
// with integer quantization
// slow exact version:
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
double diff = x - y;
if (diff > MAX_JACOBIAN_TOLERANCE)
return x;
else if (diff < -MAX_JACOBIAN_TOLERANCE)
return y;
else if (diff >= 0) {
int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
return x + jacobianLogTable[ind];
}
else {
int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
return y + jacobianLogTable[ind];
}
}
public static double phredScaleToProbability(byte q) {
return Math.pow(10, (-q) / 10.0);
}
@ -1734,6 +1654,4 @@ public class MathUtils {
return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
}
}

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -314,6 +315,15 @@ public class ClippingOp {
if (start == 0)
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
if (read.hasBaseIndelQualities()) {
byte[] newBaseInsertionQuals = new byte[newLength];
byte[] newBaseDeletionQuals = new byte[newLength];
System.arraycopy(read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength);
System.arraycopy(read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength);
hardClippedRead.setBaseQualities(newBaseInsertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
hardClippedRead.setBaseQualities(newBaseDeletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
}
return hardClippedRead;
}

View File

@ -0,0 +1,390 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.io.IOUtils;
import javax.crypto.Cipher;
import java.io.File;
import java.io.InputStream;
import java.security.*;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.KeySpec;
import java.security.spec.PKCS8EncodedKeySpec;
import java.security.spec.X509EncodedKeySpec;
import java.util.Arrays;
/**
* A set of cryptographic utility methods and constants.
*
* Contains methods to:
*
* -Create a public/private key pair
* -Read and write public/private keys to/from files/streams
* -Load the GATK master private/public keys
* -Encrypt/decrypt data
*
* Also contains constants that control the cryptographic defaults
* throughout the GATK.
*
* @author David Roazen
*/
public class CryptUtils {
// ---------------------------------------------------------------------------------
// Constants (these control the default cryptographic settings throughout the GATK):
// ---------------------------------------------------------------------------------
/**
* Default key length in bits of newly-created keys. 2048 bits provides a good balance between
* security and speed.
*/
public static final int DEFAULT_KEY_LENGTH = 2048;
/**
* Default encryption algorithm to use, when none is specified.
*/
public static final String DEFAULT_ENCRYPTION_ALGORITHM = "RSA";
/**
* Default random-number generation algorithm to use, when none is specified.
*/
public static final String DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM = "SHA1PRNG";
/**
* Name of the public key file distributed with the GATK. This file is packaged
* into the GATK jar, and we use the system ClassLoader to find it.
*/
public static final String GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME = "GATK_public.key";
/**
* Location of the master copy of the GATK private key.
*/
public static final String GATK_MASTER_PRIVATE_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_private.key";
/**
* Location of the master copy of the GATK public key. This file should always be the same as
* the public key file distributed with the GATK (and there are automated tests to ensure that it is).
*/
public static final String GATK_MASTER_PUBLIC_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_public.key";
/**
* Directory where generated GATK user keys are stored. See the GATKKey class for more information.
*/
public static final String GATK_USER_KEY_DIRECTORY = "/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/";
// -----------------------
// Utility Methods:
// -----------------------
/**
 * Generate a new public/private key pair using the GATK-wide defaults declared above:
 * DEFAULT_KEY_LENGTH bits, the DEFAULT_ENCRYPTION_ALGORITHM, and the
 * DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM as the randomness source.
 *
 * @return A new public/private key pair created using the default settings
 */
public static KeyPair generateKeyPair() {
    return generateKeyPair(DEFAULT_KEY_LENGTH, DEFAULT_ENCRYPTION_ALGORITHM, DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
}
/**
 * Generate a new public/private key pair using caller-supplied encryption settings.
 *
 * @param keyLength           Length of the key in bits
 * @param encryptionAlgorithm Encryption algorithm to use
 * @param randNumberAlgorithm Random-number generation algorithm to use
 * @return A new public/private key pair, created according to the specified parameters
 * @throws ReviewedStingException if the encryption algorithm is unavailable, or if
 *                                key-pair generation fails for any other reason
 */
public static KeyPair generateKeyPair( int keyLength, String encryptionAlgorithm, String randNumberAlgorithm ) {
    try {
        // look up the algorithm first so an unknown algorithm produces the specific error below
        final KeyPairGenerator generator = KeyPairGenerator.getInstance(encryptionAlgorithm);
        final SecureRandom entropySource = createRandomnessSource(randNumberAlgorithm);
        generator.initialize(keyLength, entropySource);
        return generator.generateKeyPair();
    }
    catch ( NoSuchAlgorithmException e ) {
        throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
    }
    catch ( Exception e ) {
        throw new ReviewedStingException("Error while generating key pair", e);
    }
}
/**
 * Create a source of randomness using the default random-number generation
 * algorithm (DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM).
 *
 * @return A randomness source that uses the default algorithm
 */
public static SecureRandom createRandomnessSource() {
    return createRandomnessSource(DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
}
/**
 * Create a source of randomness using a custom random-number generation algorithm.
 *
 * @param randAlgorithm The random-number generation algorithm to use
 * @return A randomness source that uses the specified algorithm
 */
public static SecureRandom createRandomnessSource ( String randAlgorithm ) {
try {
return SecureRandom.getInstance(randAlgorithm);
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Could not find an implementation of the requested random-number generation algorithm %s", randAlgorithm), e);
}
}
/**
 * Writes a public/private key pair to disk
 *
 * @param keyPair The key pair we're writing to disk
 * @param privateKeyFile Location to write the private key
 * @param publicKeyFile Location to write the public key
 */
public static void writeKeyPair ( KeyPair keyPair, File privateKeyFile, File publicKeyFile ) {
// Each key is written in its default encoded form via Key.getEncoded() -- see writeKey().
writeKey(keyPair.getPrivate(), privateKeyFile);
writeKey(keyPair.getPublic(), publicKeyFile);
}
/**
 * Writes an arbitrary key to disk
 *
 * @param key The key to write
 * @param destination Location to write the key to
 */
public static void writeKey ( Key key, File destination ) {
// key.getEncoded() returns the key's primary encoded form; for keys read back by this
// class, that is X.509 for public keys and PKCS #8 for private keys (see the decode methods).
IOUtils.writeByteArrayToFile(key.getEncoded(), destination);
}
/**
 * Reads in a public key created using the default encryption algorithm from a file.
 *
 * @param source File containing the public key, in X.509 encoded form
 * @return The public key read
 */
public static PublicKey readPublicKey ( File source ) {
return decodePublicKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
}
/**
 * Reads in a public key created using the default encryption algorithm from a stream.
 *
 * The stream is closed by IOUtils.readStreamIntoByteArray() once fully read.
 *
 * @param source Stream attached to the public key
 * @return The public key read
 */
public static PublicKey readPublicKey ( InputStream source ) {
return decodePublicKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
}
/**
 * Decodes the raw bytes of a public key into a usable object.
 *
 * @param rawKey The encoded bytes of a public key as read from, eg., a file. The
 *               key must be in the standard X.509 format for a public key.
 * @param encryptionAlgorithm The encryption algorithm used to create the public key
 * @return The public key as a usable object
 */
public static PublicKey decodePublicKey ( byte[] rawKey, String encryptionAlgorithm ) {
    final KeySpec x509Spec = new X509EncodedKeySpec(rawKey);
    try {
        return KeyFactory.getInstance(encryptionAlgorithm).generatePublic(x509Spec);
    }
    catch ( NoSuchAlgorithmException e ) {
        throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
    }
    catch ( InvalidKeySpecException e ) {
        throw new ReviewedStingException("Unable to use X.509 key specification to decode the given key", e);
    }
}
/**
 * Reads in a private key created using the default encryption algorithm from a file.
 *
 * @param source File containing the private key, in PKCS #8 encoded form
 * @return The private key read
 */
public static PrivateKey readPrivateKey ( File source ) {
return decodePrivateKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
}
/**
 * Reads in a private key created using the default encryption algorithm from a stream.
 *
 * The stream is closed by IOUtils.readStreamIntoByteArray() once fully read.
 *
 * @param source Stream attached to the private key
 * @return The private key read
 */
public static PrivateKey readPrivateKey ( InputStream source ) {
return decodePrivateKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
}
/**
 * Decodes the raw bytes of a private key into a usable object.
 *
 * @param rawKey The encoded bytes of a private key as read from, eg., a file. The
 *               key must be in the standard PKCS #8 format for a private key.
 * @param encryptionAlgorithm The encryption algorithm used to create the private key
 * @return The private key as a usable object
 */
public static PrivateKey decodePrivateKey ( byte[] rawKey, String encryptionAlgorithm ) {
    final KeySpec pkcs8Spec = new PKCS8EncodedKeySpec(rawKey);
    try {
        return KeyFactory.getInstance(encryptionAlgorithm).generatePrivate(pkcs8Spec);
    }
    catch ( NoSuchAlgorithmException e ) {
        throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
    }
    catch ( InvalidKeySpecException e ) {
        throw new ReviewedStingException("Unable to use the PKCS #8 key specification to decode the given key", e);
    }
}
/**
 * Loads the copy of the GATK public key that is distributed with the GATK. Uses the system
 * ClassLoader to locate the public key file, which should be stored at the root of the GATK
 * jar file.
 *
 * @return The GATK public key as a usable object
 */
public static PublicKey loadGATKDistributedPublicKey() {
    final InputStream distributedKeyStream =
            ClassLoader.getSystemResourceAsStream(GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME);

    if ( distributedKeyStream != null ) {
        return readPublicKey(distributedKeyStream);
    }

    throw new ReviewedStingException(String.format("Could not locate the GATK public key %s in the classpath",
                                                   GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME));
}
/**
 * Loads the master copy of the GATK private key. You must have the appropriate UNIX permissions
 * to do this!
 *
 * @return The GATK master private key as a usable object
 */
public static PrivateKey loadGATKMasterPrivateKey() {
// Reads from the hardcoded filesystem path GATK_MASTER_PRIVATE_KEY_FILE; fails for
// users without read access to that path.
return readPrivateKey(new File(GATK_MASTER_PRIVATE_KEY_FILE));
}
/**
 * Loads the master copy of the GATK public key. This should always be the same as the
 * public key distributed with the GATK returned by loadGATKDistributedPublicKey().
 *
 * @return The GATK master public key as a usable object
 */
public static PublicKey loadGATKMasterPublicKey() {
// Reads from the hardcoded filesystem path GATK_MASTER_PUBLIC_KEY_FILE.
return readPublicKey(new File(GATK_MASTER_PUBLIC_KEY_FILE));
}
/**
 * Encrypts the given data using the key provided.
 *
 * The cipher is chosen from the key's own algorithm (see transformDataUsingCipher()).
 *
 * @param data The data to encrypt, as a byte array
 * @param encryptKey The key with which to encrypt the data
 * @return The encrypted version of the provided data
 */
public static byte[] encryptData ( byte[] data, Key encryptKey ) {
return transformDataUsingCipher(data, encryptKey, Cipher.ENCRYPT_MODE);
}
/**
 * Decrypts the given data using the key provided.
 *
 * The cipher is chosen from the key's own algorithm (see transformDataUsingCipher()).
 *
 * @param encryptedData Data to decrypt, as a byte array
 * @param decryptKey The key with which to decrypt the data
 * @return The decrypted version of the provided data
 */
public static byte[] decryptData ( byte[] encryptedData, Key decryptKey ) {
return transformDataUsingCipher(encryptedData, decryptKey, Cipher.DECRYPT_MODE);
}
/**
 * Helper method for encryption/decryption that takes data and processes it using
 * the given key
 *
 * @param data Data to encrypt/decrypt
 * @param key Key to use to encrypt/decrypt the data
 * @param cipherMode Specifies whether we are encrypting or decrypting
 *                   (Cipher.ENCRYPT_MODE or Cipher.DECRYPT_MODE)
 * @return The encrypted/decrypted data
 */
private static byte[] transformDataUsingCipher ( byte[] data, Key key, int cipherMode ) {
    try {
        // The cipher is selected from the key's own algorithm; provider-default
        // mode/padding are used since no explicit transformation string is given.
        Cipher cipher = Cipher.getInstance(key.getAlgorithm());
        cipher.init(cipherMode, key);
        return cipher.doFinal(data);
    }
    catch ( NoSuchAlgorithmException e ) {
        throw new ReviewedStingException(String.format("Could not find an implementation of the requested algorithm %s",
                                                       key.getAlgorithm()), e);
    }
    catch ( InvalidKeyException e ) {
        throw new ReviewedStingException("Key is invalid", e);
    }
    catch ( GeneralSecurityException e ) {
        // Bug fix: the previous message always said "encryption", which was misleading
        // when this helper was invoked in DECRYPT_MODE. Report the actual operation:
        throw new ReviewedStingException(cipherMode == Cipher.ENCRYPT_MODE ? "Error during encryption"
                                                                           : "Error during decryption", e);
    }
}
/**
 * Tests whether the public/private keys provided can each decrypt data encrypted by
 * the other key -- ie., tests whether these two keys are part of the same public/private
 * key pair.
 *
 * @param privateKey The private key to test
 * @param publicKey The public key to test
 * @return True if the keys are part of the same key pair and can decrypt each other's
 *         encrypted data, otherwise false.
 */
public static boolean keysDecryptEachOther ( PrivateKey privateKey, PublicKey publicKey ) {
    final byte[] plainText = "Test PlainText".getBytes();

    final byte[] cipherTextFromPrivate = CryptUtils.encryptData(plainText, privateKey);
    final byte[] cipherTextFromPublic = CryptUtils.encryptData(plainText, publicKey);
    final byte[] roundTripViaPublic = CryptUtils.decryptData(cipherTextFromPrivate, publicKey);
    final byte[] roundTripViaPrivate = CryptUtils.decryptData(cipherTextFromPublic, privateKey);

    // Encryption must have actually transformed the data:
    final boolean encryptionTransformedData =
            ! Arrays.equals(plainText, cipherTextFromPrivate) &&
            ! Arrays.equals(plainText, cipherTextFromPublic) &&
            ! Arrays.equals(cipherTextFromPrivate, cipherTextFromPublic);

    // Each key must recover the original plaintext from the other key's ciphertext:
    final boolean decryptionRecoveredPlaintext =
            Arrays.equals(plainText, roundTripViaPublic) &&
            Arrays.equals(plainText, roundTripViaPrivate);

    return encryptionTransformedData && decryptionRecoveredPlaintext;
}
}

View File

@ -0,0 +1,349 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.io.IOUtils;
import java.io.*;
import java.security.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
* Class to represent a GATK user key.
*
* A GATK user key contains an email address and a cryptographic signature.
* The signature is the SHA-1 hash of the email address encrypted using
* the GATK master private key. The GATK master public key (distributed
* with the GATK) is used to decrypt the signature and validate the key
* at the start of each GATK run that requires a key.
*
* Keys are cryptographically secure in that valid keys definitely come
* from us and cannot be fabricated, however nothing prevents keys from
* being shared between users.
*
* GATK user keys have the following on-disk format:
*
* GZIP Container:
* Email address
* NUL byte (delimiter)
* Cryptographic Signature (encrypted SHA-1 hash of email address)
*
* The key data is wrapped within a GZIP container to placate over-zealous
* email filters (since keys must often be emailed) and also to provide an
* additional integrity check via the built-in GZIP CRC.
*
* @author David Roazen
*/
public class GATKKey {
/**
 * Private key used to sign the GATK key. Required only when creating a new
 * key from scratch, not when loading an existing key from disk.
 */
private PrivateKey privateKey;
/**
 * Public key used to validate the GATK key.
 */
private PublicKey publicKey;
/**
 * The user's email address, stored within the key and signed.
 */
private String emailAddress;
/**
 * The cryptographic signature of the email address. By default, this is
 * the SHA-1 hash of the email address encrypted using the RSA algorithm.
 */
private byte[] signature;
/**
 * The combination of hash/encryption algorithms to use to generate the signature.
 * By default this is "SHA1withRSA"
 */
private String signingAlgorithm;
/**
 * Default hash/encryption algorithms to use to sign the key.
 */
public static final String DEFAULT_SIGNING_ALGORITHM = "SHA1withRSA";
/**
 * Byte value used to separate the email address from its signature in the key file.
 * Because NUL is the delimiter, email addresses must never contain it (enforced
 * by validateEmailAddress()).
 */
public static final byte GATK_KEY_SECTIONAL_DELIMITER = 0;
// -----------------------
// Constructors:
// -----------------------
/**
 * Constructor to create a new GATK key from scratch using an email address
 * and public/private key pair. The private key is used for signing, and the
 * public key is used to validate the newly-created key.
 *
 * @param privateKey Private key used to sign the new GATK key
 * @param publicKey Public key used to validate the new GATK key
 * @param emailAddress The user's email address, which we will store in the key and sign
 */
public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress ) {
this(privateKey, publicKey, emailAddress, DEFAULT_SIGNING_ALGORITHM);
}
/**
 * Constructor to create a new GATK key from scratch using an email address
 * and public/private key pair, and additionally specify the signing algorithm
 * to use. The private key is used for signing, and the public key is used to
 * validate the newly-created key.
 *
 * @param privateKey Private key used to sign the new GATK key
 * @param publicKey Public key used to validate the new GATK key
 * @param emailAddress The user's email address, which we will store in the key and sign
 * @param signingAlgorithm The combination of hash and encryption algorithms to use to sign the key
 */
public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress, String signingAlgorithm ) {
if ( privateKey == null || publicKey == null || emailAddress == null || emailAddress.length() == 0 || signingAlgorithm == null ) {
throw new ReviewedStingException("Cannot construct GATKKey using null/empty arguments");
}
this.privateKey = privateKey;
this.publicKey = publicKey;
this.emailAddress = emailAddress;
this.signingAlgorithm = signingAlgorithm;
validateEmailAddress();
generateSignature();
// Sanity check: a key we just signed must verify against the supplied public key.
if ( ! isValid() ) {
throw new ReviewedStingException("Newly-generated GATK key fails validation -- this should never happen!");
}
}
/**
 * Constructor to load an existing GATK key from a file.
 *
 * During loading, the key file is checked for integrity, but not cryptographic
 * validity (which must be done through a subsequent call to isValid()).
 *
 * @param publicKey Public key that will be used to validate the loaded GATK key
 *                  in subsequent calls to isValid()
 * @param keyFile File containing the GATK key to load
 */
public GATKKey ( PublicKey publicKey, File keyFile ) {
this(publicKey, keyFile, DEFAULT_SIGNING_ALGORITHM);
}
/**
 * Constructor to load an existing GATK key from a file, and additionally specify
 * the signing algorithm used to sign the key being loaded.
 *
 * During loading, the key file is checked for integrity, but not cryptographic
 * validity (which must be done through a subsequent call to isValid()).
 *
 * @param publicKey Public key that will be used to validate the loaded GATK key
 *                  in subsequent calls to isValid()
 * @param keyFile File containing the GATK key to load
 * @param signingAlgorithm The combination of hash and encryption algorithms used to sign the key
 */
public GATKKey ( PublicKey publicKey, File keyFile, String signingAlgorithm ) {
if ( publicKey == null || keyFile == null || signingAlgorithm == null ) {
throw new ReviewedStingException("Cannot construct GATKKey using null arguments");
}
this.publicKey = publicKey;
this.signingAlgorithm = signingAlgorithm;
// Populates emailAddress and signature from the file (no private key is needed
// when loading, only when creating a new key):
readKey(keyFile);
}
// -----------------------
// Public API Methods:
// -----------------------
/**
 * Writes out this key to a file in the format described at the top of this class,
 * encapsulating the key within a GZIP container.
 *
 * @param destination File to write the key to
 */
public void writeKey ( File destination ) {
try {
byte[] keyBytes = marshalKeyData();
IOUtils.writeByteArrayToStream(keyBytes, new GZIPOutputStream(new FileOutputStream(destination)));
}
catch ( IOException e ) {
throw new UserException.CouldNotCreateOutputFile(destination, e);
}
}
/**
 * Checks whether the signature of this key is cryptographically valid (ie., can be
 * decrypted by the public key to produce a valid SHA-1 hash of the email address
 * in the key).
 *
 * @return True if the key's signature passes validation, otherwise false
 */
public boolean isValid() {
try {
Signature sig = Signature.getInstance(signingAlgorithm);
sig.initVerify(publicKey);
// NOTE(review): getBytes() uses the platform default charset; signing and
// verification must agree on the encoding for non-ASCII addresses -- confirm.
sig.update(emailAddress.getBytes());
return sig.verify(signature);
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e);
}
catch ( InvalidKeyException e ) {
// If the GATK public key is invalid, it's likely our problem, not the user's:
throw new ReviewedStingException(String.format("Public key %s is invalid", publicKey), e);
}
catch ( SignatureException e ) {
throw new UserException.UnreadableKeyException("Signature is invalid or signing algorithm was unable to process the input data", e);
}
}
// -----------------------
// Private Helper Methods:
// -----------------------
/**
 * Helper method that creates a signature for this key using the combination of
 * hash/encryption algorithms specified at construction time.
 */
private void generateSignature() {
try {
Signature sig = Signature.getInstance(signingAlgorithm);
sig.initSign(privateKey, CryptUtils.createRandomnessSource());
// NOTE(review): getBytes() uses the platform default charset; must match the
// decoding used in isValid() and unmarshalKeyData() -- confirm for non-ASCII.
sig.update(emailAddress.getBytes());
signature = sig.sign();
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e);
}
catch ( InvalidKeyException e ) {
throw new ReviewedStingException(String.format("Private key %s is invalid", privateKey), e);
}
catch ( SignatureException e ) {
throw new ReviewedStingException(String.format("Error creating signature for email address %s", emailAddress), e);
}
}
/**
 * Helper method that reads in a GATK key from a file. Should not be called directly --
 * use the appropriate constructor above.
 *
 * @param source File to read the key from
 */
private void readKey ( File source ) {
try {
byte[] keyBytes = IOUtils.readStreamIntoByteArray(new GZIPInputStream(new FileInputStream(source)));
// As a sanity check, compare the number of bytes read to the uncompressed file size
// stored in the GZIP ISIZE field. If they don't match, the key must be corrupt:
if ( keyBytes.length != IOUtils.getGZIPFileUncompressedSize(source) ) {
throw new UserException.UnreadableKeyException("Number of bytes read does not match the uncompressed size specified in the GZIP ISIZE field");
}
unmarshalKeyData(keyBytes);
}
catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(source, e);
}
catch ( IOException e ) {
throw new UserException.UnreadableKeyException(source, e);
}
catch ( UserException.CouldNotReadInputFile e ) {
// Re-wrap read failures so the user sees a key-specific error message:
throw new UserException.UnreadableKeyException(source, e);
}
}
/**
 * Helper method that assembles the email address and signature into a format
 * suitable for writing to disk: email bytes, then the NUL delimiter, then the
 * signature bytes.
 *
 * @return The aggregated key data, ready to be written to disk
 */
private byte[] marshalKeyData() {
byte[] emailAddressBytes = emailAddress.getBytes();
byte[] assembledKey = new byte[emailAddressBytes.length + 1 + signature.length];
System.arraycopy(emailAddressBytes, 0, assembledKey, 0, emailAddressBytes.length);
assembledKey[emailAddressBytes.length] = GATK_KEY_SECTIONAL_DELIMITER;
System.arraycopy(signature, 0, assembledKey, emailAddressBytes.length + 1, signature.length);
return assembledKey;
}
/**
 * Helper method that parses the raw key data from disk into its component
 * email address and signature. Performs some basic validation in the process.
 *
 * @param keyBytes The raw, uncompressed key data read from disk
 */
private void unmarshalKeyData ( byte[] keyBytes ) {
// Locate the first NUL byte, which separates the email address from the signature:
int delimiterPosition = -1;
for ( int i = 0; i < keyBytes.length; i++ ) {
if ( keyBytes[i] == GATK_KEY_SECTIONAL_DELIMITER ) {
delimiterPosition = i;
break;
}
}
if ( delimiterPosition == -1 ) {
throw new UserException.UnreadableKeyException("Malformed GATK key contains no sectional delimiter");
}
else if ( delimiterPosition == 0 ) {
throw new UserException.UnreadableKeyException("Malformed GATK key contains no email address");
}
else if ( delimiterPosition == keyBytes.length - 1 ) {
throw new UserException.UnreadableKeyException("Malformed GATK key contains no signature");
}
byte[] emailAddressBytes = new byte[delimiterPosition];
System.arraycopy(keyBytes, 0, emailAddressBytes, 0, delimiterPosition);
// NOTE(review): decodes using the platform default charset -- must match the
// charset used when the key was created (see generateSignature()).
emailAddress = new String(emailAddressBytes);
signature = new byte[keyBytes.length - delimiterPosition - 1];
System.arraycopy(keyBytes, delimiterPosition + 1, signature, 0, keyBytes.length - delimiterPosition - 1);
}
/**
 * Helper method that ensures that the user's email address does not contain the NUL byte, which we
 * reserve as a delimiter within each key file.
 */
private void validateEmailAddress() {
for ( byte b : emailAddress.getBytes() ) {
if ( b == GATK_KEY_SECTIONAL_DELIMITER ) {
throw new UserException(String.format("Email address must not contain a byte with value %d", GATK_KEY_SECTIONAL_DELIMITER));
}
}
}
}

View File

@ -132,6 +132,10 @@ public class UserException extends ReviewedStingException {
public CouldNotReadInputFile(File file, Exception e) {
this(file, e.getMessage());
}
public CouldNotReadInputFile(String message) {
super(message);
}
}
@ -151,6 +155,10 @@ public class UserException extends ReviewedStingException {
public CouldNotCreateOutputFile(File file, Exception e) {
super(String.format("Couldn't write file %s because exception %s", file.getAbsolutePath(), e.getMessage()));
}
public CouldNotCreateOutputFile(String message, Exception e) {
super(message, e);
}
}
public static class MissortedBAM extends UserException {
@ -319,4 +327,32 @@ public class UserException extends ReviewedStingException {
"and try again.", null);
}
}
/** Thrown when a GATK key file cannot be read or fails integrity checks. */
public static class UnreadableKeyException extends UserException {
public UnreadableKeyException ( File f, Exception e ) {
super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
f.getAbsolutePath(), e.getMessage()));
}
// NOTE(review): only e.getMessage() is folded into the text here; the exception is not
// chained as a cause, so its stack trace is lost -- consider passing e through to a
// (String, Throwable) super constructor if one is available.
public UnreadableKeyException ( String message, Exception e ) {
this(String.format("%s. Error was: %s", message, e.getMessage()));
}
public UnreadableKeyException ( String message ) {
super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
message));
}
}
/** Thrown when a GATK key's cryptographic signature fails verification (eg., a revoked key). */
public static class KeySignatureVerificationException extends UserException {
public KeySignatureVerificationException ( File f ) {
super(String.format("The signature in key file %s failed cryptographic verification. " +
"If this key was valid in the past, it's likely been revoked. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
"for help.",
f.getAbsolutePath()));
}
}
}

View File

@ -151,23 +151,16 @@ public class FragmentUtils {
final int numBases = firstReadStop + secondRead.getReadLength();
final byte[] bases = new byte[numBases];
final byte[] quals = new byte[numBases];
// BUGBUG: too verbose, clean this up.
final byte[] insertionQuals = new byte[numBases];
final byte[] deletionQuals = new byte[numBases];
final byte[] firstReadBases = firstRead.getReadBases();
final byte[] firstReadQuals = firstRead.getBaseQualities();
final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
final byte[] secondReadBases = secondRead.getReadBases();
final byte[] secondReadQuals = secondRead.getBaseQualities();
final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
for(int iii = 0; iii < firstReadStop; iii++) {
bases[iii] = firstReadBases[iii];
quals[iii] = firstReadQuals[iii];
insertionQuals[iii] = firstReadInsertionQuals[iii];
deletionQuals[iii] = firstReadDeletionQuals[iii];
}
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
@ -175,22 +168,16 @@ public class FragmentUtils {
}
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
}
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
bases[iii] = secondReadBases[iii-firstReadStop];
quals[iii] = secondReadQuals[iii-firstReadStop];
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
}
final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
returnRead.setAlignmentStart(firstRead.getUnclippedStart());
returnRead.setReadBases( bases );
returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION );
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
returnRead.setBaseQualities( quals );
returnRead.setReadGroup( firstRead.getReadGroup() );
returnRead.setReferenceName( firstRead.getReferenceName() );
final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
@ -199,6 +186,27 @@ public class FragmentUtils {
returnRead.setCigar( new Cigar( cList ));
returnRead.setMappingQuality( firstRead.getMappingQuality() );
if( firstRead.hasBaseIndelQualities() || secondRead.hasBaseIndelQualities() ) {
final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
for(int iii = 0; iii < firstReadStop; iii++) {
insertionQuals[iii] = firstReadInsertionQuals[iii];
deletionQuals[iii] = firstReadDeletionQuals[iii];
}
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score
deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score
}
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
}
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
}
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>();
returnList.add(returnRead);
return returnList;

View File

@ -29,10 +29,13 @@ import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.*;
public class IOUtils {
@ -400,4 +403,173 @@ public class IOUtils {
public static boolean isSpecialFile(File file) {
return file != null && (file.getAbsolutePath().startsWith("/dev/") || file.equals(DEV_DIR));
}
/**
 * Reads the entirety of the given file into a byte array. Uses a read buffer size of 4096 bytes.
 *
 * @param source File to read
 * @return The contents of the file as a byte array
 */
public static byte[] readFileIntoByteArray ( File source ) {
return readFileIntoByteArray(source, 4096);
}
/**
 * Reads the entirety of the given file into a byte array using the requested read buffer size.
 *
 * @param source File to read
 * @param readBufferSize Number of bytes to read in at one time
 * @return The contents of the file as a byte array
 */
public static byte[] readFileIntoByteArray ( File source, int readBufferSize ) {
    if ( source == null ) {
        throw new ReviewedStingException("Source file was null");
    }

    final byte[] contents;
    try {
        contents = readStreamIntoByteArray(new FileInputStream(source), readBufferSize);
    }
    catch ( FileNotFoundException e ) {
        throw new UserException.CouldNotReadInputFile(source, e);
    }

    // Cross-check against the file's reported length to catch truncated reads:
    if ( contents.length != source.length() ) {
        throw new UserException.CouldNotReadInputFile(String.format("Unable to completely read file %s: read only %d/%d bytes",
                                                                    source.getAbsolutePath(), contents.length, source.length()));
    }

    return contents;
}
/**
 * Reads all data from the given stream into a byte array. Uses a read buffer size of 4096 bytes.
 *
 * The stream is closed before returning, even on error.
 *
 * @param in Stream to read data from
 * @return The contents of the stream as a byte array
 */
public static byte[] readStreamIntoByteArray ( InputStream in ) {
return readStreamIntoByteArray(in, 4096);
}
/**
 * Reads all data from the given stream into a byte array using the requested read buffer size.
 *
 * The stream is always closed before returning, even on error.
 *
 * @param in Stream to read data from
 * @param readBufferSize Number of bytes to read in at one time
 * @return The contents of the stream as a byte array
 */
public static byte[] readStreamIntoByteArray ( InputStream in, int readBufferSize ) {
    if ( in == null ) {
        throw new ReviewedStingException("Input stream was null");
    }
    if ( readBufferSize <= 0 ) {
        throw new ReviewedStingException("Read buffer size must be > 0");
    }

    // Fixed-size scratch buffer per read; dynamically-growing buffer accumulates
    // the full stream contents:
    final byte[] chunk = new byte[readBufferSize];
    final ByteArrayOutputStream contents = new ByteArrayOutputStream(readBufferSize * 4);

    try {
        try {
            int bytesRead;
            while ( (bytesRead = in.read(chunk, 0, chunk.length)) >= 0 ) {
                contents.write(chunk, 0, bytesRead);
            }
        }
        finally {
            in.close();
        }
    }
    catch ( IOException e ) {
        throw new UserException.CouldNotReadInputFile("I/O error reading from input stream", e);
    }

    return contents.toByteArray();
}
/**
 * Writes the given array of bytes to a file
 *
 * @param bytes Data to write
 * @param destination File to write the data to
 */
public static void writeByteArrayToFile ( byte[] bytes, File destination ) {
if ( destination == null ) {
throw new ReviewedStingException("Destination file was null");
}
try {
// writeByteArrayToStream() null-checks bytes and closes the stream.
writeByteArrayToStream(bytes, new FileOutputStream(destination));
}
catch ( FileNotFoundException e ) {
throw new UserException.CouldNotCreateOutputFile(destination, e);
}
}
/**
 * Writes the given array of bytes to a stream
 *
 * The stream is always closed, even if the write fails.
 *
 * @param bytes Data to write
 * @param out Stream to write the data to
 */
public static void writeByteArrayToStream ( byte[] bytes, OutputStream out ) {
if ( bytes == null || out == null ) {
throw new ReviewedStingException("Data to write or output stream was null");
}
try {
try {
out.write(bytes);
}
finally {
out.close();
}
}
catch ( IOException e ) {
throw new UserException.CouldNotCreateOutputFile("I/O error writing to output stream", e);
}
}
/**
 * Determines the uncompressed size of a GZIP file. Uses the GZIP ISIZE field in the last
 * 4 bytes of the file to get this information (per RFC 1952, ISIZE is a little-endian
 * 32-bit value occupying the final 4 bytes of the file).
 *
 * @param gzipFile GZIP-format file whose uncompressed size to determine
 * @return The uncompressed size (in bytes) of the GZIP file
 */
public static int getGZIPFileUncompressedSize ( File gzipFile ) {
    if ( gzipFile == null ) {
        throw new ReviewedStingException("GZIP file to examine was null");
    }

    try {
        // The GZIP ISIZE field holds the uncompressed size of the compressed data.
        // It occupies the last 4 bytes of any GZIP file:
        byte[] sizeBytes = new byte[4];

        RandomAccessFile in = new RandomAccessFile(gzipFile, "r");
        try {
            in.seek(gzipFile.length() - 4);
            // readFully() guarantees all 4 bytes are read (throwing EOFException
            // otherwise) -- plain read() may legally return fewer bytes.
            in.readFully(sizeBytes, 0, 4);
        }
        finally {
            in.close();  // previously leaked: the file handle was never closed
        }

        ByteBuffer byteBuf = ByteBuffer.wrap(sizeBytes);
        byteBuf.order(ByteOrder.LITTLE_ENDIAN);  // The GZIP spec mandates little-endian byte order

        int uncompressedSize = byteBuf.getInt();

        // If the size read in is negative, we've overflowed our signed integer:
        if ( uncompressedSize < 0 ) {
            throw new UserException.CouldNotReadInputFile(String.format("Cannot accurately determine the uncompressed size of file %s " +
                                                                        "because it's either larger than %d bytes or the GZIP ISIZE field is corrupt",
                                                                        gzipFile.getAbsolutePath(), Integer.MAX_VALUE));
        }

        return uncompressedSize;
    }
    catch ( IOException e ) {
        throw new UserException.CouldNotReadInputFile(gzipFile, e);
    }
}
}

View File

@ -194,6 +194,10 @@ public class GATKSAMRecord extends BAMRecord {
}
}
public boolean hasBaseIndelQualities() {
    // Indel qualities are considered present if either tag has been set;
    // check the insertion tag first so the deletion lookup can be skipped.
    if ( getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null ) {
        return true;
    }
    return getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null;
}
public byte[] getBaseInsertionQualities() {
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) );
if( quals == null ) {

View File

@ -6,6 +6,7 @@ import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.apache.log4j.spi.LoggingEvent;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.utils.crypt.CryptUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.io.IOUtils;
@ -87,6 +88,9 @@ public abstract class BaseTest {
public static final File testDirFile = new File("public/testdata/");
public static final String testDir = testDirFile.getAbsolutePath() + "/";
public static final String keysDataLocation = validationDataLocation + "keys/";
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
/** before the class starts up */
static {
// setup a basic log configuration

View File

@ -29,7 +29,6 @@ package org.broadinstitute.sting;
// the imports for unit testing.
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.LikelihoodCalculationEngine;
import org.broadinstitute.sting.utils.Median;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
@ -42,12 +41,6 @@ import java.util.List;
public class MedianUnitTest extends BaseTest {
LikelihoodCalculationEngine engine;
@BeforeSuite
public void before() {
engine = new LikelihoodCalculationEngine(0, 0, false);
}
// --------------------------------------------------------------------------------
//

View File

@ -30,6 +30,7 @@ import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.CommandLineGATK;
@ -45,7 +46,7 @@ import java.io.File;
import java.util.*;
public class WalkerTest extends BaseTest {
private static final boolean ENABLE_REPORTING = false;
private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false;
@BeforeMethod
public void initializeRandomGenerator() {
@ -121,11 +122,19 @@ public class WalkerTest extends BaseTest {
}
public class WalkerTestSpec {
// Arguments implicitly included in all Walker command lines, unless explicitly
// disabled using the disableImplicitArgs() method below.
final String IMPLICIT_ARGS = ENABLE_PHONE_HOME_FOR_TESTS ?
String.format("-et %s", GATKRunReport.PhoneHomeOption.STANDARD) :
String.format("-et %s -K %s", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile);
String args = "";
int nOutputFiles = -1;
List<String> md5s = null;
List<String> exts = null;
Class expectedException = null;
boolean includeImplicitArgs = true;
// the default output path for the integration test
private File outputFileLocation = null;
@ -159,6 +168,10 @@ public class WalkerTest extends BaseTest {
this.expectedException = expectedException;
}
public String getArgsWithImplicitArgs() {
    // Append the implicit arguments (phone-home/key settings) unless the spec
    // has explicitly disabled them via disableImplicitArgs().
    if ( includeImplicitArgs ) {
        return args + " " + IMPLICIT_ARGS;
    }
    return args;
}
public void setOutputFileLocation(File outputFileLocation) {
this.outputFileLocation = outputFileLocation;
}
@ -180,6 +193,9 @@ public class WalkerTest extends BaseTest {
auxillaryFiles.put(expectededMD5sum, outputfile);
}
/** Disables appending of IMPLICIT_ARGS to this spec's command line in getArgsWithImplicitArgs(). */
public void disableImplicitArgs() {
    includeImplicitArgs = false;
}
}
protected boolean parameterize() {
@ -213,7 +229,7 @@ public class WalkerTest extends BaseTest {
tmpFiles.add(fl);
}
final String args = String.format(spec.args, tmpFiles.toArray());
final String args = String.format(spec.getArgsWithImplicitArgs(), tmpFiles.toArray());
System.out.println(Utils.dupString('-', 80));
if ( spec.expectsException() ) {
@ -277,13 +293,10 @@ public class WalkerTest extends BaseTest {
* @param args the argument list
* @param expectedException the expected exception or null
*/
public static void executeTest(String name, String args, Class expectedException) {
private void executeTest(String name, String args, Class expectedException) {
CommandLineGATK instance = new CommandLineGATK();
String[] command = Utils.escapeExpressions(args);
// add the logging level to each of the integration test commands
command = Utils.appendArray(command, "-et", ENABLE_REPORTING ? "STANDARD" : "NO_ET");
// run the executable
boolean gotAnException = false;
try {

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diagnostics;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
public class ErrorRatePerCycleIntegrationTest extends WalkerTest {
    @Test
    public void basicTest() {
        // Run ErrorRatePerCycle over a 100kb slice of chr20 and pin the output MD5.
        final String command = "-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s";
        final WalkerTestSpec testSpec = new WalkerTestSpec(
                command,
                1,
                Arrays.asList("0cc212ecb6df300e321784039ff29f13"));
        executeTest("ErrorRatePerCycle:", testSpec);
    }
}

View File

@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("202b337ebbea3def1be8495eb363dfa8"));
Arrays.asList("8f81a14fffc1a59b4b066f8595dc1232"));
executeTest("test MultiSample Pilot1", spec);
}
@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("ae29b9c9aacce8046dc780430540cd62"));
Arrays.asList("c5b53231f4f6d9524bc4ec8115f44f5c"));
executeTest("test SingleSample Pilot2", spec);
}
@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
Arrays.asList("10027d13befaa07b7900a7af0ae0791c"));
Arrays.asList("5af005255240a2186f04cb50851b8b6f"));
executeTest("test Multiple SNP alleles", spec);
}
@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65";
private final static String COMPRESSED_OUTPUT_MD5 = "a08df9aea2b3df09cf90ff8e6e3be3ea";
@Test
public void testCompressedOutput() {
@ -91,7 +91,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "32a34362dff51d8b73a3335048516d82";
String md5 = "6358934c1c26345013a38261b8c45aa4";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -179,8 +179,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "2cb2544739e01f6c08fd820112914317" );
e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" );
e.put( 0.01, "926b58038dd4989bf7eda697a847eea9" );
e.put( 1.0 / 1850, "93f44105b43b65730a3b821e27b0fa16" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -204,7 +204,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
Arrays.asList("a1b75a7e12b160b0be823228c958573f"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -223,7 +223,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
Arrays.asList("3bda1279cd6dcb47885f3e19466f11b9"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -242,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("bd9d3d50a1f49605d7cd592a0f446899"));
Arrays.asList("d9fc3ba94a0d46029778c7b457e7292a"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -257,7 +257,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6"));
Arrays.asList("b2e30ae3e5ffa6108f9f6178b1d2e679"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("91cd6d2e3972b0b8e4064bb35a33241f"));
Arrays.asList("2cd182a84613fa91a6020466d2d327e2"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd"));
Arrays.asList("9cd08dc412a007933381e9c76c073899"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
}
@ -290,7 +290,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("320f61c87253aba77d6dc782242cba8b"));
Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("c9897b80615c53a4ea10a4b193d56d9c"));
Arrays.asList("2609675a356f2dfc86f8a1d911210978"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("5282fdb1711a532d726c13507bf80a21"));
Arrays.asList("4fdd8da77167881b71b3547da5c13f94"));
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
}

View File

@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("f909fd8374f663e983b9b3fda4cf5cf1")
Arrays.asList("c8d8bffa5c572df9dec7364f71a1b943")
);
executeTest("testFunctionClassWithSnpeff", spec);
}
@ -277,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("4f60acc8a4b21c4b4ccb51ad9071449c"));
1, Arrays.asList("c49e239292704447a36e01ee9a71e729"));
executeTestParallel("testSelect1", spec);
}
@ -335,7 +335,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --dbsnp " + b37dbSNP132 +
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("190e1a171132832bf92fbca56a9c40bb"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e42cda858649a35eaa9d14ea2d70a956"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -347,7 +347,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("08586d443fdcf3b7f63b8f9e3a943c62"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9561cb4c7aa36dcf30ba253385299859"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -463,7 +463,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
Arrays.asList("397b0e77459b9b69d2e0dd1dac320c3c")
);
executeTest("testModernVCFWithLargeIndels", spec);
}

View File

@ -0,0 +1,163 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.*;
/**
 * Basic unit test for Haplotype Class
 */
public class HaplotypeUnitTest extends BaseTest {
    @BeforeClass
    public void init() {
    }

    /** Builds a cigar consisting of a single match element spanning the given number of bases. */
    private Cigar matchingCigar(final int length) {
        final ArrayList<CigarElement> elements = new ArrayList<CigarElement>();
        elements.add(new CigarElement(length, CigarOperator.M));
        return new Cigar(elements);
    }

    /** Builds the 4M 10I 8M 3D 7M 4M cigar shared by the "complex" test cases below. */
    private Cigar complexCigar() {
        final ArrayList<CigarElement> elements = new ArrayList<CigarElement>();
        elements.add(new CigarElement(4, CigarOperator.M));
        elements.add(new CigarElement(10, CigarOperator.I));
        elements.add(new CigarElement(8, CigarOperator.M));
        elements.add(new CigarElement(3, CigarOperator.D));
        elements.add(new CigarElement(7, CigarOperator.M));
        elements.add(new CigarElement(4, CigarOperator.M));
        return new Cigar(elements);
    }

    @Test
    public void testSimpleInsertionAllele() {
        final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
        final Cigar cigar = matchingCigar(bases.length());
        basicInsertTest("-", "ACTT", 1, cigar, bases, "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA");
        basicInsertTest("-", "ACTT", 7, cigar, bases, "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA");
        basicInsertTest("-", "ACTT", 17, cigar, bases, "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA");
    }

    @Test
    public void testSimpleDeletionAllele() {
        final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
        final Cigar cigar = matchingCigar(bases.length());
        basicInsertTest("ACTT", "-", 1, cigar, bases, "ATCAACTGGTCAACTGGTCAACTGGTCA");
        basicInsertTest("ACTT", "-", 7, cigar, bases, "ACTGGTCGGTCAACTGGTCAACTGGTCA");
        basicInsertTest("ACTT", "-", 17, cigar, bases, "ACTGGTCAACTGGTCAATCAACTGGTCA");
    }

    @Test
    public void testSimpleSNPAllele() {
        final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
        final Cigar cigar = matchingCigar(bases.length());
        basicInsertTest("C", "G", 1, cigar, bases, "AGTGGTCAACTGGTCAACTGGTCAACTGGTCA");
        basicInsertTest("A", "T", 7, cigar, bases, "ACTGGTCTACTGGTCAACTGGTCAACTGGTCA");
        basicInsertTest("C", "A", 17, cigar, bases, "ACTGGTCAACTGGTCAAATGGTCAACTGGTCA");
    }

    @Test
    public void testComplexInsertionAllele() {
        final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
        final Cigar cigar = complexCigar();
        basicInsertTest("-", "ACTT", 1, cigar, bases, "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC");
        basicInsertTest("-", "ACTT", 7, cigar, bases, "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC");
        basicInsertTest("-", "ACTT", 17, cigar, bases, "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC");
    }

    @Test
    public void testComplexDeletionAllele() {
        final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
        final Cigar cigar = complexCigar();
        basicInsertTest("ACTT", "-", 1, cigar, bases, "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC");
        basicInsertTest("ACTT", "-", 7, cigar, bases, "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC");
        basicInsertTest("ACTT", "-", 17, cigar, bases, "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC");
    }

    @Test
    public void testComplexSNPAllele() {
        final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
        final Cigar cigar = complexCigar();
        basicInsertTest("T", "G", 1, cigar, bases, "AGCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC");
        basicInsertTest("G", "T", 7, cigar, bases, "ATCG" + "CCGGCCGGCC" + "ATCTATCG" + "AGGGGGA" + "AGGC");
        basicInsertTest("G", "C", 17, cigar, bases, "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGCGGGA" + "AGGC");
    }

    /**
     * Shared driver: applies the ref->alt allele change to a haplotype at the requested
     * position and asserts that the resulting haplotype matches the expected bases.
     */
    private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
        // Indel alleles carry one padding base before the event; SNPs do not.
        final int paddingBases = (ref.length() == alt.length() ? 0 : 1);
        final Haplotype original = new Haplotype(hap.getBytes());
        final Allele refAllele = Allele.create(ref, true);
        final Allele altAllele = Allele.create(alt, false);
        final Haplotype modified = new Haplotype( original.insertAllele(refAllele, altAllele, loc - paddingBases, 0, cigar) );
        final Haplotype expected = new Haplotype(newHap.getBytes());
        Assert.assertEquals(modified, expected);
    }
}

View File

@ -205,6 +205,24 @@ public class MathUtilsUnitTest extends BaseTest {
}
}
/**
* Private functions used by testArrayShuffle()
*/
/** Returns true iff no two entries of x are equal (by equals()) or identical (by reference). */
private boolean hasUniqueElements(Object[] x) {
    for (int first = 0; first < x.length; first++) {
        for (int second = first + 1; second < x.length; second++) {
            final boolean duplicated = x[first].equals(x[second]) || x[first] == x[second];
            if (duplicated) {
                return false;
            }
        }
    }
    return true;
}
/** Returns true iff every element of expected also appears somewhere in actual. */
private boolean hasAllElements(final Object[] expected, final Object[] actual) {
    final HashSet<Object> missing = new HashSet<Object>(Arrays.asList(expected));
    missing.removeAll(Arrays.asList(actual));
    return missing.isEmpty();
}
@Test(enabled = true)
public void testIntAndBitSetConversion() {
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428);
@ -234,26 +252,71 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31)
}
@Test
public void testApproximateLog10SumLog10() {
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
private boolean hasUniqueElements(Object[] x) {
for (int i = 0; i < x.length; i++)
for (int j = i + 1; j < x.length; j++)
if (x[i].equals(x[j]) || x[i] == x[j])
return false;
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
}
@Test
public void testNormalizeFromLog10() {
    // Three-arg form: expected results are the inputs shifted so the largest
    // element becomes 0.0, i.e. the output stays on the log10 scale.
    Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8}));
    Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8}));
    Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9}));
    // Single-arg form: expected results are real-space probabilities summing to 1
    // (e.g. 10^-1 / (3*10^-1 + 10^-3) for the mixed cases below).
    Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25}));
    Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
    Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
}
/**
 * Private function used by testNormalizeFromLog10(): element-wise comparison of two
 * double arrays using MathUtils.compareDoubles() tolerance; arrays of differing
 * lengths never compare equal.
 */
private boolean compareDoubleArrays(double[] b1, double[] b2) {
    if ( b1.length != b2.length ) {
        return false; // sanity check
    }
    for ( int index = 0; index < b1.length; index++ ) {
        if ( MathUtils.compareDoubles(b1[index], b2[index]) != 0 ) {
            return false;
        }
    }
    return true;
}
/**
 * @return true if every element of {@code expected} also appears in {@code actual}
 *         (extra elements in {@code actual} are permitted)
 */
private boolean hasAllElements(final Object[] expected, final Object[] actual) {
    return new HashSet<Object>(Arrays.asList(actual)).containsAll(Arrays.asList(expected));
}
/**
 * Debug helper: prints the given elements (cast to Integer) space-separated on one line.
 */
private void p (Object []x) {
    final StringBuilder line = new StringBuilder();
    for ( Object v : x ) {
        line.append((Integer) v).append(" ");
    }
    System.out.println(line);
}
}

View File

@ -0,0 +1,198 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.security.Key;
import java.security.KeyPair;
import java.security.PrivateKey;
import java.security.PublicKey;
import java.util.Arrays;
/**
 * Unit tests for {@link CryptUtils}: key-pair generation, writing/reading keys to
 * files and streams, raw key decoding, and loading of the GATK master/distributed keys.
 *
 * Tests that need to read the GATK master private key are skipped (not failed) when
 * the key file exists but the current user lacks read permission on it.
 */
public class CryptUtilsUnitTest extends BaseTest {

    @Test
    public void testGenerateValidKeyPairWithDefaultSettings() {
        // A freshly-generated pair must decrypt its own encryptions in both directions.
        KeyPair keyPair = CryptUtils.generateKeyPair();
        Assert.assertTrue(CryptUtils.keysDecryptEachOther(keyPair.getPrivate(), keyPair.getPublic()));
    }

    @DataProvider( name = "InvalidKeyPairSettings" )
    public Object[][] invalidKeyPairSettingsDataProvider() {
        // Each row has exactly one invalid setting: { key length, encryption algorithm, RNG algorithm }
        return new Object[][] {
            { -1, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM},
            { CryptUtils.DEFAULT_KEY_LENGTH, "Made-up algorithm", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM},
            { CryptUtils.DEFAULT_KEY_LENGTH, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, "Made-up algorithm"}
        };
    }

    @Test( dataProvider = "InvalidKeyPairSettings", expectedExceptions = ReviewedStingException.class )
    public void testGenerateKeyPairWithInvalidSettings( int keyLength, String encryptionAlgorithm, String randomNumberGenerationAlgorithm ) {
        // The result is discarded: generation must throw before producing a pair.
        CryptUtils.generateKeyPair(keyLength, encryptionAlgorithm, randomNumberGenerationAlgorithm);
    }

    @Test
    public void testGATKMasterKeyPairMutualDecryption() {
        skipIfGATKPrivateKeyIsUnreadable("testGATKMasterKeyPairMutualDecryption");
        Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey()));
    }

    @Test
    public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() {
        skipIfGATKPrivateKeyIsUnreadable("testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption");
        Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey()));
    }

    @Test
    public void testKeyPairWriteThenRead() {
        KeyPair keyPair = CryptUtils.generateKeyPair();
        File privateKeyFile = createTempFile("testKeyPairWriteThenRead_private", "key");
        File publicKeyFile = createTempFile("testKeyPairWriteThenRead_public", "key");

        CryptUtils.writeKeyPair(keyPair, privateKeyFile, publicKeyFile);

        // Both halves of the pair must round-trip through disk unchanged:
        assertKeysAreEqual(keyPair.getPrivate(), CryptUtils.readPrivateKey(privateKeyFile));
        assertKeysAreEqual(keyPair.getPublic(), CryptUtils.readPublicKey(publicKeyFile));
    }

    @Test
    public void testPublicKeyWriteThenReadFromFile() {
        File keyFile = createTempFile("testPublicKeyWriteThenReadFromFile", "key");
        PublicKey publicKey = CryptUtils.generateKeyPair().getPublic();

        CryptUtils.writeKey(publicKey, keyFile);
        assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(keyFile));
    }

    @Test
    public void testPublicKeyWriteThenReadFromStream() throws IOException {
        File keyFile = createTempFile("testPublicKeyWriteThenReadFromStream", "key");
        PublicKey publicKey = CryptUtils.generateKeyPair().getPublic();
        CryptUtils.writeKey(publicKey, keyFile);

        // Close the stream ourselves instead of leaking the file descriptor:
        FileInputStream keyStream = new FileInputStream(keyFile);
        try {
            assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(keyStream));
        }
        finally {
            keyStream.close();
        }
    }

    @Test
    public void testPrivateKeyWriteThenReadFromFile() {
        File keyFile = createTempFile("testPrivateKeyWriteThenReadFromFile", "key");
        PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate();

        CryptUtils.writeKey(privateKey, keyFile);
        assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(keyFile));
    }

    @Test
    public void testPrivateKeyWriteThenReadFromStream() throws IOException {
        File keyFile = createTempFile("testPrivateKeyWriteThenReadFromStream", "key");
        PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate();
        CryptUtils.writeKey(privateKey, keyFile);

        // Close the stream ourselves instead of leaking the file descriptor:
        FileInputStream keyStream = new FileInputStream(keyFile);
        try {
            assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(keyStream));
        }
        finally {
            keyStream.close();
        }
    }

    @Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
    public void testReadNonExistentPublicKey() {
        File nonExistentFile = new File("jdshgkdfhg.key");
        Assert.assertFalse(nonExistentFile.exists());
        CryptUtils.readPublicKey(nonExistentFile);
    }

    @Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
    public void testReadNonExistentPrivateKey() {
        File nonExistentFile = new File("jdshgkdfhg.key");
        Assert.assertFalse(nonExistentFile.exists());
        CryptUtils.readPrivateKey(nonExistentFile);
    }

    @Test
    public void testDecodePublicKey() {
        // Encoding a key to bytes and decoding it back must preserve it exactly.
        PublicKey originalKey = CryptUtils.generateKeyPair().getPublic();
        PublicKey decodedKey = CryptUtils.decodePublicKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM);
        assertKeysAreEqual(originalKey, decodedKey);
    }

    @Test
    public void testDecodePrivateKey() {
        // Encoding a key to bytes and decoding it back must preserve it exactly.
        PrivateKey originalKey = CryptUtils.generateKeyPair().getPrivate();
        PrivateKey decodedKey = CryptUtils.decodePrivateKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM);
        assertKeysAreEqual(originalKey, decodedKey);
    }

    @Test
    public void testLoadGATKMasterPrivateKey() {
        skipIfGATKPrivateKeyIsUnreadable("testLoadGATKMasterPrivateKey");
        // Success == loading completes without throwing; the key itself is not inspected.
        CryptUtils.loadGATKMasterPrivateKey();
    }

    @Test
    public void testLoadGATKMasterPublicKey() {
        // Success == loading completes without throwing; the key itself is not inspected.
        CryptUtils.loadGATKMasterPublicKey();
    }

    @Test
    public void testLoadGATKDistributedPublicKey() {
        // Success == loading completes without throwing; the key itself is not inspected.
        CryptUtils.loadGATKDistributedPublicKey();
    }

    /**
     * Asserts that two keys are identical in encoded bytes, algorithm, and format.
     */
    private void assertKeysAreEqual( Key originalKey, Key keyFromDisk ) {
        Assert.assertTrue(Arrays.equals(originalKey.getEncoded(), keyFromDisk.getEncoded()));
        Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm());
        Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat());
    }

    /**
     * Marks the calling test as skipped (rather than failed) when the GATK master
     * private key file exists but cannot be read by the current user.
     *
     * @param testName name of the calling test, used in the skip message
     */
    private void skipIfGATKPrivateKeyIsUnreadable( String testName ) {
        if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
            throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
                                                  testName));
        }
    }

    /**
     * @return true if the GATK master private key file exists but is not readable by us
     */
    private boolean gatkPrivateKeyExistsButReadPermissionDenied() {
        File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE);
        return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead();
    }
}

View File

@ -0,0 +1,156 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
 * Integration tests for GATK key (-K) handling: valid, missing, revoked, and
 * corrupt keys combined with the various phone-home (-et) modes.
 */
public class GATKKeyIntegrationTest extends WalkerTest {

    public static final String BASE_COMMAND = String.format("-T PrintReads -R %s -I %s -o %%s",
                                                            testDir + "exampleFASTA.fasta",
                                                            testDir + "exampleBAM.bam");
    public static final String MD5_UPON_SUCCESSFUL_RUN = "b9dc5bf6753ca2819e70b056eaf61258";

    /**
     * Runs PrintReads with the given -et/-K arguments, expecting either a failure
     * with a specific exception or a successful run with a known output MD5.
     *
     * @param testName          name under which the test is reported
     * @param etArg             phone-home argument (empty string for the default mode)
     * @param keyArg            key argument (empty string for no key)
     * @param expectedException exception class the run must fail with, or null if it must succeed
     * @param md5               expected output MD5 for a successful run (ignored when an exception is expected)
     */
    private void runGATKKeyTest ( String testName, String etArg, String keyArg, Class<?> expectedException, String md5 ) {
        String command = BASE_COMMAND + String.format(" %s %s", etArg, keyArg);

        WalkerTestSpec spec = expectedException != null ?
                              new WalkerTestSpec(command, 1, expectedException) :
                              new WalkerTestSpec(command, 1, Arrays.asList(md5));

        spec.disableImplicitArgs(); // Turn off automatic inclusion of -et/-K args by WalkerTest
        executeTest(testName, spec);
    }

    @Test
    public void testValidKeyNoET() {
        runGATKKeyTest("testValidKeyNoET",
                       "-et " + GATKRunReport.PhoneHomeOption.NO_ET,
                       "-K " + keysDataLocation + "valid.key",
                       null,
                       MD5_UPON_SUCCESSFUL_RUN);
    }

    @Test
    public void testValidKeyETStdout() {
        runGATKKeyTest("testValidKeyETStdout",
                       "-et " + GATKRunReport.PhoneHomeOption.STDOUT,
                       "-K " + keysDataLocation + "valid.key",
                       null,
                       MD5_UPON_SUCCESSFUL_RUN);
    }

    @Test
    public void testValidKeyETStandard() {
        runGATKKeyTest("testValidKeyETStandard",
                       "",
                       "-K " + keysDataLocation + "valid.key",
                       null,
                       MD5_UPON_SUCCESSFUL_RUN);
    }

    @Test
    public void testNoKeyNoET() {
        // Suppressing phone-home without presenting a key is an error:
        runGATKKeyTest("testNoKeyNoET",
                       "-et " + GATKRunReport.PhoneHomeOption.NO_ET,
                       "",
                       UserException.class,
                       null);
    }

    @Test
    public void testNoKeyETStdout() {
        // Redirecting phone-home to stdout without presenting a key is an error:
        runGATKKeyTest("testNoKeyETStdout",
                       "-et " + GATKRunReport.PhoneHomeOption.STDOUT,
                       "",
                       UserException.class,
                       null);
    }

    @Test
    public void testNoKeyETStandard() {
        // No key is required when using the default phone-home mode:
        runGATKKeyTest("testNoKeyETStandard",
                       "",
                       "",
                       null,
                       MD5_UPON_SUCCESSFUL_RUN);
    }

    @Test
    public void testRevokedKey() {
        runGATKKeyTest("testRevokedKey",
                       "-et " + GATKRunReport.PhoneHomeOption.NO_ET,
                       "-K " + keysDataLocation + "revoked.key",
                       UserException.KeySignatureVerificationException.class,
                       null);
    }

    @DataProvider(name = "CorruptKeyTestData")
    public Object[][] corruptKeyDataProvider() {
        // Each row: { corrupt key file name, exception its use must trigger }
        return new Object[][] {
            { "corrupt_empty.key",                   UserException.UnreadableKeyException.class },
            { "corrupt_single_byte_file.key",        UserException.UnreadableKeyException.class },
            { "corrupt_random_contents.key",         UserException.UnreadableKeyException.class },
            { "corrupt_single_byte_deletion.key",    UserException.UnreadableKeyException.class },
            { "corrupt_single_byte_insertion.key",   UserException.UnreadableKeyException.class },
            { "corrupt_single_byte_change.key",      UserException.UnreadableKeyException.class },
            { "corrupt_multi_byte_deletion.key",     UserException.UnreadableKeyException.class },
            { "corrupt_multi_byte_insertion.key",    UserException.UnreadableKeyException.class },
            { "corrupt_multi_byte_change.key",       UserException.UnreadableKeyException.class },
            { "corrupt_bad_isize_field.key",         UserException.UnreadableKeyException.class },
            { "corrupt_bad_crc.key",                 UserException.UnreadableKeyException.class },
            { "corrupt_no_email_address.key",        UserException.UnreadableKeyException.class },
            { "corrupt_no_sectional_delimiter.key",  UserException.KeySignatureVerificationException.class },
            { "corrupt_no_signature.key",            UserException.UnreadableKeyException.class },
            { "corrupt_bad_signature.key",           UserException.KeySignatureVerificationException.class },
            { "corrupt_non_gzipped_valid_key.key",   UserException.UnreadableKeyException.class }
        };
    }

    @Test(dataProvider = "CorruptKeyTestData")
    public void testCorruptKey ( String corruptKeyName, Class<?> expectedException ) {
        runGATKKeyTest(String.format("testCorruptKey (%s)", corruptKeyName),
                       "-et " + GATKRunReport.PhoneHomeOption.NO_ET,
                       "-K " + keysDataLocation + corruptKeyName,
                       expectedException,
                       null);
    }

    @Test
    public void testCorruptButNonRequiredKey() {
        // A corrupt key is tolerated as long as the phone-home mode doesn't require one:
        runGATKKeyTest("testCorruptButNonRequiredKey",
                       "",
                       "-K " + keysDataLocation + "corrupt_random_contents.key",
                       null,
                       MD5_UPON_SUCCESSFUL_RUN);
    }
}

View File

@ -0,0 +1,128 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.SkipException;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.io.File;
import java.security.KeyPair;
import java.security.PrivateKey;
import java.security.PublicKey;
/**
 * Unit tests for {@link GATKKey}: creation from key pairs and key files,
 * signature validation, and error handling for mismatched or corrupt input.
 *
 * Tests that need to read the GATK master private key are skipped (not failed)
 * when the key file exists but the current user lacks read permission on it.
 */
public class GATKKeyUnitTest extends BaseTest {

    @Test
    public void testCreateGATKKeyUsingMasterKeyPair() {
        skipIfGATKPrivateKeyIsUnreadable("testCreateGATKKeyUsingMasterKeyPair");

        PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
        PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey();

        // We should be able to create a valid GATKKey using our master key pair:
        GATKKey key = new GATKKey(masterPrivateKey, masterPublicKey, "foo@bar.com");
        Assert.assertTrue(key.isValid());
    }

    @Test
    public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() {
        skipIfGATKPrivateKeyIsUnreadable("testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey");

        PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
        PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();

        // We should also be able to create a valid GATKKey using our master private
        // key and the public key we distribute with the GATK:
        GATKKey key = new GATKKey(masterPrivateKey, distributedPublicKey, "foo@bar.com");
        Assert.assertTrue(key.isValid());
    }

    @Test( expectedExceptions = ReviewedStingException.class )
    public void testKeyPairMismatch() {
        KeyPair firstKeyPair = CryptUtils.generateKeyPair();
        KeyPair secondKeyPair = CryptUtils.generateKeyPair();

        // Attempting to create a GATK Key with private and public keys that aren't part of the
        // same key pair should immediately trigger a validation failure:
        new GATKKey(firstKeyPair.getPrivate(), secondKeyPair.getPublic(), "foo@bar.com");
    }

    @Test( expectedExceptions = ReviewedStingException.class )
    public void testEncryptionAlgorithmMismatch() {
        KeyPair keyPair = CryptUtils.generateKeyPair(CryptUtils.DEFAULT_KEY_LENGTH, "DSA", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);

        // Attempting to use a DSA private key to create an RSA signature should throw an error:
        new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), "foo@bar.com", "SHA1withRSA");
    }

    @Test( expectedExceptions = UserException.class )
    public void testInvalidEmailAddress() {
        String emailAddressWithNulByte = new String(new byte[] { 0 });
        KeyPair keyPair = CryptUtils.generateKeyPair();

        // Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file:
        new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), emailAddressWithNulByte);
    }

    @Test
    public void testCreateGATKKeyFromValidKeyFile() {
        GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "valid.key"));
        Assert.assertTrue(key.isValid());
    }

    @Test( expectedExceptions = UserException.UnreadableKeyException.class )
    public void testCreateGATKKeyFromCorruptKeyFile() {
        // Construction itself must fail on a corrupt key file:
        new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "corrupt_random_contents.key"));
    }

    @Test
    public void testCreateGATKKeyFromRevokedKeyFile() {
        // A revoked key is readable, but must fail signature validation:
        GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "revoked.key"));
        Assert.assertFalse(key.isValid());
    }

    @Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
    public void testCreateGATKKeyFromNonExistentFile() {
        File nonExistentFile = new File("ghfdkgsdhg.key");
        Assert.assertFalse(nonExistentFile.exists());
        new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile);
    }

    /**
     * Marks the calling test as skipped (rather than failed) when the GATK master
     * private key file exists but cannot be read by the current user.
     *
     * @param testName name of the calling test, used in the skip message
     */
    private void skipIfGATKPrivateKeyIsUnreadable( String testName ) {
        if ( gatkPrivateKeyExistsButReadPermissionDenied() ) {
            throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key",
                                                  testName));
        }
    }

    /**
     * @return true if the GATK master private key file exists but is not readable by us
     */
    private boolean gatkPrivateKeyExistsButReadPermissionDenied() {
        File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE);
        return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead();
    }
}

View File

@ -27,12 +27,18 @@ package org.broadinstitute.sting.utils.io;
import org.apache.commons.io.FileUtils;
import org.broadinstitute.sting.BaseTest;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
public class IOUtilsUnitTest extends BaseTest {
@ -230,4 +236,90 @@ public class IOUtilsUnitTest extends BaseTest {
Assert.assertFalse(IOUtils.isSpecialFile(new File("/home/user/my.file")));
Assert.assertFalse(IOUtils.isSpecialFile(new File("/devfake/null")));
}
@DataProvider( name = "ByteArrayIOTestData")
public Object[][] byteArrayIOTestDataProvider() {
    // Each row: { file size in bytes, read buffer size }. Sizes straddle the
    // buffer-size boundaries (0, 1x, 2x the buffer) plus a few in-between values.
    final int readBufferSize = 4096;
    final int[] fileSizes = { 0, 1, 2000, 4095, 4096, 4097, 6000, 8191, 8192, 8193, 10000 };

    final Object[][] testData = new Object[fileSizes.length][];
    for ( int i = 0; i < fileSizes.length; i++ ) {
        testData[i] = new Object[] { fileSizes[i], readBufferSize };
    }
    return testData;
}
@Test( dataProvider = "ByteArrayIOTestData" )
public void testWriteThenReadFileIntoByteArray ( int fileSize, int readBufferSize ) throws Exception {
    // Round-trip: write deterministic random bytes to a temp file, read them back, compare.
    final File tempFile = createTempFile(String.format("testWriteThenReadFileIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp");
    final byte[] expectedData = getDeterministicRandomData(fileSize);

    IOUtils.writeByteArrayToFile(expectedData, tempFile);
    final byte[] actualData = IOUtils.readFileIntoByteArray(tempFile, readBufferSize);

    Assert.assertEquals(actualData.length, expectedData.length);
    Assert.assertTrue(Arrays.equals(actualData, expectedData));
}
/**
 * Round-trips deterministic random data through stream-based write/read and verifies
 * the bytes survive unchanged for every (file size, buffer size) combination.
 */
@Test( dataProvider = "ByteArrayIOTestData" )
public void testWriteThenReadStreamIntoByteArray ( int fileSize, int readBufferSize ) throws Exception {
    File tempFile = createTempFile(String.format("testWriteThenReadStreamIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp");
    byte[] dataWritten = getDeterministicRandomData(fileSize);

    // Close both streams explicitly instead of leaking the descriptors until GC:
    FileOutputStream outStream = new FileOutputStream(tempFile);
    try {
        IOUtils.writeByteArrayToStream(dataWritten, outStream);
    }
    finally {
        outStream.close();
    }

    byte[] dataRead;
    FileInputStream inStream = new FileInputStream(tempFile);
    try {
        dataRead = IOUtils.readStreamIntoByteArray(inStream, readBufferSize);
    }
    finally {
        inStream.close();
    }

    Assert.assertEquals(dataRead.length, dataWritten.length);
    Assert.assertTrue(Arrays.equals(dataRead, dataWritten));
}
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
public void testReadNonExistentFileIntoByteArray() {
    // Reading a file that does not exist must raise CouldNotReadInputFile.
    final File missingFile = new File("djfhsdkjghdfk");
    Assert.assertFalse(missingFile.exists());
    IOUtils.readFileIntoByteArray(missingFile);
}
// Passing a null input stream is a programming error: it must raise ReviewedStingException.
@Test( expectedExceptions = ReviewedStingException.class )
public void testReadNullStreamIntoByteArray() {
IOUtils.readStreamIntoByteArray(null);
}
/**
 * A non-positive read buffer size is a programming error: it must raise ReviewedStingException.
 */
@Test( expectedExceptions = ReviewedStingException.class )
public void testReadStreamIntoByteArrayInvalidBufferSize() throws Exception {
    // Close the stream ourselves even though the call under test is expected to throw:
    FileInputStream stream = new FileInputStream(createTempFile("testReadStreamIntoByteArrayInvalidBufferSize", "tmp"));
    try {
        IOUtils.readStreamIntoByteArray(stream, -1);
    }
    finally {
        stream.close();
    }
}
@Test( expectedExceptions = UserException.CouldNotCreateOutputFile.class )
public void testWriteByteArrayToUncreatableFile() {
    // /dev/foo/bar cannot be created, so the write must fail with CouldNotCreateOutputFile.
    final File uncreatableFile = new File("/dev/foo/bar");
    IOUtils.writeByteArrayToFile(new byte[]{0}, uncreatableFile);
}
@Test( expectedExceptions = ReviewedStingException.class )
public void testWriteNullByteArrayToFile() {
    // A null data array is a programming error: it must raise ReviewedStingException.
    final File destination = createTempFile("testWriteNullByteArrayToFile", "tmp");
    IOUtils.writeByteArrayToFile(null, destination);
}
@Test( expectedExceptions = ReviewedStingException.class )
public void testWriteByteArrayToNullStream() {
    // A null output stream is a programming error: it must raise ReviewedStingException.
    final byte[] singleByte = new byte[]{0};
    IOUtils.writeByteArrayToStream(singleByte, null);
}
/**
 * Generates {@code size} bytes of pseudo-random data that is identical across calls,
 * since the shared random generator is reset to its fixed seed first.
 *
 * @param size number of bytes to generate
 * @return deterministic pseudo-random byte array of length {@code size}
 */
private byte[] getDeterministicRandomData ( int size ) {
    GenomeAnalysisEngine.resetRandomGenerator();  // fixed seed -> reproducible bytes

    final byte[] data = new byte[size];
    GenomeAnalysisEngine.getRandomGenerator().nextBytes(data);
    return data;
}
}

Binary file not shown.

View File

@ -36,6 +36,8 @@
<dir name="org/broadinstitute/sting/utils/R" includes="**/*.tar.gz" />
<!-- All R scripts in org.broadinstitute.sting -->
<dir name="org/broadinstitute/sting" includes="**/*.R" />
<!-- The GATK public key -->
<file path="GATK_public.key" />
</dependencies>
</executable>
<resources>