diff --git a/build.xml b/build.xml index 1df75cd1d..d3e25d424 100644 --- a/build.xml +++ b/build.xml @@ -47,6 +47,7 @@ + @@ -567,6 +568,7 @@ + @@ -615,6 +617,9 @@ + + + @@ -879,6 +884,7 @@ + diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 32002e093..e5aaf2338 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -35,9 +35,12 @@ import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.crypt.CryptUtils; +import org.broadinstitute.sting.utils.crypt.GATKKey; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.ListFileUtils; +import java.security.PublicKey; import java.util.*; /** @@ -78,6 +81,9 @@ public abstract class CommandLineExecutable extends CommandLineProgram { Walker walker = engine.getWalkerByName(getAnalysisName()); try { + // Make sure a valid GATK user key is present, if required. + authorizeGATKRun(); + engine.setArguments(getArgumentCollection()); // File lists can require a bit of additional expansion. Set these explicitly by the engine. @@ -130,6 +136,28 @@ public abstract class CommandLineExecutable extends CommandLineProgram { return 0; } + /** + * Authorizes this run of the GATK by checking for a valid GATK user key, if required. + * Currently, a key is required only if running with the -et NO_ET or -et STDOUT options. + */ + private void authorizeGATKRun() { + if ( getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.NO_ET || + getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) { + if ( getArgumentCollection().gatkKeyFile == null ) { + throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " + + "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + + "for more information and instructions on how to obtain a key."); + } + else { + PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey(); + GATKKey gatkUserKey = new GATKKey(gatkPublicKey, getArgumentCollection().gatkKeyFile); + + if ( ! gatkUserKey.isValid() ) { + throw new UserException.KeySignatureVerificationException(getArgumentCollection().gatkKeyFile); + } + } + } + } /** * Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled. diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 8ec707801..02d211a0c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -65,9 +65,12 @@ public class GATKArgumentCollection { @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false) public Integer readBufferSize = null; - @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false) + @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; + @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) + public File gatkKeyFile = null; + @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false) public List readFilters = new ArrayList(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index e8627ef4c..f1f74069f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -154,9 +154,7 @@ public class GATKRunReport { /** Standard option. Writes to local repository if it can be found, or S3 otherwise */ STANDARD, /** Force output to STDOUT. For debugging only */ - STDOUT, - /** Force output to S3. For debugging only */ - AWS_S3 // todo -- remove me -- really just for testing purposes + STDOUT } private static final DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss"); @@ -239,11 +237,8 @@ public class GATKRunReport { case STDOUT: postReportToStream(System.out); break; - case AWS_S3: - postReportToAWSS3(); - break; default: - exceptDuringRunReport("BUG: unexcepted PhoneHomeOption "); + exceptDuringRunReport("BUG: unexpected PhoneHomeOption "); break; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index b72b20e0b..b59b550e1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -296,6 +296,10 @@ public class GATKReportTable { return primaryKeyColumn.contains(primaryKey); } + public Collection getPrimaryKeys() { + return Collections.unmodifiableCollection(primaryKeyColumn); + } + /** * Set the value for a given position in the table * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 38e7051e4..cc6f67cc9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -25,8 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -39,6 +45,63 @@ import org.broadinstitute.sting.commandline.Hidden; public class RecalibrationArgumentCollection { + /** + * This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference, + * so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites. This tool accepts any number of RodBindings (VCF, Bed, etc.) + * for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites. + * Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument. + */ + @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false) + protected List> knownSites = Collections.emptyList(); + + /** + * After the header, data records occur one per line until the end of the file. The first several items on a line are the + * values of the individual covariates and will change depending on which covariates were specified at runtime. The last + * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, + * and the raw empirical quality score calculated by phred-scaling the mismatch rate. + */ + @Gather(CountCovariatesGatherer.class) + @Output + protected PrintStream RECAL_FILE; + + /** + * List all implemented covariates. + */ + @Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false) + protected boolean LIST_ONLY = false; + + /** + * Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list. + */ + @Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false) + protected String[] COVARIATES = null; + + /* + * Use the standard set of covariates in addition to the ones listed using the -cov argument + */ + @Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false) + protected boolean USE_STANDARD_COVARIATES = true; + + ///////////////////////////// + // Debugging-only Arguments + ///////////////////////////// + /** + * This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option. + */ + @Hidden + @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.") + protected boolean RUN_WITHOUT_DBSNP = false; + + ///////////////////////////// + // protected Member Variables + ///////////////////////////// + protected final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables) + protected final ArrayList requestedCovariates = new ArrayList();// A list to hold the covariate objects that were requested + + protected final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. + protected final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. + + /** * CountCovariates and TableRecalibration accept a --solid_recal_mode flag which governs how the recalibrator handles the * reads which have had the reference inserted because of color space inconsistencies. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 94f9eb6c5..833dce932 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -29,6 +29,7 @@ import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -115,6 +116,7 @@ import java.util.*; // todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now @By(DataSource.REFERENCE) @PartitionBy(PartitionType.NONE) +@Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE) public class DepthOfCoverageWalker extends LocusWalker>, CoveragePartitioner> implements TreeReducible { @Output @Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"}) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java new file mode 100755 index 000000000..e7a2f74e2 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java @@ -0,0 +1,162 @@ +package org.broadinstitute.sting.gatk.walkers.diagnostics; + +import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.io.PrintStream; + +/** + * Computes the read error rate per position in read (in the original 5'->3' orientation that the read had coming off the machine) + * + * Emits a GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate for each read + * group in the input BAMs FOR ONLY THE FIRST OF PAIR READS. + * + *

Input

+ *

+ * Any number of BAM files + *

+ * + *

Output

+ *

+ * GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate. + * + * For example, running this tool on the NA12878 data sets: + * + *

+ *      ##:GATKReport.v0.2 ErrorRatePerCycle : The error rate per sequenced position in the reads
+ *      readgroup  cycle  mismatches  counts  qual  errorrate
+ *      20FUK.1        0          80   23368    25   3.47e-03
+ *      20FUK.1        1          40   23433    28   1.75e-03
+ *      20FUK.1        2          36   23453    28   1.58e-03
+ *      20FUK.1        3          26   23476    29   1.15e-03
+ *      20FUK.1        4          32   23495    29   1.40e-03
+ *      up to 101 cycles
+ *      20FUK.2        0          77   20886    24   3.73e-03
+ *      20FUK.2        1          28   20920    29   1.39e-03
+ *      20FUK.2        2          24   20931    29   1.19e-03
+ *      20FUK.2        3          30   20940    28   1.48e-03
+ *      20FUK.2        4          25   20948    29   1.24e-03
+ *      up to 101 cycles
+ *      20FUK.3        0          78   22038    24   3.58e-03
+ *      20FUK.3        1          40   22091    27   1.86e-03
+ *      20FUK.3        2          23   22108    30   1.09e-03
+ *      20FUK.3        3          36   22126    28   1.67e-03
+ *      
+ *

+ * + *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T ErrorRatePerCycle
+ *      -I bundle/current/b37/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam
+ *      -R bundle/current/b37/human_g1k_v37.fasta
+ *      -o example.gatkreport.txt
+ *  
+ * + * @author Kiran Garimella, Mark DePristo + */ +public class ErrorRatePerCycle extends LocusWalker { + @Output PrintStream out; + @Argument(fullName="min_base_quality_score", shortName="mbq", doc="Minimum base quality required to consider a base for calling", required=false) + public Integer MIN_BASE_QUAL = 0; + @Argument(fullName="min_mapping_quality_score", shortName="mmq", doc="Minimum read mapping quality required to consider a read for calling", required=false) + public Integer MIN_MAPPING_QUAL = 20; + + private GATKReport report; + private GATKReportTable table; + private final static String reportName = "ErrorRatePerCycle"; + private final static String reportDescription = "The error rate per sequenced position in the reads"; + + /** + * Allows us to use multiple records for the key (read group x cycle) + */ + private static class TableKey implements Comparable { + final String readGroup; + final int cycle; + + private TableKey(final String readGroup, final int cycle) { + this.readGroup = readGroup; + this.cycle = cycle; + } + + @Override + public int compareTo(final TableKey tableKey) { + final int scmp = readGroup.compareTo(tableKey.readGroup); + if ( scmp == 0 ) + return Integer.valueOf(cycle).compareTo(tableKey.cycle); + else + return scmp; + } + } + + public void initialize() { + report = new GATKReport(); + report.addTable(reportName, reportDescription); + table = report.getTable(reportName); + table.addPrimaryKey("key", false); + table.addColumn("readgroup", 0); + table.addColumn("cycle", 0); + table.addColumn("mismatches", 0); + table.addColumn("counts", 0); + table.addColumn("qual", 0); + table.addColumn("errorrate", 0.0f, "%.2e"); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + for ( final PileupElement p : context.getBasePileup() ) { + final GATKSAMRecord read = p.getRead(); + final int offset = p.getOffset(); + final boolean firstOfPair = ! read.getReadPairedFlag() || read.getFirstOfPairFlag(); + + if ( firstOfPair && read.getMappingQuality() >= MIN_MAPPING_QUAL && p.getQual() >= MIN_BASE_QUAL ) { + final byte readBase = p.getBase(); + final byte refBase = ref.getBase(); + final int cycle = offset; + + if ( BaseUtils.isRegularBase(readBase) && BaseUtils.isRegularBase(refBase) ) { + final TableKey key = new TableKey(read.getReadGroup().getReadGroupId(), cycle); + + if ( ! table.containsKey(key) ) { + table.set(key, "cycle", cycle); + table.set(key, "readgroup", read.getReadGroup().getReadGroupId()); + } + + table.increment(key, "counts"); + if (readBase != refBase) + table.increment(key, "mismatches"); + } + } + } + + return null; + } + + public Integer reduceInit() { return null; } + + public Integer reduce(Integer value, Integer sum) { return null; } + + public void onTraversalDone(Integer sum) { + for ( final Object key : table.getPrimaryKeys() ) { + final int mismatches = (Integer)table.get(key, "mismatches"); + final int count = (Integer)table.get(key, "counts"); + final double errorRate = (mismatches + 1) / (1.0*(count + 1)); + final int qual = QualityUtils.probToQual(1-errorRate, 0.0); + table.set(key, "qual", qual); + table.set(key, "errorrate", errorRate); + } + + report.print(out); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java index d7a48d321..14985907d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java @@ -94,9 +94,6 @@ import java.util.Map; * * @author Mark DePristo */ - - - public class ReadGroupProperties extends ReadWalker { @Output public PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 200a250f2..26023bd2f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -454,8 +454,7 @@ public class HaplotypeIndelErrorModel { // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2) // First term is approximated by Jacobian log with table lookup. // Second term is a constant added to both likelihoods so will be ignored - haplotypeLikehoodMatrix[i][j] += MathUtils.softMax(readLikelihood[0], - readLikelihood[1]); + haplotypeLikehoodMatrix[i][j] += MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 6410d619d..64993b43a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -166,18 +166,17 @@ public class PairHMMIndelErrorModel { final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual]; - matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead, - YMetricArray[im1][jm1] + pBaseRead); + matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]}); final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1]; final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1]; - XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1); + XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1); // update Y array final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1]; final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1]; - YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2); + YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2); } } @@ -316,9 +315,7 @@ public class PairHMMIndelErrorModel { final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1; - final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ], - XMetricArray[bestI][bestJ], - YMetricArray[bestI][bestJ]); + final double bestMetric = MathUtils.approximateLog10SumLog10(new double[]{ matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ] }); /* if (DEBUG) { @@ -651,7 +648,7 @@ public class PairHMMIndelErrorModel { private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) { final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes]; - // todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplied to just a single loop without the intermediate NxN matrix + // todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix for (int i=0; i < numHaplotypes; i++) { for (int j=i; j < numHaplotypes; j++){ // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j] @@ -665,7 +662,7 @@ public class PairHMMIndelErrorModel { final double li = readLikelihoods[readIdx][i]; final double lj = readLikelihoods[readIdx][j]; final int readCount = readCounts[readIdx]; - haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF); + haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.approximateLog10SumLog10(li, lj) + LOG_ONE_HALF); } } } @@ -678,7 +675,7 @@ public class PairHMMIndelErrorModel { } } - // renormalize so that max element is zero. + // renormalize so that max element is zero. return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index aa9ae1517..59a7bd01a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -26,6 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.*; +import org.apache.commons.jexl2.Expression; +import org.apache.commons.jexl2.JexlContext; +import org.apache.commons.jexl2.JexlEngine; +import org.apache.commons.jexl2.MapContext; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -71,7 +75,7 @@ import java.util.*; *

* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferrably MSA cleaned) sequencing * data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs - * include additional statistics such as mismtaches and base qualitites around the calls, read strandness (how many + * include additional statistics such as mismatches and base qualitites around the calls, read strandness (how many * forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional * statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will * attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional @@ -88,6 +92,16 @@ import java.util.*; * bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged * on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups). * + * Which (putative) calls will make it into the output file(s) is controlled by an expression/list of expressions passed with -filter + * flag: if any of the expressions evaluate to TRUE, the site will be discarded. Otherwise the putative call and all the + * associated statistics will be printed into the output. Expressions recognize the following variables(in paired-sample + * somatic mode variables are prefixed with T_ and N_ for Tumor and Normal, e.g. N_COV and T_COV are defined instead of COV): + * COV for coverage at the site, INDEL_F for fraction of reads supporting consensus indel at the site (wrt total coverage), + * INDEL_CF for fraction of reads with consensus indel wrt all reads with an indel at the site, CONS_CNT for the count of + * reads supporting the consensus indel at the site. Conventional arithmetic and logical operations are supported. For instance, + * N_COV<4||T_COV<6||T_INDEL_F<0.3||T_INDEL_CF<0.7 instructs the tool to only output indel calls with at least 30% observed + * allelic fraction and with consensus indel making at least 70% of all indel observations at the site, and only at the sites + * where tumor coverage and normal coverage are at least 6 and 4, respectively. *

Input

*

* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode). @@ -147,30 +161,44 @@ public class SomaticIndelDetectorWalker extends ReadWalker { doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false) java.io.File bedOutput = null; + @Deprecated @Argument(fullName="minCoverage", shortName="minCoverage", doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+ - "with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false) + "with --unpaired (single sample) option, this value is used for minimum sample coverage. "+ + "INSTEAD USE: T_COV { "GENOMIC/UTR/INTRON/CODING and with the gene name", required=false) String RefseqFileName = null; + + @Argument(shortName="filter", doc="One or more logical expressions. If any of the expressions is TRUE, " + + "putative indel will be discarded and nothing will be printed into the output (unless genotyping "+ + "at the specific position is explicitly requested, see -genotype). "+ + "Default: T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7", required=false) + public ArrayList FILTER_EXPRESSIONS = new ArrayList(); + //@Argument(fullName="blacklistedLanes", shortName="BL", // doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+ // "by this application, so they will not contribute indels to consider and will not be counted.", required=false) @@ -221,7 +256,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { private Writer verboseWriter = null; - private static String annGenomic = "GENOMIC"; + private static String annGenomic = "GENOMIC\t"; private static String annIntron = "INTRON"; private static String annUTR = "UTR"; private static String annCoding = "CODING"; @@ -245,6 +280,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker { private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed; // can be 1 base before lastGenotyped start + private JexlEngine jexlEngine = new JexlEngine(); + private ArrayList jexlExpressions = new ArrayList(); + + // the following arrays store indel source-specific (normal/tumor) metric names + // for fast access when populating JEXL expression contexts (see IndelPrecall.fillContext()) + private final static String[] normalMetricsCassette = new String[4]; + private final static String[] tumorMetricsCassette = new String[4]; + private final static String[] singleMetricsCassette = new String[4]; + private final static int C_COV=0; + private final static int C_CONS_CNT=1; + private final static int C_INDEL_F=2; + private final static int C_INDEL_CF=3; + static { + normalMetricsCassette[C_COV] = "N_COV"; + tumorMetricsCassette[C_COV] = "T_COV"; + singleMetricsCassette[C_COV] = "COV"; + normalMetricsCassette[C_CONS_CNT] = "N_CONS_CNT"; + tumorMetricsCassette[C_CONS_CNT] = "T_CONS_CNT"; + singleMetricsCassette[C_CONS_CNT] = "CONS_CNT"; + normalMetricsCassette[C_INDEL_F] = "N_INDEL_F"; + tumorMetricsCassette[C_INDEL_F] = "T_INDEL_F"; + singleMetricsCassette[C_INDEL_F] = "INDEL_F"; + normalMetricsCassette[C_INDEL_CF] = "N_INDEL_CF"; + tumorMetricsCassette[C_INDEL_CF] = "T_INDEL_CF"; + singleMetricsCassette[C_INDEL_CF] = "INDEL_CF"; + } // "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt" @@ -389,6 +450,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker { vcf_writer.writeHeader(new VCFHeader(getVCFHeaderInfo(), SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))) ; refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile); + + // Now initialize JEXL expressions: + if ( FILTER_EXPRESSIONS.size() == 0 ) { + if ( call_unpaired ) { + FILTER_EXPRESSIONS.add("COV<6||INDEL_F<0.3||INDEL_CF<0.7"); + } else { + FILTER_EXPRESSIONS.add("T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7"); + } + } + for ( String s : FILTER_EXPRESSIONS ) { + try { + Expression e = jexlEngine.createExpression(s); + jexlExpressions.add(e); + } catch (Exception e) { + throw new UserException.BadArgumentValue("Filter expression", "Invalid expression used (" + s + "). Please see the JEXL docs for correct syntax.") ; + } + + } } @@ -661,14 +740,26 @@ public class SomaticIndelDetectorWalker extends ReadWalker { if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue; IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH); + JexlContext jc = new MapContext(); + normalCall.fillContext(jc,singleMetricsCassette); + boolean discard_event = false; - if ( normalCall.getCoverage() < minCoverage && ! genotype ) { - if ( DEBUG ) { - System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); - } - continue; // low coverage + for ( Expression e : jexlExpressions ) { + if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; } } + if ( discard_event && ! genotype ) { + normal_context.indelsAt(pos).clear(); + continue; //* + } + +// if ( normalCall.getCoverage() < minCoverage && ! genotype ) { +// if ( DEBUG ) { +// System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); +// } +// continue; // low coverage +// } + if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos); long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() ); @@ -697,24 +788,16 @@ public class SomaticIndelDetectorWalker extends ReadWalker { location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos); - boolean haveCall = normalCall.isCall(); // cache the value - - if ( haveCall || genotype) { - if ( haveCall ) normalCallsMade++; - printVCFLine(vcf_writer,normalCall); - if ( bedWriter != null ) normalCall.printBedLine(bedWriter); - if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall); - lastGenotypedPosition = pos; - } + if ( ! discard_event ) normalCallsMade++; + printVCFLine(vcf_writer,normalCall, discard_event); + if ( bedWriter != null ) normalCall.printBedLine(bedWriter); + if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, discard_event); + lastGenotypedPosition = pos; normal_context.indelsAt(pos).clear(); // we dealt with this indel; don't want to see it again // (we might otherwise in the case when 1) there is another indel that follows // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel) - -// for ( IndelVariant var : variants ) { -// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount()); -// } } if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")"); @@ -829,18 +912,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker { IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH); IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH); - if ( tumorCall.getCoverage() < minCoverage && ! genotype ) { - if ( DEBUG ) { - System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)"); - } - continue; // low coverage + JexlContext jc = new MapContext(); + tumorCall.fillContext(jc,tumorMetricsCassette); + normalCall.fillContext(jc,normalMetricsCassette); + boolean discard_event = false; + + for ( Expression e : jexlExpressions ) { + if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; } } - if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) { - if ( DEBUG ) { - System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); - } - continue; // low coverage + + if ( discard_event && ! genotype ) { + tumor_context.indelsAt(pos).clear(); + normal_context.indelsAt(pos).clear(); + continue; //* } +// if ( tumorCall.getCoverage() < minCoverage && ! genotype ) { +// if ( DEBUG ) { +// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)"); +// } +// continue; // low coverage +// } +// if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) { +// if ( DEBUG ) { +// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); +// } +// continue; // low coverage +// } if ( DEBUG ) { System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, "); @@ -868,32 +965,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker { if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right -// location = getToolkit().getGenomeLocParser().setStart(location,pos); -// location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data - location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data - boolean haveCall = tumorCall.isCall(); // cache the value +// boolean haveCall = tumorCall.isCall(); // cache the value - if ( haveCall || genotype ) { - if ( haveCall ) tumorCallsMade++; + if ( ! discard_event ) tumorCallsMade++; - printVCFLine(vcf_writer,normalCall,tumorCall); + printVCFLine(vcf_writer,normalCall,tumorCall,discard_event); - if ( bedWriter != null ) tumorCall.printBedLine(bedWriter); + if ( bedWriter != null ) tumorCall.printBedLine(bedWriter); + + if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall, discard_event ); + lastGenotypedPosition = pos; - if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall ); - lastGenotypedPosition = pos; - } tumor_context.indelsAt(pos).clear(); normal_context.indelsAt(pos).clear(); // we dealt with this indel; don't want to see it again // (we might otherwise in the case when 1) there is another indel that follows // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel) - -// for ( IndelVariant var : variants ) { -// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount()); -// } } if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")"); @@ -947,14 +1036,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } - public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) { + public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, boolean discard_event) { RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location)); String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList)); StringBuilder fullRecord = new StringBuilder(); fullRecord.append(makeFullRecord(normalCall)); fullRecord.append(annotationString); - if ( ! normalCall.isCall() && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); + if ( discard_event && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); try { verboseWriter.write(fullRecord.toString()); verboseWriter.write('\n'); @@ -965,7 +1054,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } - public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) { + public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall, boolean discard_event) { RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location)); String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList)); @@ -1013,7 +1102,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { fullRecord.append('\t'); fullRecord.append(annotationString); - if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); + if ( discard_event && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); try { verboseWriter.write(fullRecord.toString()); @@ -1023,7 +1112,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } } - public void printVCFLine(VCFWriter vcf, IndelPrecall call) { + public void printVCFLine(VCFWriter vcf, IndelPrecall call, boolean discard_event) { long start = call.getPosition()-1; // If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed. @@ -1060,14 +1149,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker { Map attrs = call.makeStatsAttributes(null); - if ( call.isCall() ) // we made a call - put actual het genotype here: + if ( ! discard_event ) // we made a call - put actual het genotype here: genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false)); else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all) genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false)); } Set filters = null; - if ( call.getVariant() != null && ! call.isCall() ) { + if ( call.getVariant() != null && discard_event ) { filters = new HashSet(); filters.add("NoCall"); } @@ -1095,7 +1184,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } } - public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) { + public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall, boolean discard_event) { long start = tCall.getPosition()-1; long stop = start; @@ -1112,7 +1201,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { Map attrs = new HashMap(); boolean isSomatic = false; - if ( nCall.getCoverage() >= minNormalCoverage && nCall.getVariant() == null && tCall.getVariant() != null ) { + if ( nCall.getVariant() == null && tCall.getVariant() != null ) { isSomatic = true; attrs.put(VCFConstants.SOMATIC_KEY,true); } @@ -1155,7 +1244,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } Set filters = null; - if ( tCall.getVariant() != null && ! tCall.isCall() ) { + if ( tCall.getVariant() != null && discard_event ) { filters = new HashSet(); filters.add("NoCall"); } @@ -1602,6 +1691,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker { public IndelVariant getVariant() { return consensus_indel; } + public void fillContext(JexlContext context,String[] cassette) { + context.set(cassette[C_INDEL_F],((double)consensus_indel_count)/total_coverage); + context.set(cassette[C_INDEL_CF],((double)consensus_indel_count/all_indel_count)); + context.set(cassette[C_COV],total_coverage); + context.set(cassette[C_CONS_CNT],consensus_indel_count); + } +/* public boolean isCall() { boolean ret = ( consensus_indel_count >= minIndelCount && (double)consensus_indel_count > minFraction * total_coverage && @@ -1610,10 +1706,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker { " total_count="+all_indel_count+" cov="+total_coverage+ " minConsensusF="+((double)consensus_indel_count)/all_indel_count+ " minF="+((double)consensus_indel_count)/total_coverage); + return ret; - +// return true; } - +*/ /** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed * variants, and sets the counts of consensus observations and all observations of any indels (including non-consensus) * @param variants diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 74291e025..d18c7e10a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import com.google.java.contract.Requires; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.util.IntervalTree; import net.sf.samtools.SAMSequenceRecord; @@ -19,11 +20,8 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; -import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; -import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; @@ -32,7 +30,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -389,9 +386,9 @@ public class VariantEvalWalker extends RodWalker implements Tr nec.apply(tracker, ref, context, comp, eval); } - // eval=null against all comps of different type + // eval=null against all comps of different type that aren't bound to another eval for ( VariantContext otherComp : compSet ) { - if ( otherComp != comp ) { + if ( otherComp != comp && ! compHasMatchingEval(otherComp, evalSetBySample) ) { synchronized (nec) { nec.apply(tracker, ref, context, otherComp, null); } @@ -409,6 +406,35 @@ public class VariantEvalWalker extends RodWalker implements Tr return null; } + @Requires({"comp != null", "evals != null"}) + private boolean compHasMatchingEval(final VariantContext comp, final Collection evals) { + // find all of the matching comps + for ( final VariantContext eval : evals ) { + if ( eval != null && doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) != EvalCompMatchType.NO_MATCH ) + return true; + } + + // nothing matched + return false; + } + + private enum EvalCompMatchType { NO_MATCH, STRICT, LENIENT } + + @Requires({"eval != null", "comp != null"}) + private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) { + // find all of the matching comps + if ( comp.getType() != eval.getType() ) + return EvalCompMatchType.NO_MATCH; + + // find the comp which matches both the reference allele and alternate allele from eval + final Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0); + final Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0); + if ((altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference()))) + return EvalCompMatchType.STRICT; + else + return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT; + } + private VariantContext findMatchingComp(final VariantContext eval, final Collection comps) { // if no comps, return null if ( comps == null || comps.isEmpty() ) @@ -419,26 +445,21 @@ public class VariantEvalWalker extends RodWalker implements Tr return comps.iterator().next(); // find all of the matching comps - List matchingComps = new ArrayList(comps.size()); - for ( VariantContext comp : comps ) { - if ( comp.getType() == eval.getType() ) - matchingComps.add(comp); + VariantContext lenientMatch = null; + for ( final VariantContext comp : comps ) { + switch ( doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) ) { + case STRICT: + return comp; + case LENIENT: + if ( lenientMatch == null ) lenientMatch = comp; + break; + case NO_MATCH: + ; + } } - // if no matching comp, return null - if ( matchingComps.size() == 0 ) - return null; - - // find the comp which matches both the reference allele and alternate allele from eval - Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0); - for ( VariantContext comp : matchingComps ) { - Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0); - if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) ) - return comp; - } - - // if none match, just return the first one unless we require a strict match - return (requireStrictAlleleMatch ? null : matchingComps.get(0)); + // nothing matched, just return lenientMatch, which might be null + return lenientMatch; } public Integer treeReduce(Integer lhs, Integer rhs) { return null; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index cb44ca522..fdeb6919d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -417,8 +417,6 @@ public class VariantEvalUtils { } } else { stateKeys.add(stateKey); - - return stateKeys; } return stateKeys; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java index 82776ca2e..6f0ae7c25 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java @@ -140,15 +140,16 @@ public class GaussianMixtureModel { } for( final VariantDatum datum : data ) { - final ArrayList pVarInGaussianLog10 = new ArrayList( gaussians.size() ); + final double[] pVarInGaussianLog10 = new double[gaussians.size()]; + int gaussianIndex = 0; for( final MultivariateGaussian gaussian : gaussians ) { final double pVarLog10 = gaussian.evaluateDatumLog10( datum ); - pVarInGaussianLog10.add( pVarLog10 ); + pVarInGaussianLog10[gaussianIndex++] = pVarLog10; } - final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10 ); - int iii = 0; + final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10, false ); + gaussianIndex = 0; for( final MultivariateGaussian gaussian : gaussians ) { - gaussian.assignPVarInGaussian( pVarInGaussianNormalized[iii++] ); //BUGBUG: to clean up + gaussian.assignPVarInGaussian( pVarInGaussianNormalized[gaussianIndex++] ); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 7cc5b1625..3cdcf4982 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -372,6 +372,8 @@ public class VariantRecalibrator extends RodWalker= 0", "hapStartInRefCoords >= 0"}) + public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) { if( refAllele.length() != altAllele.length() ) { refInsertLocation++; } - int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation); + int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(hapStartInRefCoords, haplotypeCigar, refInsertLocation); if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype - return getBases().clone(); + return bases.clone(); } - haplotypeInsertLocation += numBasesAddedToStartOfHaplotype; - final byte[] newHaplotype = getBases().clone(); + byte[] newHaplotype; try { if( refAllele.length() == altAllele.length() ) { // SNP or MNP + newHaplotype = bases.clone(); for( int iii = 0; iii < altAllele.length(); iii++ ) { newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; } - } else if( refAllele.length() < altAllele.length() ) { // insertion + } else if( refAllele.length() < altAllele.length() ) { // insertion final int altAlleleLength = altAllele.length(); + newHaplotype = new byte[bases.length + altAlleleLength]; + for( int iii = 0; iii < bases.length; iii++ ) { + newHaplotype[iii] = bases[iii]; + } for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) { newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; } @@ -138,24 +143,17 @@ public class Haplotype { newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; } } else { // deletion - int refHaplotypeOffset = 0; - for( final CigarElement ce : haplotypeCigar.getCigarElements()) { - if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); } - else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); } - } - for( int iii = 0; iii < altAllele.length(); iii++ ) { - newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; - } final int shift = refAllele.length() - altAllele.length(); - for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) { - newHaplotype[iii] = newHaplotype[iii+shift]; + newHaplotype = new byte[bases.length - shift]; + for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) { + newHaplotype[iii] = bases[iii]; } - for( int iii = 0; iii < shift; iii++ ) { - newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii]; + for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) { + newHaplotype[iii] = bases[iii+shift]; } } } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding - return getBases().clone(); + return bases.clone(); } return newHaplotype; diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 6f2db67ee..a96cbffc5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -41,14 +41,6 @@ import java.util.*; * @author Kiran Garimella */ public class MathUtils { - /** Public constants - used for the Lanczos approximation to the factorial function - * (for the calculation of the binomial/multinomial probability in logspace) - * @param LANC_SEQ[] - an array holding the constants which correspond to the product - * of Chebyshev Polynomial coefficients, and points on the Gamma function (for interpolation) - * [see A Precision Approximation of the Gamma Function J. SIAM Numer. Anal. Ser. B, Vol. 1 1964. pp. 86-96] - * @param LANC_G - a value for the Lanczos approximation to the gamma function that works to - * high precision - */ /** * Private constructor. No instantiating this class! @@ -56,6 +48,28 @@ public class MathUtils { private MathUtils() { } + public static final double[] log10Cache; + private static final double[] jacobianLogTable; + private static final double JACOBIAN_LOG_TABLE_STEP = 0.001; + private static final double MAX_JACOBIAN_TOLERANCE = 10.0; + private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1; + private static final int MAXN = 11000; + private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients + + static { + log10Cache = new double[LOG10_CACHE_SIZE]; + jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE]; + + log10Cache[0] = Double.NEGATIVE_INFINITY; + for (int k = 1; k < LOG10_CACHE_SIZE; k++) + log10Cache[k] = Math.log10(k); + + for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { + jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); + + } + } + // A fast implementation of the Math.round() method. This method does not perform // under/overflow checking, so this shouldn't be used in the general case (but is fine // if one is already make those checks before calling in to the rounding). @@ -536,7 +550,7 @@ public class MathUtils { // all negative) the largest value; also, we need to convert to normal-space. double maxValue = Utils.findMaxEntry(array); - // we may decide to just normalize in log space with converting to linear space + // we may decide to just normalize in log space without converting to linear space if (keepInLogSpace) { for (int i = 0; i < array.length; i++) array[i] -= maxValue; @@ -563,29 +577,6 @@ public class MathUtils { return normalized; } - public static double[] normalizeFromLog10(List array, boolean takeLog10OfOutput) { - double[] normalized = new double[array.size()]; - - // for precision purposes, we need to add (or really subtract, since they're - // all negative) the largest value; also, we need to convert to normal-space. - double maxValue = MathUtils.arrayMaxDouble(array); - for (int i = 0; i < array.size(); i++) - normalized[i] = Math.pow(10, array.get(i) - maxValue); - - // normalize - double sum = 0.0; - for (int i = 0; i < array.size(); i++) - sum += normalized[i]; - for (int i = 0; i < array.size(); i++) { - double x = normalized[i] / sum; - if (takeLog10OfOutput) - x = Math.log10(x); - normalized[i] = x; - } - - return normalized; - } - /** * normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE). * @@ -596,10 +587,6 @@ public class MathUtils { return normalizeFromLog10(array, false); } - public static double[] normalizeFromLog10(List array) { - return normalizeFromLog10(array, false); - } - public static int maxElementIndex(final double[] array) { return maxElementIndex(array, array.length); } @@ -1207,78 +1194,11 @@ public class MathUtils { return ((double) num) / (Math.max(denom, 1)); } - public static final double[] log10Cache; - public static final double[] jacobianLogTable; - public static final int JACOBIAN_LOG_TABLE_SIZE = 101; - public static final double JACOBIAN_LOG_TABLE_STEP = 0.1; - public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP; - public static final double MAX_JACOBIAN_TOLERANCE = 10.0; - private static final int MAXN = 11000; - private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients - - static { - log10Cache = new double[LOG10_CACHE_SIZE]; - jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE]; - - log10Cache[0] = Double.NEGATIVE_INFINITY; - for (int k = 1; k < LOG10_CACHE_SIZE; k++) - log10Cache[k] = Math.log10(k); - - for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { - jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); - - } - } - - static public double softMax(final double[] vec) { - double acc = vec[0]; - for (int k = 1; k < vec.length; k++) - acc = softMax(acc, vec[k]); - - return acc; - - } - static public double max(double x0, double x1, double x2) { double a = Math.max(x0, x1); return Math.max(a, x2); } - static public double softMax(final double x0, final double x1, final double x2) { - // compute naively log10(10^x[0] + 10^x[1]+...) - // return Math.log10(MathUtils.sumLog10(vec)); - - // better approximation: do Jacobian logarithm function on data pairs - double a = softMax(x0, x1); - return softMax(a, x2); - } - - static public double softMax(final double x, final double y) { - // we need to compute log10(10^x + 10^y) - // By Jacobian logarithm identity, this is equal to - // max(x,y) + log10(1+10^-abs(x-y)) - // we compute the second term as a table lookup - // with integer quantization - - // slow exact version: - // return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y)); - - double diff = x - y; - - if (diff > MAX_JACOBIAN_TOLERANCE) - return x; - else if (diff < -MAX_JACOBIAN_TOLERANCE) - return y; - else if (diff >= 0) { - int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5); - return x + jacobianLogTable[ind]; - } - else { - int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5); - return y + jacobianLogTable[ind]; - } - } - public static double phredScaleToProbability(byte q) { return Math.pow(10, (-q) / 10.0); } @@ -1734,6 +1654,4 @@ public class MathUtils { return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length. } - - } diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index fb133d902..62a67a1f2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -4,6 +4,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -314,6 +315,15 @@ public class ClippingOp { if (start == 0) hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar)); + if (read.hasBaseIndelQualities()) { + byte[] newBaseInsertionQuals = new byte[newLength]; + byte[] newBaseDeletionQuals = new byte[newLength]; + System.arraycopy(read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength); + System.arraycopy(read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength); + hardClippedRead.setBaseQualities(newBaseInsertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION); + hardClippedRead.setBaseQualities(newBaseDeletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION); + } + return hardClippedRead; } diff --git a/public/java/src/org/broadinstitute/sting/utils/crypt/CryptUtils.java b/public/java/src/org/broadinstitute/sting/utils/crypt/CryptUtils.java new file mode 100644 index 000000000..e84b1432e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/crypt/CryptUtils.java @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.io.IOUtils; + +import javax.crypto.Cipher; +import java.io.File; +import java.io.InputStream; +import java.security.*; +import java.security.spec.InvalidKeySpecException; +import java.security.spec.KeySpec; +import java.security.spec.PKCS8EncodedKeySpec; +import java.security.spec.X509EncodedKeySpec; +import java.util.Arrays; + +/** + * A set of cryptographic utility methods and constants. + * + * Contains methods to: + * + * -Create a public/private key pair + * -Read and write public/private keys to/from files/streams + * -Load the GATK master private/public keys + * -Encrypt/decrypt data + * + * Also contains constants that control the cryptographic defaults + * throughout the GATK. + * + * @author David Roazen + */ +public class CryptUtils { + + // --------------------------------------------------------------------------------- + // Constants (these control the default cryptographic settings throughout the GATK): + // --------------------------------------------------------------------------------- + + /** + * Default key length in bits of newly-created keys. 2048 bits provides a good balance between + * security and speed. + */ + public static final int DEFAULT_KEY_LENGTH = 2048; + + /** + * Default encryption algorithm to use, when none is specified. + */ + public static final String DEFAULT_ENCRYPTION_ALGORITHM = "RSA"; + + /** + * Default random-number generation algorithm to use, when none is specified. + */ + public static final String DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM = "SHA1PRNG"; + + /** + * Name of the public key file distributed with the GATK. This file is packaged + * into the GATK jar, and we use the system ClassLoader to find it. + */ + public static final String GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME = "GATK_public.key"; + + /** + * Location of the master copy of the GATK private key. + */ + public static final String GATK_MASTER_PRIVATE_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_private.key"; + + /** + * Location of the master copy of the GATK public key. This file should always be the same as + * the public key file distributed with the GATK (and there are automated tests to ensure that it is). + */ + public static final String GATK_MASTER_PUBLIC_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_public.key"; + + /** + * Directory where generated GATK user keys are stored. See the GATKKey class for more information. + */ + public static final String GATK_USER_KEY_DIRECTORY = "/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/"; + + + // ----------------------- + // Utility Methods: + // ----------------------- + + /** + * Generate a new public/private key pair using the default encryption settings defined above. + * + * @return A new public/private key pair created using the default settings + */ + public static KeyPair generateKeyPair() { + return generateKeyPair(DEFAULT_KEY_LENGTH, DEFAULT_ENCRYPTION_ALGORITHM, DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM); + } + + /** + * Generate a new public/private key pair using custom encryption settings. + * + * @param keyLength Length of the key in bits + * @param encryptionAlgorithm Encryption algorithm to use + * @param randNumberAlgorithm Random-number generation algorithm to use + * @return A new public/private key pair, created according to the specified parameters + */ + public static KeyPair generateKeyPair( int keyLength, String encryptionAlgorithm, String randNumberAlgorithm ) { + try { + KeyPairGenerator keyGen = KeyPairGenerator.getInstance(encryptionAlgorithm); + SecureRandom randomnessSource = createRandomnessSource(randNumberAlgorithm); + + keyGen.initialize(keyLength, randomnessSource); + return keyGen.generateKeyPair(); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e); + } + catch ( Exception e ) { + throw new ReviewedStingException("Error while generating key pair", e); + } + } + + /** + * Create a source of randomness using the default random-number generation algorithm. + * + * @return A randomness source that uses the default algorithm + */ + public static SecureRandom createRandomnessSource() { + return createRandomnessSource(DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM); + } + + /** + * Create a source of randomness using a custom random-number generation algorithm. + * + * @param randAlgorithm The random-number generation algorithm to use + * @return A randomness sources that uses the specified algorithm + */ + public static SecureRandom createRandomnessSource ( String randAlgorithm ) { + try { + return SecureRandom.getInstance(randAlgorithm); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested random-number generation algorithm %s", randAlgorithm), e); + } + } + + /** + * Writes a public/private key pair to disk + * + * @param keyPair The key pair we're writing to disk + * @param privateKeyFile Location to write the private key + * @param publicKeyFile Location to write the public key + */ + public static void writeKeyPair ( KeyPair keyPair, File privateKeyFile, File publicKeyFile ) { + writeKey(keyPair.getPrivate(), privateKeyFile); + writeKey(keyPair.getPublic(), publicKeyFile); + } + + /** + * Writes an arbitrary key to disk + * + * @param key The key to write + * @param destination Location to write the key to + */ + public static void writeKey ( Key key, File destination ) { + IOUtils.writeByteArrayToFile(key.getEncoded(), destination); + } + + /** + * Reads in a public key created using the default encryption algorithm from a file. + * + * @param source File containing the public key + * @return The public key read + */ + public static PublicKey readPublicKey ( File source ) { + return decodePublicKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM); + } + + /** + * Reads in a public key created using the default encryption algorithm from a stream. + * + * @param source Stream attached to the public key + * @return The public key read + */ + public static PublicKey readPublicKey ( InputStream source ) { + return decodePublicKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM); + } + + /** + * Decodes the raw bytes of a public key into a usable object. + * + * @param rawKey The encoded bytes of a public key as read from, eg., a file. The + * key must be in the standard X.509 format for a public key. + * @param encryptionAlgorithm The encryption algorithm used to create the public key + * @return The public key as a usable object + */ + public static PublicKey decodePublicKey ( byte[] rawKey, String encryptionAlgorithm ) { + try { + KeySpec keySpec = new X509EncodedKeySpec(rawKey); + KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm); + return keyFactory.generatePublic(keySpec); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e); + } + catch ( InvalidKeySpecException e ) { + throw new ReviewedStingException("Unable to use X.509 key specification to decode the given key", e); + } + } + + /** + * Reads in a private key created using the default encryption algorithm from a file. + * + * @param source File containing the private key + * @return The private key read + */ + public static PrivateKey readPrivateKey ( File source ) { + return decodePrivateKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM); + } + + /** + * Reads in a private key created using the default encryption algorithm from a stream. + * + * @param source Stream attached to the private key + * @return The private key read + */ + public static PrivateKey readPrivateKey ( InputStream source ) { + return decodePrivateKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM); + } + + /** + * Decodes the raw bytes of a private key into a usable object. + * + * @param rawKey The encoded bytes of a private key as read from, eg., a file. The + * key must be in the standard PKCS #8 format for a private key. + * @param encryptionAlgorithm The encryption algorithm used to create the private key + * @return The private key as a usable object + */ + public static PrivateKey decodePrivateKey ( byte[] rawKey, String encryptionAlgorithm ) { + try { + KeySpec keySpec = new PKCS8EncodedKeySpec(rawKey); + KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm); + return keyFactory.generatePrivate(keySpec); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e); + } + catch ( InvalidKeySpecException e ) { + throw new ReviewedStingException("Unable to use the PKCS #8 key specification to decode the given key", e); + } + } + + /** + * Loads the copy of the GATK public key that is distributed with the GATK. Uses the system + * ClassLoader to locate the public key file, which should be stored at the root of the GATK + * jar file. + * + * @return The GATK public key as a usable object + */ + public static PublicKey loadGATKDistributedPublicKey() { + InputStream publicKeyInputStream = ClassLoader.getSystemResourceAsStream(GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME); + + if ( publicKeyInputStream == null ) { + throw new ReviewedStingException(String.format("Could not locate the GATK public key %s in the classpath", + GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME)); + } + + return readPublicKey(publicKeyInputStream); + } + + /** + * Loads the master copy of the GATK private key. You must have the appropriate UNIX permissions + * to do this! + * + * @return The GATK master private key as a usable object + */ + public static PrivateKey loadGATKMasterPrivateKey() { + return readPrivateKey(new File(GATK_MASTER_PRIVATE_KEY_FILE)); + } + + /** + * Loads the master copy of the GATK public key. This should always be the same as the + * public key distributed with the GATK returned by loadGATKDistributedPublicKey(). + * + * @return The GATK master public key as a usable object + */ + public static PublicKey loadGATKMasterPublicKey() { + return readPublicKey(new File(GATK_MASTER_PUBLIC_KEY_FILE)); + } + + /** + * Encrypts the given data using the key provided. + * + * @param data The data to encrypt, as a byte array + * @param encryptKey The key with which to encrypt the data + * @return The encrypted version of the provided data + */ + public static byte[] encryptData ( byte[] data, Key encryptKey ) { + return transformDataUsingCipher(data, encryptKey, Cipher.ENCRYPT_MODE); + } + + /** + * Decrypts the given data using the key provided. + * + * @param encryptedData Data to decrypt, as a byte array + * @param decryptKey The key with which to decrypt the data + * @return The decrypted version of the provided data + */ + public static byte[] decryptData ( byte[] encryptedData, Key decryptKey ) { + return transformDataUsingCipher(encryptedData, decryptKey, Cipher.DECRYPT_MODE); + } + + /** + * Helper method for encryption/decryption that takes data and processes it using + * the given key + * + * @param data Data to encrypt/decrypt + * @param key Key to use to encrypt/decrypt the data + * @param cipherMode Specifies whether we are encrypting or decrypting + * @return The encrypted/decrypted data + */ + private static byte[] transformDataUsingCipher ( byte[] data, Key key, int cipherMode ) { + try { + Cipher cipher = Cipher.getInstance(key.getAlgorithm()); + cipher.init(cipherMode, key); + return cipher.doFinal(data); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested algorithm %s", + key.getAlgorithm()), e); + } + catch ( InvalidKeyException e ) { + throw new ReviewedStingException("Key is invalid", e); + } + catch ( GeneralSecurityException e ) { + throw new ReviewedStingException("Error during encryption", e); + } + } + + /** + * Tests whether the public/private keys provided can each decrypt data encrypted by + * the other key -- ie., tests whether these two keys are part of the same public/private + * key pair. + * + * @param privateKey The private key to test + * @param publicKey The public key to test + * @return True if the keys are part of the same key pair and can decrypt each other's + * encrypted data, otherwise false. + */ + public static boolean keysDecryptEachOther ( PrivateKey privateKey, PublicKey publicKey ) { + byte[] plainText = "Test PlainText".getBytes(); + + byte[] dataEncryptedUsingPrivateKey = CryptUtils.encryptData(plainText, privateKey); + byte[] dataEncryptedUsingPublicKey = CryptUtils.encryptData(plainText, publicKey); + + byte[] privateKeyDataDecryptedWithPublicKey = CryptUtils.decryptData(dataEncryptedUsingPrivateKey, publicKey); + byte[] publicKeyDataDecryptedWithPrivateKey = CryptUtils.decryptData(dataEncryptedUsingPublicKey, privateKey); + + // Make sure we actually transformed the data during encryption: + if ( Arrays.equals(plainText, dataEncryptedUsingPrivateKey) || + Arrays.equals(plainText, dataEncryptedUsingPublicKey) || + Arrays.equals(dataEncryptedUsingPrivateKey, dataEncryptedUsingPublicKey) ) { + return false; + } + + // Make sure that we were able to recreate the original plaintext using + // both the public key on the private-key-encrypted data and the private + // key on the public-key-encrypted data: + if ( ! Arrays.equals(plainText, privateKeyDataDecryptedWithPublicKey) || + ! Arrays.equals(plainText, publicKeyDataDecryptedWithPrivateKey) ) { + return false; + } + + return true; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/crypt/GATKKey.java b/public/java/src/org/broadinstitute/sting/utils/crypt/GATKKey.java new file mode 100644 index 000000000..408cb56aa --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/crypt/GATKKey.java @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.io.IOUtils; + +import java.io.*; +import java.security.*; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +/** + * Class to represent a GATK user key. + * + * A GATK user key contains an email address and a cryptographic signature. + * The signature is the SHA-1 hash of the email address encrypted using + * the GATK master private key. The GATK master public key (distributed + * with the GATK) is used to decrypt the signature and validate the key + * at the start of each GATK run that requires a key. + * + * Keys are cryptographically secure in that valid keys definitely come + * from us and cannot be fabricated, however nothing prevents keys from + * being shared between users. + * + * GATK user keys have the following on-disk format: + * + * GZIP Container: + * Email address + * NUL byte (delimiter) + * Cryptographic Signature (encrypted SHA-1 hash of email address) + * + * The key data is wrapped within a GZIP container to placate over-zealous + * email filters (since keys must often be emailed) and also to provide an + * additional integrity check via the built-in GZIP CRC. + * + * @author David Roazen + */ +public class GATKKey { + + /** + * Private key used to sign the GATK key. Required only when creating a new + * key from scratch, not when loading an existing key from disk. + */ + private PrivateKey privateKey; + + /** + * Public key used to validate the GATK key. + */ + private PublicKey publicKey; + + /** + * The user's email address, stored within the key and signed. + */ + private String emailAddress; + + /** + * The cryptographic signature of the email address. By default, this is + * the SHA-1 hash of the email address encrypted using the RSA algorithm. + */ + private byte[] signature; + + /** + * The combination of hash/encryption algorithms to use to generate the signature. + * By default this is "SHA1withRSA" + */ + private String signingAlgorithm; + + /** + * Default hash/encryption algorithms to use to sign the key. + */ + public static final String DEFAULT_SIGNING_ALGORITHM = "SHA1withRSA"; + + /** + * Byte value used to separate the email address from its signature in the key file. + */ + public static final byte GATK_KEY_SECTIONAL_DELIMITER = 0; + + + // ----------------------- + // Constructors: + // ----------------------- + + /** + * Constructor to create a new GATK key from scratch using an email address + * and public/private key pair. The private key is used for signing, and the + * public key is used to validate the newly-created key. + * + * @param privateKey Private key used to sign the new GATK key + * @param publicKey Public key used to validate the new GATK key + * @param emailAddress The user's email address, which we will store in the key and sign + */ + public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress ) { + this(privateKey, publicKey, emailAddress, DEFAULT_SIGNING_ALGORITHM); + } + + /** + * Constructor to create a new GATK key from scratch using an email address + * and public/private key pair, and additionally specify the signing algorithm + * to use. The private key is used for signing, and the public key is used to + * validate the newly-created key. + * + * @param privateKey Private key used to sign the new GATK key + * @param publicKey Public key used to validate the new GATK key + * @param emailAddress The user's email address, which we will store in the key and sign + * @param signingAlgorithm The combination of hash and encryption algorithms to use to sign the key + */ + public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress, String signingAlgorithm ) { + if ( privateKey == null || publicKey == null || emailAddress == null || emailAddress.length() == 0 || signingAlgorithm == null ) { + throw new ReviewedStingException("Cannot construct GATKKey using null/empty arguments"); + } + + this.privateKey = privateKey; + this.publicKey = publicKey; + this.emailAddress = emailAddress; + this.signingAlgorithm = signingAlgorithm; + + validateEmailAddress(); + generateSignature(); + + if ( ! isValid() ) { + throw new ReviewedStingException("Newly-generated GATK key fails validation -- this should never happen!"); + } + } + + /** + * Constructor to load an existing GATK key from a file. + * + * During loading, the key file is checked for integrity, but not cryptographic + * validity (which must be done through a subsequent call to isValid()). + * + * @param publicKey Public key that will be used to validate the loaded GATK key + * in subsequent calls to isValid() + * @param keyFile File containing the GATK key to load + */ + public GATKKey ( PublicKey publicKey, File keyFile ) { + this(publicKey, keyFile, DEFAULT_SIGNING_ALGORITHM); + } + + /** + * Constructor to load an existing GATK key from a file, and additionally specify + * the signing algorithm used to sign the key being loaded. + * + * During loading, the key file is checked for integrity, but not cryptographic + * validity (which must be done through a subsequent call to isValid()). + * + * @param publicKey Public key that will be used to validate the loaded GATK key + * in subsequent calls to isValid() + * @param keyFile File containing the GATK key to load + * @param signingAlgorithm The combination of hash and encryption algorithms used to sign the key + */ + public GATKKey ( PublicKey publicKey, File keyFile, String signingAlgorithm ) { + if ( publicKey == null || keyFile == null || signingAlgorithm == null ) { + throw new ReviewedStingException("Cannot construct GATKKey using null arguments"); + } + + this.publicKey = publicKey; + this.signingAlgorithm = signingAlgorithm; + + readKey(keyFile); + } + + // ----------------------- + // Public API Methods: + // ----------------------- + + /** + * Writes out this key to a file in the format described at the top of this class, + * encapsulating the key within a GZIP container. + * + * @param destination File to write the key to + */ + public void writeKey ( File destination ) { + try { + byte[] keyBytes = marshalKeyData(); + IOUtils.writeByteArrayToStream(keyBytes, new GZIPOutputStream(new FileOutputStream(destination))); + } + catch ( IOException e ) { + throw new UserException.CouldNotCreateOutputFile(destination, e); + } + } + + /** + * Checks whether the signature of this key is cryptographically valid (ie., can be + * decrypted by the public key to produce a valid SHA-1 hash of the email address + * in the key). + * + * @return True if the key's signature passes validation, otherwise false + */ + public boolean isValid() { + try { + Signature sig = Signature.getInstance(signingAlgorithm); + sig.initVerify(publicKey); + sig.update(emailAddress.getBytes()); + return sig.verify(signature); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e); + } + catch ( InvalidKeyException e ) { + // If the GATK public key is invalid, it's likely our problem, not the user's: + throw new ReviewedStingException(String.format("Public key %s is invalid", publicKey), e); + } + catch ( SignatureException e ) { + throw new UserException.UnreadableKeyException("Signature is invalid or signing algorithm was unable to process the input data", e); + } + } + + // ----------------------- + // Private Helper Methods: + // ----------------------- + + /** + * Helper method that creates a signature for this key using the combination of + * hash/encryption algorithms specified at construction time. + */ + private void generateSignature() { + try { + Signature sig = Signature.getInstance(signingAlgorithm); + sig.initSign(privateKey, CryptUtils.createRandomnessSource()); + sig.update(emailAddress.getBytes()); + signature = sig.sign(); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e); + } + catch ( InvalidKeyException e ) { + throw new ReviewedStingException(String.format("Private key %s is invalid", privateKey), e); + } + catch ( SignatureException e ) { + throw new ReviewedStingException(String.format("Error creating signature for email address %s", emailAddress), e); + } + } + + /** + * Helper method that reads in a GATK key from a file. Should not be called directly -- + * use the appropriate constructor above. + * + * @param source File to read the key from + */ + private void readKey ( File source ) { + try { + byte[] keyBytes = IOUtils.readStreamIntoByteArray(new GZIPInputStream(new FileInputStream(source))); + + // As a sanity check, compare the number of bytes read to the uncompressed file size + // stored in the GZIP ISIZE field. If they don't match, the key must be corrupt: + if ( keyBytes.length != IOUtils.getGZIPFileUncompressedSize(source) ) { + throw new UserException.UnreadableKeyException("Number of bytes read does not match the uncompressed size specified in the GZIP ISIZE field"); + } + + unmarshalKeyData(keyBytes); + } + catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(source, e); + } + catch ( IOException e ) { + throw new UserException.UnreadableKeyException(source, e); + } + catch ( UserException.CouldNotReadInputFile e ) { + throw new UserException.UnreadableKeyException(source, e); + } + } + + /** + * Helper method that assembles the email address and signature into a format + * suitable for writing to disk. + * + * @return The aggregated key data, ready to be written to disk + */ + private byte[] marshalKeyData() { + byte[] emailAddressBytes = emailAddress.getBytes(); + byte[] assembledKey = new byte[emailAddressBytes.length + 1 + signature.length]; + + System.arraycopy(emailAddressBytes, 0, assembledKey, 0, emailAddressBytes.length); + assembledKey[emailAddressBytes.length] = GATK_KEY_SECTIONAL_DELIMITER; + System.arraycopy(signature, 0, assembledKey, emailAddressBytes.length + 1, signature.length); + + return assembledKey; + } + + /** + * Helper method that parses the raw key data from disk into its component + * email address and signature. Performs some basic validation in the process. + * + * @param keyBytes The raw, uncompressed key data read from disk + */ + private void unmarshalKeyData ( byte[] keyBytes ) { + int delimiterPosition = -1; + + for ( int i = 0; i < keyBytes.length; i++ ) { + if ( keyBytes[i] == GATK_KEY_SECTIONAL_DELIMITER ) { + delimiterPosition = i; + break; + } + } + + if ( delimiterPosition == -1 ) { + throw new UserException.UnreadableKeyException("Malformed GATK key contains no sectional delimiter"); + } + else if ( delimiterPosition == 0 ) { + throw new UserException.UnreadableKeyException("Malformed GATK key contains no email address"); + } + else if ( delimiterPosition == keyBytes.length - 1 ) { + throw new UserException.UnreadableKeyException("Malformed GATK key contains no signature"); + } + + byte[] emailAddressBytes = new byte[delimiterPosition]; + System.arraycopy(keyBytes, 0, emailAddressBytes, 0, delimiterPosition); + emailAddress = new String(emailAddressBytes); + + signature = new byte[keyBytes.length - delimiterPosition - 1]; + System.arraycopy(keyBytes, delimiterPosition + 1, signature, 0, keyBytes.length - delimiterPosition - 1); + } + + /** + * Helper method that ensures that the user's email address does not contain the NUL byte, which we + * reserve as a delimiter within each key file. + */ + private void validateEmailAddress() { + for ( byte b : emailAddress.getBytes() ) { + if ( b == GATK_KEY_SECTIONAL_DELIMITER ) { + throw new UserException(String.format("Email address must not contain a byte with value %d", GATK_KEY_SECTIONAL_DELIMITER)); + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 2ece3b077..6cc8008d2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -132,6 +132,10 @@ public class UserException extends ReviewedStingException { public CouldNotReadInputFile(File file, Exception e) { this(file, e.getMessage()); } + + public CouldNotReadInputFile(String message) { + super(message); + } } @@ -151,6 +155,10 @@ public class UserException extends ReviewedStingException { public CouldNotCreateOutputFile(File file, Exception e) { super(String.format("Couldn't write file %s because exception %s", file.getAbsolutePath(), e.getMessage())); } + + public CouldNotCreateOutputFile(String message, Exception e) { + super(message, e); + } } public static class MissortedBAM extends UserException { @@ -319,4 +327,32 @@ public class UserException extends ReviewedStingException { "and try again.", null); } } + + public static class UnreadableKeyException extends UserException { + public UnreadableKeyException ( File f, Exception e ) { + super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " + + "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", + f.getAbsolutePath(), e.getMessage())); + } + + public UnreadableKeyException ( String message, Exception e ) { + this(String.format("%s. Error was: %s", message, e.getMessage())); + } + + public UnreadableKeyException ( String message ) { + super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " + + "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", + message)); + } + } + + public static class KeySignatureVerificationException extends UserException { + public KeySignatureVerificationException ( File f ) { + super(String.format("The signature in key file %s failed cryptographic verification. " + + "If this key was valid in the past, it's likely been revoked. " + + "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + + "for help.", + f.getAbsolutePath())); + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index 7104b1edd..eea45567f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -151,23 +151,16 @@ public class FragmentUtils { final int numBases = firstReadStop + secondRead.getReadLength(); final byte[] bases = new byte[numBases]; final byte[] quals = new byte[numBases]; - // BUGBUG: too verbose, clean this up. final byte[] insertionQuals = new byte[numBases]; final byte[] deletionQuals = new byte[numBases]; final byte[] firstReadBases = firstRead.getReadBases(); final byte[] firstReadQuals = firstRead.getBaseQualities(); - final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities(); - final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities(); final byte[] secondReadBases = secondRead.getReadBases(); final byte[] secondReadQuals = secondRead.getBaseQualities(); - final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities(); - final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities(); for(int iii = 0; iii < firstReadStop; iii++) { bases[iii] = firstReadBases[iii]; quals[iii] = firstReadQuals[iii]; - insertionQuals[iii] = firstReadInsertionQuals[iii]; - deletionQuals[iii] = firstReadDeletionQuals[iii]; } for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) { if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) { @@ -175,22 +168,16 @@ public class FragmentUtils { } bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] ); quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] ); - insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score - deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score } for(int iii = firstRead.getReadLength(); iii < numBases; iii++) { bases[iii] = secondReadBases[iii-firstReadStop]; quals[iii] = secondReadQuals[iii-firstReadStop]; - insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop]; - deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop]; } final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader()); returnRead.setAlignmentStart(firstRead.getUnclippedStart()); returnRead.setReadBases( bases ); - returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ); - returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION ); - returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION ); + returnRead.setBaseQualities( quals ); returnRead.setReadGroup( firstRead.getReadGroup() ); returnRead.setReferenceName( firstRead.getReferenceName() ); final CigarElement c = new CigarElement(bases.length, CigarOperator.M); @@ -199,6 +186,27 @@ public class FragmentUtils { returnRead.setCigar( new Cigar( cList )); returnRead.setMappingQuality( firstRead.getMappingQuality() ); + if( firstRead.hasBaseIndelQualities() || secondRead.hasBaseIndelQualities() ) { + final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities(); + final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities(); + final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities(); + final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities(); + for(int iii = 0; iii < firstReadStop; iii++) { + insertionQuals[iii] = firstReadInsertionQuals[iii]; + deletionQuals[iii] = firstReadDeletionQuals[iii]; + } + for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) { + insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score + deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score + } + for(int iii = firstRead.getReadLength(); iii < numBases; iii++) { + insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop]; + deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop]; + } + returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION ); + returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION ); + } + final ArrayList returnList = new ArrayList(); returnList.add(returnRead); return returnList; diff --git a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java index a5ba857ef..160df0e51 100644 --- a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java @@ -29,10 +29,13 @@ import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.LineIterator; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.*; public class IOUtils { @@ -400,4 +403,173 @@ public class IOUtils { public static boolean isSpecialFile(File file) { return file != null && (file.getAbsolutePath().startsWith("/dev/") || file.equals(DEV_DIR)); } + + /** + * Reads the entirety of the given file into a byte array. Uses a read buffer size of 4096 bytes. + * + * @param source File to read + * @return The contents of the file as a byte array + */ + public static byte[] readFileIntoByteArray ( File source ) { + return readFileIntoByteArray(source, 4096); + } + + /** + * Reads the entirety of the given file into a byte array using the requested read buffer size. + * + * @param source File to read + * @param readBufferSize Number of bytes to read in at one time + * @return The contents of the file as a byte array + */ + public static byte[] readFileIntoByteArray ( File source, int readBufferSize ) { + if ( source == null ) { + throw new ReviewedStingException("Source file was null"); + } + + byte[] fileContents; + + try { + fileContents = readStreamIntoByteArray(new FileInputStream(source), readBufferSize); + } + catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(source, e); + } + + if ( fileContents.length != source.length() ) { + throw new UserException.CouldNotReadInputFile(String.format("Unable to completely read file %s: read only %d/%d bytes", + source.getAbsolutePath(), fileContents.length, source.length())); + } + + return fileContents; + } + + /** + * Reads all data from the given stream into a byte array. Uses a read buffer size of 4096 bytes. + * + * @param in Stream to read data from + * @return The contents of the stream as a byte array + */ + public static byte[] readStreamIntoByteArray ( InputStream in ) { + return readStreamIntoByteArray(in, 4096); + } + + /** + * Reads all data from the given stream into a byte array using the requested read buffer size. + * + * @param in Stream to read data from + * @param readBufferSize Number of bytes to read in at one time + * @return The contents of the stream as a byte array + */ + public static byte[] readStreamIntoByteArray ( InputStream in, int readBufferSize ) { + if ( in == null ) { + throw new ReviewedStingException("Input stream was null"); + } + else if ( readBufferSize <= 0 ) { + throw new ReviewedStingException("Read buffer size must be > 0"); + } + + // Use a fixed-size buffer for each read, but a dynamically-growing buffer + // to hold the accumulated contents of the file/stream: + byte[] readBuffer = new byte[readBufferSize]; + ByteArrayOutputStream fileBuffer = new ByteArrayOutputStream(readBufferSize * 4); + + try { + try { + int currentBytesRead; + + while ( (currentBytesRead = in.read(readBuffer, 0, readBuffer.length)) >= 0 ) { + fileBuffer.write(readBuffer, 0, currentBytesRead); + } + } + finally { + in.close(); + } + } + catch ( IOException e ) { + throw new UserException.CouldNotReadInputFile("I/O error reading from input stream", e); + } + + return fileBuffer.toByteArray(); + } + + /** + * Writes the given array of bytes to a file + * + * @param bytes Data to write + * @param destination File to write the data to + */ + public static void writeByteArrayToFile ( byte[] bytes, File destination ) { + if ( destination == null ) { + throw new ReviewedStingException("Destination file was null"); + } + + try { + writeByteArrayToStream(bytes, new FileOutputStream(destination)); + } + catch ( FileNotFoundException e ) { + throw new UserException.CouldNotCreateOutputFile(destination, e); + } + } + + /** + * Writes the given array of bytes to a stream + * + * @param bytes Data to write + * @param out Stream to write the data to + */ + public static void writeByteArrayToStream ( byte[] bytes, OutputStream out ) { + if ( bytes == null || out == null ) { + throw new ReviewedStingException("Data to write or output stream was null"); + } + + try { + try { + out.write(bytes); + } + finally { + out.close(); + } + } + catch ( IOException e ) { + throw new UserException.CouldNotCreateOutputFile("I/O error writing to output stream", e); + } + } + + /** + * Determines the uncompressed size of a GZIP file. Uses the GZIP ISIZE field in the last + * 4 bytes of the file to get this information. + * + * @param gzipFile GZIP-format file whose uncompressed size to determine + * @return The uncompressed size (in bytes) of the GZIP file + */ + public static int getGZIPFileUncompressedSize ( File gzipFile ) { + if ( gzipFile == null ) { + throw new ReviewedStingException("GZIP file to examine was null"); + } + + try { + // The GZIP ISIZE field holds the uncompressed size of the compressed data. + // It occupies the last 4 bytes of any GZIP file: + RandomAccessFile in = new RandomAccessFile(gzipFile, "r"); + in.seek(gzipFile.length() - 4); + byte[] sizeBytes = new byte[4]; + in.read(sizeBytes, 0, 4); + + ByteBuffer byteBuf = ByteBuffer.wrap(sizeBytes); + byteBuf.order(ByteOrder.LITTLE_ENDIAN); // The GZIP spec mandates little-endian byte order + int uncompressedSize = byteBuf.getInt(); + + // If the size read in is negative, we've overflowed our signed integer: + if ( uncompressedSize < 0 ) { + throw new UserException.CouldNotReadInputFile(String.format("Cannot accurately determine the uncompressed size of file %s " + + "because it's either larger than %d bytes or the GZIP ISIZE field is corrupt", + gzipFile.getAbsolutePath(), Integer.MAX_VALUE)); + } + + return uncompressedSize; + } + catch ( IOException e ) { + throw new UserException.CouldNotReadInputFile(gzipFile, e); + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index f5a9b2f45..648dafb81 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -194,6 +194,10 @@ public class GATKSAMRecord extends BAMRecord { } } + public boolean hasBaseIndelQualities() { + return getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null || getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null; + } + public byte[] getBaseInsertionQualities() { byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) ); if( quals == null ) { diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index ac3a970f9..bc4ce098b 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -6,6 +6,7 @@ import org.apache.log4j.Logger; import org.apache.log4j.PatternLayout; import org.apache.log4j.spi.LoggingEvent; import org.broadinstitute.sting.commandline.CommandLineUtils; +import org.broadinstitute.sting.utils.crypt.CryptUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.io.IOUtils; @@ -87,6 +88,9 @@ public abstract class BaseTest { public static final File testDirFile = new File("public/testdata/"); public static final String testDir = testDirFile.getAbsolutePath() + "/"; + public static final String keysDataLocation = validationDataLocation + "keys/"; + public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key"; + /** before the class starts up */ static { // setup a basic log configuration diff --git a/public/java/test/org/broadinstitute/sting/MedianUnitTest.java b/public/java/test/org/broadinstitute/sting/MedianUnitTest.java index c12db9b77..db89aee78 100644 --- a/public/java/test/org/broadinstitute/sting/MedianUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/MedianUnitTest.java @@ -29,7 +29,6 @@ package org.broadinstitute.sting; // the imports for unit testing. -import org.broadinstitute.sting.gatk.walkers.haplotypecaller.LikelihoodCalculationEngine; import org.broadinstitute.sting.utils.Median; import org.testng.Assert; import org.testng.annotations.BeforeSuite; @@ -42,12 +41,6 @@ import java.util.List; public class MedianUnitTest extends BaseTest { - LikelihoodCalculationEngine engine; - - @BeforeSuite - public void before() { - engine = new LikelihoodCalculationEngine(0, 0, false); - } // -------------------------------------------------------------------------------- // diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index ca7653b58..c9e3b6b1b 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -30,6 +30,7 @@ import org.broad.tribble.FeatureCodec; import org.broad.tribble.Tribble; import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.CommandLineGATK; @@ -45,7 +46,7 @@ import java.io.File; import java.util.*; public class WalkerTest extends BaseTest { - private static final boolean ENABLE_REPORTING = false; + private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false; @BeforeMethod public void initializeRandomGenerator() { @@ -121,11 +122,19 @@ public class WalkerTest extends BaseTest { } public class WalkerTestSpec { + + // Arguments implicitly included in all Walker command lines, unless explicitly + // disabled using the disableImplicitArgs() method below. + final String IMPLICIT_ARGS = ENABLE_PHONE_HOME_FOR_TESTS ? + String.format("-et %s", GATKRunReport.PhoneHomeOption.STANDARD) : + String.format("-et %s -K %s", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile); + String args = ""; int nOutputFiles = -1; List md5s = null; List exts = null; Class expectedException = null; + boolean includeImplicitArgs = true; // the default output path for the integration test private File outputFileLocation = null; @@ -159,6 +168,10 @@ public class WalkerTest extends BaseTest { this.expectedException = expectedException; } + public String getArgsWithImplicitArgs() { + return args + (includeImplicitArgs ? " " + IMPLICIT_ARGS : ""); + } + public void setOutputFileLocation(File outputFileLocation) { this.outputFileLocation = outputFileLocation; } @@ -180,6 +193,9 @@ public class WalkerTest extends BaseTest { auxillaryFiles.put(expectededMD5sum, outputfile); } + public void disableImplicitArgs() { + includeImplicitArgs = false; + } } protected boolean parameterize() { @@ -213,7 +229,7 @@ public class WalkerTest extends BaseTest { tmpFiles.add(fl); } - final String args = String.format(spec.args, tmpFiles.toArray()); + final String args = String.format(spec.getArgsWithImplicitArgs(), tmpFiles.toArray()); System.out.println(Utils.dupString('-', 80)); if ( spec.expectsException() ) { @@ -277,13 +293,10 @@ public class WalkerTest extends BaseTest { * @param args the argument list * @param expectedException the expected exception or null */ - public static void executeTest(String name, String args, Class expectedException) { + private void executeTest(String name, String args, Class expectedException) { CommandLineGATK instance = new CommandLineGATK(); String[] command = Utils.escapeExpressions(args); - // add the logging level to each of the integration test commands - command = Utils.appendArray(command, "-et", ENABLE_REPORTING ? "STANDARD" : "NO_ET"); - // run the executable boolean gotAnException = false; try { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java new file mode 100644 index 000000000..accb9c0cf --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diagnostics; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class ErrorRatePerCycleIntegrationTest extends WalkerTest { + @Test + public void basicTest() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s", + 1, + Arrays.asList("0cc212ecb6df300e321784039ff29f13")); + executeTest("ErrorRatePerCycle:", spec); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 823eeeeb9..cfb0d11a1 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("202b337ebbea3def1be8495eb363dfa8")); + Arrays.asList("8f81a14fffc1a59b4b066f8595dc1232")); executeTest("test MultiSample Pilot1", spec); } @@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("ae29b9c9aacce8046dc780430540cd62")); + Arrays.asList("c5b53231f4f6d9524bc4ec8115f44f5c")); executeTest("test SingleSample Pilot2", spec); } @@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1, - Arrays.asList("10027d13befaa07b7900a7af0ae0791c")); + Arrays.asList("5af005255240a2186f04cb50851b8b6f")); executeTest("test Multiple SNP alleles", spec); } @@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65"; + private final static String COMPRESSED_OUTPUT_MD5 = "a08df9aea2b3df09cf90ff8e6e3be3ea"; @Test public void testCompressedOutput() { @@ -91,7 +91,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "32a34362dff51d8b73a3335048516d82"; + String md5 = "6358934c1c26345013a38261b8c45aa4"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -179,8 +179,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "2cb2544739e01f6c08fd820112914317" ); - e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" ); + e.put( 0.01, "926b58038dd4989bf7eda697a847eea9" ); + e.put( 1.0 / 1850, "93f44105b43b65730a3b821e27b0fa16" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -204,7 +204,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("f0fbe472f155baf594b1eeb58166edef")); + Arrays.asList("a1b75a7e12b160b0be823228c958573f")); executeTest(String.format("test multiple technologies"), spec); } @@ -223,7 +223,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272")); + Arrays.asList("3bda1279cd6dcb47885f3e19466f11b9")); executeTest(String.format("test calling with BAQ"), spec); } @@ -242,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("bd9d3d50a1f49605d7cd592a0f446899")); + Arrays.asList("d9fc3ba94a0d46029778c7b457e7292a")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -257,7 +257,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6")); + Arrays.asList("b2e30ae3e5ffa6108f9f6178b1d2e679")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("91cd6d2e3972b0b8e4064bb35a33241f")); + Arrays.asList("2cd182a84613fa91a6020466d2d327e2")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd")); + Arrays.asList("9cd08dc412a007933381e9c76c073899")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); } @@ -290,7 +290,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("320f61c87253aba77d6dc782242cba8b")); + Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } @@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("c9897b80615c53a4ea10a4b193d56d9c")); + Arrays.asList("2609675a356f2dfc86f8a1d911210978")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("5282fdb1711a532d726c13507bf80a21")); + Arrays.asList("4fdd8da77167881b71b3547da5c13f94")); executeTest("test MultiSample Phase1 indels with complicated records", spec4); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 5c3a43c96..36c093e8f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("f909fd8374f663e983b9b3fda4cf5cf1") + Arrays.asList("c8d8bffa5c572df9dec7364f71a1b943") ); executeTest("testFunctionClassWithSnpeff", spec); } @@ -277,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("4f60acc8a4b21c4b4ccb51ad9071449c")); + 1, Arrays.asList("c49e239292704447a36e01ee9a71e729")); executeTestParallel("testSelect1", spec); } @@ -335,7 +335,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --dbsnp " + b37dbSNP132 + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("190e1a171132832bf92fbca56a9c40bb")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e42cda858649a35eaa9d14ea2d70a956")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -347,7 +347,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("08586d443fdcf3b7f63b8f9e3a943c62")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9561cb4c7aa36dcf30ba253385299859")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -463,7 +463,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef") + Arrays.asList("397b0e77459b9b69d2e0dd1dac320c3c") ); executeTest("testModernVCFWithLargeIndels", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java new file mode 100644 index 000000000..25bd7a2eb --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils; + + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Basic unit test for Haplotype Class + */ +public class HaplotypeUnitTest extends BaseTest { + @BeforeClass + public void init() { + } + + @Test + public void testSimpleInsertionAllele() { + final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA"; + basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testSimpleDeletionAllele() { + final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA"; + basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testSimpleSNPAllele() { + final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "AGTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("C", "G", 1, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCTACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("A", "T", 7, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAACTGGTCAAATGGTCAACTGGTCA"; + basicInsertTest("C", "A", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testComplexInsertionAllele() { + final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + h1CigarList.add(new CigarElement(10, CigarOperator.I)); + h1CigarList.add(new CigarElement(8, CigarOperator.M)); + h1CigarList.add(new CigarElement(3, CigarOperator.D)); + h1CigarList.add(new CigarElement(7, CigarOperator.M)); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC"; + basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testComplexDeletionAllele() { + final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + h1CigarList.add(new CigarElement(10, CigarOperator.I)); + h1CigarList.add(new CigarElement(8, CigarOperator.M)); + h1CigarList.add(new CigarElement(3, CigarOperator.D)); + h1CigarList.add(new CigarElement(7, CigarOperator.M)); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC"; + basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testComplexSNPAllele() { + final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + h1CigarList.add(new CigarElement(10, CigarOperator.I)); + h1CigarList.add(new CigarElement(8, CigarOperator.M)); + h1CigarList.add(new CigarElement(3, CigarOperator.D)); + h1CigarList.add(new CigarElement(7, CigarOperator.M)); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "AGCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("T", "G", 1, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCTATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("G", "T", 7, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGCGGGA" + "AGGC"; + basicInsertTest("G", "C", 17, h1Cigar, bases, h1bases); + } + + private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) { + final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1); + final Haplotype h = new Haplotype(hap.getBytes()); + final Allele h1refAllele = Allele.create(ref, true); + final Allele h1altAllele = Allele.create(alt, false); + final Haplotype h1 = new Haplotype( h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE, 0, cigar) ); + final Haplotype h1expected = new Haplotype(newHap.getBytes()); + Assert.assertEquals(h1, h1expected); + + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index 75fc44873..1ba6c74d4 100755 --- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -205,6 +205,24 @@ public class MathUtilsUnitTest extends BaseTest { } } + /** + * Private functions used by testArrayShuffle() + */ + private boolean hasUniqueElements(Object[] x) { + for (int i = 0; i < x.length; i++) + for (int j = i + 1; j < x.length; j++) + if (x[i].equals(x[j]) || x[i] == x[j]) + return false; + return true; + } + + private boolean hasAllElements(final Object[] expected, final Object[] actual) { + HashSet set = new HashSet(); + set.addAll(Arrays.asList(expected)); + set.removeAll(Arrays.asList(actual)); + return set.isEmpty(); + } + @Test(enabled = true) public void testIntAndBitSetConversion() { Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428); @@ -234,26 +252,71 @@ public class MathUtilsUnitTest extends BaseTest { Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31) } + @Test + public void testApproximateLog10SumLog10() { + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3); - private boolean hasUniqueElements(Object[] x) { - for (int i = 0; i < x.length; i++) - for (int j = i + 1; j < x.length; j++) - if (x[i].equals(x[j]) || x[i] == x[j]) - return false; + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3); + + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3); + } + + @Test + public void testNormalizeFromLog10() { + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9})); + + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211})); + } + + /** + * Private function used by testNormalizeFromLog10() + */ + private boolean compareDoubleArrays(double[] b1, double[] b2) { + if( b1.length != b2.length ) { + return false; // sanity check + } + + for( int i=0; i < b1.length; i++ ){ + if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 ) + return false; + } return true; } - - private boolean hasAllElements(final Object[] expected, final Object[] actual) { - HashSet set = new HashSet(); - set.addAll(Arrays.asList(expected)); - set.removeAll(Arrays.asList(actual)); - return set.isEmpty(); - } - - private void p (Object []x) { - for (Object v: x) - System.out.print((Integer) v + " "); - System.out.println(); - } - } diff --git a/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java new file mode 100644 index 000000000..f5cfea148 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.SkipException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; +import org.testng.Assert; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.security.Key; +import java.security.KeyPair; +import java.security.PrivateKey; +import java.security.PublicKey; +import java.util.Arrays; + +public class CryptUtilsUnitTest extends BaseTest { + + @Test + public void testGenerateValidKeyPairWithDefaultSettings() { + KeyPair keyPair = CryptUtils.generateKeyPair(); + Assert.assertTrue(CryptUtils.keysDecryptEachOther(keyPair.getPrivate(), keyPair.getPublic())); + } + + @DataProvider( name = "InvalidKeyPairSettings" ) + public Object[][] invalidKeyPairSettingsDataProvider() { + return new Object[][] { + { -1, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM}, + { CryptUtils.DEFAULT_KEY_LENGTH, "Made-up algorithm", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM}, + { CryptUtils.DEFAULT_KEY_LENGTH, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, "Made-up algorithm"} + }; + } + + @Test( dataProvider = "InvalidKeyPairSettings", expectedExceptions = ReviewedStingException.class ) + public void testGenerateKeyPairWithInvalidSettings( int keyLength, String encryptionAlgorithm, String randomNumberGenerationAlgorithm ) { + KeyPair keyPair = CryptUtils.generateKeyPair(keyLength, encryptionAlgorithm, randomNumberGenerationAlgorithm); + } + + @Test + public void testGATKMasterKeyPairMutualDecryption() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testGATKMasterKeyPairMutualDecryption")); + } + + Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey())); + } + + @Test + public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption")); + } + + Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey())); + } + + @Test + public void testKeyPairWriteThenRead() { + KeyPair keyPair = CryptUtils.generateKeyPair(); + File privateKeyFile = createTempFile("testKeyPairWriteThenRead_private", "key"); + File publicKeyFile = createTempFile("testKeyPairWriteThenRead_public", "key"); + + CryptUtils.writeKeyPair(keyPair, privateKeyFile, publicKeyFile); + + assertKeysAreEqual(keyPair.getPrivate(), CryptUtils.readPrivateKey(privateKeyFile)); + assertKeysAreEqual(keyPair.getPublic(), CryptUtils.readPublicKey(publicKeyFile)); + } + + @Test + public void testPublicKeyWriteThenReadFromFile() { + File keyFile = createTempFile("testPublicKeyWriteThenReadFromFile", "key"); + PublicKey publicKey = CryptUtils.generateKeyPair().getPublic(); + + CryptUtils.writeKey(publicKey, keyFile); + + assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(keyFile)); + } + + @Test + public void testPublicKeyWriteThenReadFromStream() throws IOException { + File keyFile = createTempFile("testPublicKeyWriteThenReadFromStream", "key"); + PublicKey publicKey = CryptUtils.generateKeyPair().getPublic(); + + CryptUtils.writeKey(publicKey, keyFile); + + assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(new FileInputStream(keyFile))); + } + + @Test + public void testPrivateKeyWriteThenReadFromFile() { + File keyFile = createTempFile("testPrivateKeyWriteThenReadFromFile", "key"); + PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate(); + + CryptUtils.writeKey(privateKey, keyFile); + + assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(keyFile)); + } + + @Test + public void testPrivateKeyWriteThenReadFromStream() throws IOException { + File keyFile = createTempFile("testPrivateKeyWriteThenReadFromStream", "key"); + PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate(); + + CryptUtils.writeKey(privateKey, keyFile); + + assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(new FileInputStream(keyFile))); + } + + @Test( expectedExceptions = UserException.CouldNotReadInputFile.class ) + public void testReadNonExistentPublicKey() { + File nonExistentFile = new File("jdshgkdfhg.key"); + Assert.assertFalse(nonExistentFile.exists()); + + CryptUtils.readPublicKey(nonExistentFile); + } + + @Test( expectedExceptions = UserException.CouldNotReadInputFile.class ) + public void testReadNonExistentPrivateKey() { + File nonExistentFile = new File("jdshgkdfhg.key"); + Assert.assertFalse(nonExistentFile.exists()); + + CryptUtils.readPrivateKey(nonExistentFile); + } + + @Test + public void testDecodePublicKey() { + PublicKey originalKey = CryptUtils.generateKeyPair().getPublic(); + PublicKey decodedKey = CryptUtils.decodePublicKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM); + assertKeysAreEqual(originalKey, decodedKey); + } + + @Test + public void testDecodePrivateKey() { + PrivateKey originalKey = CryptUtils.generateKeyPair().getPrivate(); + PrivateKey decodedKey = CryptUtils.decodePrivateKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM); + assertKeysAreEqual(originalKey, decodedKey); + } + + @Test + public void testLoadGATKMasterPrivateKey() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testLoadGATKMasterPrivateKey")); + } + + PrivateKey gatkMasterPrivateKey = CryptUtils.loadGATKMasterPrivateKey(); + } + + @Test + public void testLoadGATKMasterPublicKey() { + PublicKey gatkMasterPublicKey = CryptUtils.loadGATKMasterPublicKey(); + } + + @Test + public void testLoadGATKDistributedPublicKey() { + PublicKey gatkDistributedPublicKey = CryptUtils.loadGATKDistributedPublicKey(); + } + + private void assertKeysAreEqual( Key originalKey, Key keyFromDisk ) { + Assert.assertTrue(Arrays.equals(originalKey.getEncoded(), keyFromDisk.getEncoded())); + Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm()); + Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat()); + } + + private boolean gatkPrivateKeyExistsButReadPermissionDenied() { + File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE); + return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead(); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java new file mode 100644 index 000000000..8fb75ef38 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class GATKKeyIntegrationTest extends WalkerTest { + + public static final String BASE_COMMAND = String.format("-T PrintReads -R %s -I %s -o %%s", + testDir + "exampleFASTA.fasta", + testDir + "exampleBAM.bam"); + public static final String MD5_UPON_SUCCESSFUL_RUN = "b9dc5bf6753ca2819e70b056eaf61258"; + + + private void runGATKKeyTest ( String testName, String etArg, String keyArg, Class expectedException, String md5 ) { + String command = BASE_COMMAND + String.format(" %s %s", etArg, keyArg); + + WalkerTestSpec spec = expectedException != null ? + new WalkerTestSpec(command, 1, expectedException) : + new WalkerTestSpec(command, 1, Arrays.asList(md5)); + + spec.disableImplicitArgs(); // Turn off automatic inclusion of -et/-K args by WalkerTest + executeTest(testName, spec); + } + + @Test + public void testValidKeyNoET() { + runGATKKeyTest("testValidKeyNoET", + "-et " + GATKRunReport.PhoneHomeOption.NO_ET, + "-K " + keysDataLocation + "valid.key", + null, + MD5_UPON_SUCCESSFUL_RUN); + } + + @Test + public void testValidKeyETStdout() { + runGATKKeyTest("testValidKeyETStdout", + "-et " + GATKRunReport.PhoneHomeOption.STDOUT, + "-K " + keysDataLocation + "valid.key", + null, + MD5_UPON_SUCCESSFUL_RUN); + } + + @Test + public void testValidKeyETStandard() { + runGATKKeyTest("testValidKeyETStandard", + "", + "-K " + keysDataLocation + "valid.key", + null, + MD5_UPON_SUCCESSFUL_RUN); + } + + @Test + public void testNoKeyNoET() { + runGATKKeyTest("testNoKeyNoET", + "-et " + GATKRunReport.PhoneHomeOption.NO_ET, + "", + UserException.class, + null); + } + + @Test + public void testNoKeyETStdout() { + runGATKKeyTest("testNoKeyETStdout", + "-et " + GATKRunReport.PhoneHomeOption.STDOUT, + "", + UserException.class, + null); + } + + @Test + public void testNoKeyETStandard() { + runGATKKeyTest("testNoKeyETStandard", + "", + "", + null, + MD5_UPON_SUCCESSFUL_RUN); + } + + @Test + public void testRevokedKey() { + runGATKKeyTest("testRevokedKey", + "-et " + GATKRunReport.PhoneHomeOption.NO_ET, + "-K " + keysDataLocation + "revoked.key", + UserException.KeySignatureVerificationException.class, + null); + } + + @DataProvider(name = "CorruptKeyTestData") + public Object[][] corruptKeyDataProvider() { + return new Object[][] { + { "corrupt_empty.key", UserException.UnreadableKeyException.class }, + { "corrupt_single_byte_file.key", UserException.UnreadableKeyException.class }, + { "corrupt_random_contents.key", UserException.UnreadableKeyException.class }, + { "corrupt_single_byte_deletion.key", UserException.UnreadableKeyException.class }, + { "corrupt_single_byte_insertion.key", UserException.UnreadableKeyException.class }, + { "corrupt_single_byte_change.key", UserException.UnreadableKeyException.class }, + { "corrupt_multi_byte_deletion.key", UserException.UnreadableKeyException.class }, + { "corrupt_multi_byte_insertion.key", UserException.UnreadableKeyException.class }, + { "corrupt_multi_byte_change.key", UserException.UnreadableKeyException.class }, + { "corrupt_bad_isize_field.key", UserException.UnreadableKeyException.class }, + { "corrupt_bad_crc.key", UserException.UnreadableKeyException.class }, + { "corrupt_no_email_address.key", UserException.UnreadableKeyException.class }, + { "corrupt_no_sectional_delimiter.key", UserException.KeySignatureVerificationException.class }, + { "corrupt_no_signature.key", UserException.UnreadableKeyException.class }, + { "corrupt_bad_signature.key", UserException.KeySignatureVerificationException.class }, + { "corrupt_non_gzipped_valid_key.key", UserException.UnreadableKeyException.class } + }; + } + + @Test(dataProvider = "CorruptKeyTestData") + public void testCorruptKey ( String corruptKeyName, Class expectedException ) { + runGATKKeyTest(String.format("testCorruptKey (%s)", corruptKeyName), + "-et " + GATKRunReport.PhoneHomeOption.NO_ET, + "-K " + keysDataLocation + corruptKeyName, + expectedException, + null); + } + + @Test + public void testCorruptButNonRequiredKey() { + runGATKKeyTest("testCorruptButNonRequiredKey", + "", + "-K " + keysDataLocation + "corrupt_random_contents.key", + null, + MD5_UPON_SUCCESSFUL_RUN); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java new file mode 100644 index 000000000..660f95796 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.SkipException; +import org.testng.annotations.Test; +import org.testng.Assert; + +import java.io.File; +import java.security.KeyPair; +import java.security.PrivateKey; +import java.security.PublicKey; + +public class GATKKeyUnitTest extends BaseTest { + + @Test + public void testCreateGATKKeyUsingMasterKeyPair() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testCreateGATKKeyUsingMasterKeyPair")); + } + + PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey(); + PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey(); + + // We should be able to create a valid GATKKey using our master key pair: + GATKKey key = new GATKKey(masterPrivateKey, masterPublicKey, "foo@bar.com"); + Assert.assertTrue(key.isValid()); + } + + @Test + public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey")); + } + + PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey(); + PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey(); + + // We should also be able to create a valid GATKKey using our master private + // key and the public key we distribute with the GATK: + GATKKey key = new GATKKey(masterPrivateKey, distributedPublicKey, "foo@bar.com"); + Assert.assertTrue(key.isValid()); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testKeyPairMismatch() { + KeyPair firstKeyPair = CryptUtils.generateKeyPair(); + KeyPair secondKeyPair = CryptUtils.generateKeyPair(); + + // Attempting to create a GATK Key with private and public keys that aren't part of the + // same key pair should immediately trigger a validation failure: + GATKKey key = new GATKKey(firstKeyPair.getPrivate(), secondKeyPair.getPublic(), "foo@bar.com"); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testEncryptionAlgorithmMismatch() { + KeyPair keyPair = CryptUtils.generateKeyPair(CryptUtils.DEFAULT_KEY_LENGTH, "DSA", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM); + + // Attempting to use a DSA private key to create an RSA signature should throw an error: + GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), "foo@bar.com", "SHA1withRSA"); + } + + @Test( expectedExceptions = UserException.class ) + public void testInvalidEmailAddress() { + String emailAddressWithNulByte = new String(new byte[] { 0 }); + KeyPair keyPair = CryptUtils.generateKeyPair(); + + // Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file: + GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), emailAddressWithNulByte); + } + + @Test + public void testCreateGATKKeyFromValidKeyFile() { + GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "valid.key")); + Assert.assertTrue(key.isValid()); + } + + @Test( expectedExceptions = UserException.UnreadableKeyException.class ) + public void testCreateGATKKeyFromCorruptKeyFile() { + GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "corrupt_random_contents.key")); + } + + @Test + public void testCreateGATKKeyFromRevokedKeyFile() { + GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "revoked.key")); + Assert.assertFalse(key.isValid()); + } + + @Test( expectedExceptions = UserException.CouldNotReadInputFile.class ) + public void testCreateGATKKeyFromNonExistentFile() { + File nonExistentFile = new File("ghfdkgsdhg.key"); + Assert.assertFalse(nonExistentFile.exists()); + + GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile); + } + + private boolean gatkPrivateKeyExistsButReadPermissionDenied() { + File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE); + return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead(); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java index 757e6efdf..941d2b14c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java @@ -27,12 +27,18 @@ package org.broadinstitute.sting.utils.io; import org.apache.commons.io.FileUtils; import org.broadinstitute.sting.BaseTest; import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.Random; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; public class IOUtilsUnitTest extends BaseTest { @@ -230,4 +236,90 @@ public class IOUtilsUnitTest extends BaseTest { Assert.assertFalse(IOUtils.isSpecialFile(new File("/home/user/my.file"))); Assert.assertFalse(IOUtils.isSpecialFile(new File("/devfake/null"))); } + + @DataProvider( name = "ByteArrayIOTestData") + public Object[][] byteArrayIOTestDataProvider() { + return new Object[][] { + // file size, read buffer size + { 0, 4096 }, + { 1, 4096 }, + { 2000, 4096 }, + { 4095, 4096 }, + { 4096, 4096 }, + { 4097, 4096 }, + { 6000, 4096 }, + { 8191, 4096 }, + { 8192, 4096 }, + { 8193, 4096 }, + { 10000, 4096 } + }; + } + + @Test( dataProvider = "ByteArrayIOTestData" ) + public void testWriteThenReadFileIntoByteArray ( int fileSize, int readBufferSize ) throws Exception { + File tempFile = createTempFile(String.format("testWriteThenReadFileIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp"); + + byte[] dataWritten = getDeterministicRandomData(fileSize); + IOUtils.writeByteArrayToFile(dataWritten, tempFile); + byte[] dataRead = IOUtils.readFileIntoByteArray(tempFile, readBufferSize); + + Assert.assertEquals(dataRead.length, dataWritten.length); + Assert.assertTrue(Arrays.equals(dataRead, dataWritten)); + } + + @Test( dataProvider = "ByteArrayIOTestData" ) + public void testWriteThenReadStreamIntoByteArray ( int fileSize, int readBufferSize ) throws Exception { + File tempFile = createTempFile(String.format("testWriteThenReadStreamIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp"); + + byte[] dataWritten = getDeterministicRandomData(fileSize); + IOUtils.writeByteArrayToStream(dataWritten, new FileOutputStream(tempFile)); + byte[] dataRead = IOUtils.readStreamIntoByteArray(new FileInputStream(tempFile), readBufferSize); + + Assert.assertEquals(dataRead.length, dataWritten.length); + Assert.assertTrue(Arrays.equals(dataRead, dataWritten)); + } + + @Test( expectedExceptions = UserException.CouldNotReadInputFile.class ) + public void testReadNonExistentFileIntoByteArray() { + File nonExistentFile = new File("djfhsdkjghdfk"); + Assert.assertFalse(nonExistentFile.exists()); + + IOUtils.readFileIntoByteArray(nonExistentFile); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testReadNullStreamIntoByteArray() { + IOUtils.readStreamIntoByteArray(null); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testReadStreamIntoByteArrayInvalidBufferSize() throws Exception { + IOUtils.readStreamIntoByteArray(new FileInputStream(createTempFile("testReadStreamIntoByteArrayInvalidBufferSize", "tmp")), + -1); + } + + @Test( expectedExceptions = UserException.CouldNotCreateOutputFile.class ) + public void testWriteByteArrayToUncreatableFile() { + IOUtils.writeByteArrayToFile(new byte[]{0}, new File("/dev/foo/bar")); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testWriteNullByteArrayToFile() { + IOUtils.writeByteArrayToFile(null, createTempFile("testWriteNullByteArrayToFile", "tmp")); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testWriteByteArrayToNullStream() { + IOUtils.writeByteArrayToStream(new byte[]{0}, null); + } + + private byte[] getDeterministicRandomData ( int size ) { + GenomeAnalysisEngine.resetRandomGenerator(); + Random rand = GenomeAnalysisEngine.getRandomGenerator(); + + byte[] randomData = new byte[size]; + rand.nextBytes(randomData); + + return randomData; + } } diff --git a/public/keys/GATK_public.key b/public/keys/GATK_public.key new file mode 100644 index 000000000..05cdde1c2 Binary files /dev/null and b/public/keys/GATK_public.key differ diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml index 283b5eabf..68459f6d2 100644 --- a/public/packages/GATKEngine.xml +++ b/public/packages/GATKEngine.xml @@ -36,6 +36,8 @@ + +