diff --git a/build.xml b/build.xml index 1df75cd1d..d3e25d424 100644 --- a/build.xml +++ b/build.xml @@ -47,6 +47,7 @@ + @@ -567,6 +568,7 @@ + @@ -615,6 +617,9 @@ + + + @@ -879,6 +884,7 @@ + diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R index 46bbf7eda..876cf5cbc 100644 --- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R @@ -4,11 +4,9 @@ colnames(d) = tableHeader; for (i in 1:ncol(d)) { - v = suppressWarnings(as.numeric(d[,i])); - - if (length(na.omit(as.numeric(v))) == length(d[,i])) { - d[,i] = v; - } + # use the general type.convert infrastructure of read.table to convert column data to R types + v = type.convert(d[,i]) + d[,i] = v; } usedNames = ls(envir=tableEnv, pattern=tableName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 32002e093..e5aaf2338 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -35,9 +35,12 @@ import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.crypt.CryptUtils; +import org.broadinstitute.sting.utils.crypt.GATKKey; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.ListFileUtils; +import java.security.PublicKey; import java.util.*; /** @@ -78,6 +81,9 @@ public abstract class CommandLineExecutable extends CommandLineProgram { Walker walker = engine.getWalkerByName(getAnalysisName()); try { + // Make sure a valid GATK user key is present, if required. + authorizeGATKRun(); + engine.setArguments(getArgumentCollection()); // File lists can require a bit of additional expansion. Set these explicitly by the engine. @@ -130,6 +136,28 @@ public abstract class CommandLineExecutable extends CommandLineProgram { return 0; } + /** + * Authorizes this run of the GATK by checking for a valid GATK user key, if required. + * Currently, a key is required only if running with the -et NO_ET or -et STDOUT options. + */ + private void authorizeGATKRun() { + if ( getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.NO_ET || + getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) { + if ( getArgumentCollection().gatkKeyFile == null ) { + throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " + + "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + + "for more information and instructions on how to obtain a key."); + } + else { + PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey(); + GATKKey gatkUserKey = new GATKKey(gatkPublicKey, getArgumentCollection().gatkKeyFile); + + if ( ! gatkUserKey.isValid() ) { + throw new UserException.KeySignatureVerificationException(getArgumentCollection().gatkKeyFile); + } + } + } + } /** * Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled. diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 8ec707801..02d211a0c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -65,9 +65,12 @@ public class GATKArgumentCollection { @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false) public Integer readBufferSize = null; - @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false) + @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; + @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) + public File gatkKeyFile = null; + @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false) public List readFilters = new ArrayList(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index e8627ef4c..f1f74069f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -154,9 +154,7 @@ public class GATKRunReport { /** Standard option. Writes to local repository if it can be found, or S3 otherwise */ STANDARD, /** Force output to STDOUT. For debugging only */ - STDOUT, - /** Force output to S3. For debugging only */ - AWS_S3 // todo -- remove me -- really just for testing purposes + STDOUT } private static final DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss"); @@ -239,11 +237,8 @@ public class GATKRunReport { case STDOUT: postReportToStream(System.out); break; - case AWS_S3: - postReportToAWSS3(); - break; default: - exceptDuringRunReport("BUG: unexcepted PhoneHomeOption "); + exceptDuringRunReport("BUG: unexpected PhoneHomeOption "); break; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 3f555f780..00968943d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -43,15 +43,15 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar final ArrayList altQuals = new ArrayList(); if ( vc.isSNP() ) { + final List altAlleles = new ArrayList(); + for ( final Allele a : vc.getAlternateAlleles() ) + altAlleles.add(a.getBases()[0]); + for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); if ( context == null ) continue; - final List altAlleles = new ArrayList(); - for ( final Allele a : vc.getAlternateAlleles() ) - altAlleles.add(a.getBases()[0]); - fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals); } } else if ( vc.isIndel() || vc.isMixed() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index 89a30e4f5..a1ab73341 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -26,10 +26,12 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Arrays; +import java.util.BitSet; /** * Created by IntelliJ IDEA. @@ -43,10 +45,6 @@ public class ContextCovariate implements StandardCovariate { private int insertionsContextSize; private int deletionsContextSize; - private String mismatchesNoContext = ""; - private String insertionsNoContext = ""; - private String deletionsNoContext = ""; - // Initialize any member variables using the command-line arguments passed to the walkers @Override public void initialize(final RecalibrationArgumentCollection RAC) { @@ -57,29 +55,26 @@ public class ContextCovariate implements StandardCovariate { if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0) throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize)); - // initialize no context strings given the size of the context for each covariate type - mismatchesNoContext = makeAllNStringWithLength(mismatchesContextSize); - insertionsNoContext = makeAllNStringWithLength(insertionsContextSize); - deletionsNoContext = makeAllNStringWithLength( deletionsContextSize); } @Override public CovariateValues getValues(final GATKSAMRecord read) { int l = read.getReadLength(); - String[] mismatches = new String [l]; - String[] insertions = new String [l]; - String[] deletions = new String [l]; + BitSet[] mismatches = new BitSet[l]; + BitSet[] insertions = new BitSet[l]; + BitSet[] deletions = new BitSet[l]; final boolean negativeStrand = read.getReadNegativeStrandFlag(); byte[] bases = read.getReadBases(); - if (negativeStrand) { - bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place - } + if (negativeStrand) + bases = BaseUtils.simpleReverseComplement(bases); + for (int i = 0; i < read.getReadLength(); i++) { - mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext); - insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext); - deletions[i] = contextWith(bases, i, deletionsContextSize, deletionsNoContext); + mismatches[i] = contextWith(bases, i, mismatchesContextSize); + insertions[i] = contextWith(bases, i, insertionsContextSize); + deletions[i] = contextWith(bases, i, deletionsContextSize); } + if (negativeStrand) { reverse(mismatches); reverse(insertions); @@ -90,7 +85,7 @@ public class ContextCovariate implements StandardCovariate { // Used to get the covariate's value from input csv file during on-the-fly recalibration @Override - public final Comparable getValue(final String str) { + public final Object getValue(final String str) { return str; } @@ -100,29 +95,28 @@ public class ContextCovariate implements StandardCovariate { * @param bases the bases in the read to build the context from * @param offset the position in the read to calculate the context for * @param contextSize context size to use building the context - * @param noContextString string to return if the position is not far enough in the read to have a full context before. * @return */ - private String contextWith(byte [] bases, int offset, int contextSize, String noContextString) { - return (offset < contextSize) ? noContextString : new String(Arrays.copyOfRange(bases, offset - contextSize, offset)); + private BitSet contextWith(byte [] bases, int offset, int contextSize) { + if (offset < contextSize) + return null; + + String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset)); + if (context.contains("N")) + return null; + + return MathUtils.bitSetFrom(context); } - - private String makeAllNStringWithLength(int length) { - String s = ""; - for (int i=0; i readGroupLookupTable = new HashMap(); + private final HashMap readGroupReverseLookupTable = new HashMap(); + private short nextId = 0; // Initialize any member variables using the command-line arguments passed to the walkers @Override @@ -48,16 +53,29 @@ public class ReadGroupCovariate implements RequiredCovariate { public CovariateValues getValues(final GATKSAMRecord read) { final int l = read.getReadLength(); final String readGroupId = read.getReadGroup().getReadGroupId(); - String [] readGroups = new String[l]; - Arrays.fill(readGroups, readGroupId); + short shortId; + if (readGroupLookupTable.containsKey(readGroupId)) + shortId = readGroupLookupTable.get(readGroupId); + else { + shortId = nextId; + readGroupLookupTable.put(readGroupId, nextId); + readGroupReverseLookupTable.put(nextId, readGroupId); + nextId++; + } + Short [] readGroups = new Short[l]; + Arrays.fill(readGroups, shortId); return new CovariateValues(readGroups, readGroups, readGroups); } // Used to get the covariate's value from input csv file during on-the-fly recalibration @Override - public final Comparable getValue(final String str) { + public final Object getValue(final String str) { return str; } + + public final String decodeReadGroup(final short id) { + return readGroupReverseLookupTable.get(id); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 38e7051e4..cc6f67cc9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -25,8 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -39,6 +45,63 @@ import org.broadinstitute.sting.commandline.Hidden; public class RecalibrationArgumentCollection { + /** + * This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference, + * so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites. This tool accepts any number of RodBindings (VCF, Bed, etc.) + * for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites. + * Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument. + */ + @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false) + protected List> knownSites = Collections.emptyList(); + + /** + * After the header, data records occur one per line until the end of the file. The first several items on a line are the + * values of the individual covariates and will change depending on which covariates were specified at runtime. The last + * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, + * and the raw empirical quality score calculated by phred-scaling the mismatch rate. + */ + @Gather(CountCovariatesGatherer.class) + @Output + protected PrintStream RECAL_FILE; + + /** + * List all implemented covariates. + */ + @Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false) + protected boolean LIST_ONLY = false; + + /** + * Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list. + */ + @Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false) + protected String[] COVARIATES = null; + + /* + * Use the standard set of covariates in addition to the ones listed using the -cov argument + */ + @Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false) + protected boolean USE_STANDARD_COVARIATES = true; + + ///////////////////////////// + // Debugging-only Arguments + ///////////////////////////// + /** + * This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option. + */ + @Hidden + @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.") + protected boolean RUN_WITHOUT_DBSNP = false; + + ///////////////////////////// + // protected Member Variables + ///////////////////////////// + protected final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables) + protected final ArrayList requestedCovariates = new ArrayList();// A list to hold the covariate objects that were requested + + protected final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. + protected final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. + + /** * CountCovariates and TableRecalibration accept a --solid_recal_mode flag which governs how the recalibrator handles the * reads which have had the reference inserted because of color space inconsistencies. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java new file mode 100644 index 000000000..d7a48d321 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diagnostics; + +import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.Median; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.io.PrintStream; +import java.text.DateFormat; +import java.util.HashMap; +import java.util.Map; + +/** + * Emits a GATKReport containing read group, sample, library, platform, center, sequencing data, + * paired end status, simple read type name (e.g. 2x76) median insert size and median read length + * for each read group in every provided BAM file + * + * Note that this walker stops when all read groups have been observed at least a few thousand times so that + * the median statistics are well determined. It is safe to run it WG and it'll finish in an appropriate + * timeframe. + * + *

Input

+ *

+ * Any number of BAM files + *

+ * + *

Output

+ *

+ * GATKReport containing read group, sample, library, platform, center, median insert size and median read length. + * + * For example, running this tool on the NA12878 data sets: + * + *

+ *      ##:GATKReport.v0.2 ReadGroupProperties : Table of read group properties
+ *      readgroup  sample   library       platform  center  date     has.any.reads  is.paired.end  n.reads.analyzed  simple.read.type  median.read.length  median.insert.size
+ *      20FUK.1    NA12878  Solexa-18483  illumina  BI      2/2/10   true           true                        498  2x101                            101                 386
+ *      20FUK.2    NA12878  Solexa-18484  illumina  BI      2/2/10   true           true                        476  2x101                            101                 417
+ *      20FUK.3    NA12878  Solexa-18483  illumina  BI      2/2/10   true           true                        407  2x101                            101                 387
+ *      20FUK.4    NA12878  Solexa-18484  illumina  BI      2/2/10   true           true                        389  2x101                            101                 415
+ *      20FUK.5    NA12878  Solexa-18483  illumina  BI      2/2/10   true           true                        433  2x101                            101                 386
+ *      20FUK.6    NA12878  Solexa-18484  illumina  BI      2/2/10   true           true                        480  2x101                            101                 418
+ *      20FUK.7    NA12878  Solexa-18483  illumina  BI      2/2/10   true           true                        450  2x101                            101                 386
+ *      20FUK.8    NA12878  Solexa-18484  illumina  BI      2/2/10   true           true                        438  2x101                            101                 418
+ *      20GAV.1    NA12878  Solexa-18483  illumina  BI      1/26/10  true           true                        490  2x101                            101                 391
+ *      20GAV.2    NA12878  Solexa-18484  illumina  BI      1/26/10  true           true                        485  2x101                            101                 417
+ *      20GAV.3    NA12878  Solexa-18483  illumina  BI      1/26/10  true           true                        460  2x101                            101                 392
+ *      20GAV.4    NA12878  Solexa-18484  illumina  BI      1/26/10  true           true                        434  2x101                            101                 415
+ *      20GAV.5    NA12878  Solexa-18483  illumina  BI      1/26/10  true           true                        479  2x101                            101                 389
+ *      20GAV.6    NA12878  Solexa-18484  illumina  BI      1/26/10  true           true                        461  2x101                            101                 416
+ *      20GAV.7    NA12878  Solexa-18483  illumina  BI      1/26/10  true           true                        509  2x101                            101                 386
+ *      20GAV.8    NA12878  Solexa-18484  illumina  BI      1/26/10  true           true                        476  2x101                            101                 410                           101                 414
+ *      
+ *

+ * + *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T ReadGroupProperties
+ *      -I example1.bam -I example2.bam etc
+ *      -R reference.fasta
+ *      -o example.gatkreport.txt
+ *  
+ * + * @author Mark DePristo + */ + + + +public class ReadGroupProperties extends ReadWalker { + @Output + public PrintStream out; + + @Argument(shortName="maxElementsForMedian", doc="Calculate median from the first maxElementsForMedian values observed", required=false) + public int MAX_VALUES_FOR_MEDIAN = 10000; + + private final static String TABLE_NAME = "ReadGroupProperties"; + private final Map readGroupInfo = new HashMap(); + + private class PerReadGroupInfo { + public final Median readLength = new Median(MAX_VALUES_FOR_MEDIAN); + public final Median insertSize = new Median(MAX_VALUES_FOR_MEDIAN); + public int nReadsSeen = 0, nReadsPaired = 0; + + public boolean needsMoreData() { + return ! readLength.isFull() || ! insertSize.isFull(); + } + } + + @Override + public void initialize() { + for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) { + readGroupInfo.put(rg.getId(), new PerReadGroupInfo()); + } + } + + @Override + public boolean filter(ReferenceContext ref, GATKSAMRecord read) { + return ! (read.getReadFailsVendorQualityCheckFlag() || read.getReadUnmappedFlag()); + } + + @Override + public boolean isDone() { + for ( PerReadGroupInfo info : readGroupInfo.values() ) { + if ( info.needsMoreData() ) + return false; + } + + return true; + } + + @Override + public Integer map(ReferenceContext referenceContext, GATKSAMRecord read, ReadMetaDataTracker readMetaDataTracker) { + final String rgID = read.getReadGroup().getId(); + final PerReadGroupInfo info = readGroupInfo.get(rgID); + + if ( info.needsMoreData() ) { + info.readLength.add(read.getReadLength()); + info.nReadsSeen++; + if ( read.getReadPairedFlag() ) { + info.nReadsPaired++; + if ( read.getInferredInsertSize() != 0) { + info.insertSize.add(Math.abs(read.getInferredInsertSize())); + } + } + } + + return null; + } + + @Override + public Integer reduceInit() { + return null; + } + + @Override + public Integer reduce(Integer integer, Integer integer1) { + return null; + } + + @Override + public void onTraversalDone(Integer sum) { + final GATKReport report = new GATKReport(); + report.addTable(TABLE_NAME, "Table of read group properties"); + GATKReportTable table = report.getTable(TABLE_NAME); + DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT); + + table.addPrimaryKey("readgroup"); + //* Emits a GATKReport containing read group, sample, library, platform, center, median insert size and + //* median read length for each read group in every BAM file. + table.addColumn("sample", "NA"); + table.addColumn("library", "NA"); + table.addColumn("platform", "NA"); + table.addColumn("center", "NA"); + table.addColumn("date", "NA"); + table.addColumn("has.any.reads", "false"); + table.addColumn("is.paired.end", "false"); + table.addColumn("n.reads.analyzed", "NA"); + table.addColumn("simple.read.type", "NA"); + table.addColumn("median.read.length", Integer.valueOf(0)); + table.addColumn("median.insert.size", Integer.valueOf(0)); + + for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) { + final String rgID = rg.getId(); + PerReadGroupInfo info = readGroupInfo.get(rgID); + + // we are paired if > 25% of reads are paired + final boolean isPaired = info.nReadsPaired / (1.0 * (info.nReadsSeen+1)) > 0.25; + final boolean hasAnyReads = info.nReadsSeen > 0; + final int readLength = info.readLength.getMedian(0); + + setTableValue(table, rgID, "sample", rg.getSample()); + setTableValue(table, rgID, "library", rg.getLibrary()); + setTableValue(table, rgID, "platform", rg.getPlatform()); + setTableValue(table, rgID, "center", rg.getSequencingCenter()); + try { + setTableValue(table, rgID, "date", rg.getRunDate() != null ? dateFormatter.format(rg.getRunDate()) : "NA"); + } catch ( NullPointerException e ) { + // TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present + setTableValue(table, rgID, "date", "NA"); + } + setTableValue(table, rgID, "has.any.reads", hasAnyReads); + setTableValue(table, rgID, "is.paired.end", isPaired); + setTableValue(table, rgID, "n.reads.analyzed", info.nReadsSeen); + setTableValue(table, rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA"); + setTableValue(table, rgID, "median.read.length", hasAnyReads ? readLength : "NA" ); + setTableValue(table, rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA" ); + } + + report.print(out); + } + + private final void setTableValue(GATKReportTable table, final String rgID, final String key, final Object value) { + table.set(rgID, key, value == null ? "NA" : value); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 200a250f2..26023bd2f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -454,8 +454,7 @@ public class HaplotypeIndelErrorModel { // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2) // First term is approximated by Jacobian log with table lookup. // Second term is a constant added to both likelihoods so will be ignored - haplotypeLikehoodMatrix[i][j] += MathUtils.softMax(readLikelihood[0], - readLikelihood[1]); + haplotypeLikehoodMatrix[i][j] += MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 6410d619d..64993b43a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -166,18 +166,17 @@ public class PairHMMIndelErrorModel { final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual]; - matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead, - YMetricArray[im1][jm1] + pBaseRead); + matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]}); final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1]; final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1]; - XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1); + XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1); // update Y array final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1]; final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1]; - YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2); + YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2); } } @@ -316,9 +315,7 @@ public class PairHMMIndelErrorModel { final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1; - final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ], - XMetricArray[bestI][bestJ], - YMetricArray[bestI][bestJ]); + final double bestMetric = MathUtils.approximateLog10SumLog10(new double[]{ matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ] }); /* if (DEBUG) { @@ -651,7 +648,7 @@ public class PairHMMIndelErrorModel { private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) { final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes]; - // todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplied to just a single loop without the intermediate NxN matrix + // todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix for (int i=0; i < numHaplotypes; i++) { for (int j=i; j < numHaplotypes; j++){ // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j] @@ -665,7 +662,7 @@ public class PairHMMIndelErrorModel { final double li = readLikelihoods[readIdx][i]; final double lj = readLikelihoods[readIdx][j]; final int readCount = readCounts[readIdx]; - haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF); + haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.approximateLog10SumLog10(li, lj) + LOG_ONE_HALF); } } } @@ -678,7 +675,7 @@ public class PairHMMIndelErrorModel { } } - // renormalize so that max element is zero. + // renormalize so that max element is zero. return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index b27bef265..e812fb53a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -110,6 +110,13 @@ public class ValidationAmplicons extends RodWalker { @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) boolean lowerCaseSNPs = false; + /** + * If onlyOutputValidAmplicons is true, the output fasta file will contain only valid sequences. + * Useful for producing delivery-ready files. + */ + @Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false) + boolean onlyOutputValidAmplicons = false; + /** * BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased. * This changes the size of the k-mer used for alignment. @@ -486,14 +493,16 @@ public class ValidationAmplicons extends RodWalker { valid = "Valid"; } - String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); - if (!sequenomOutput) - out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); - else { - seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record - probeName = probeName.replace("amplicon_","a"); - out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity); + if (!onlyOutputValidAmplicons || !sequenceInvalid) { + String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); + if (!sequenomOutput) + out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); + else { + seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record + probeName = probeName.replace("amplicon_","a"); + out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity); + } } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java index 82776ca2e..6f0ae7c25 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java @@ -140,15 +140,16 @@ public class GaussianMixtureModel { } for( final VariantDatum datum : data ) { - final ArrayList pVarInGaussianLog10 = new ArrayList( gaussians.size() ); + final double[] pVarInGaussianLog10 = new double[gaussians.size()]; + int gaussianIndex = 0; for( final MultivariateGaussian gaussian : gaussians ) { final double pVarLog10 = gaussian.evaluateDatumLog10( datum ); - pVarInGaussianLog10.add( pVarLog10 ); + pVarInGaussianLog10[gaussianIndex++] = pVarLog10; } - final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10 ); - int iii = 0; + final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10, false ); + gaussianIndex = 0; for( final MultivariateGaussian gaussian : gaussians ) { - gaussian.assignPVarInGaussian( pVarInGaussianNormalized[iii++] ); //BUGBUG: to clean up + gaussian.assignPVarInGaussian( pVarInGaussianNormalized[gaussianIndex++] ); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 7cc5b1625..3cdcf4982 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -372,6 +372,8 @@ public class VariantRecalibrator extends RodWalker= 0", "hapStartInRefCoords >= 0"}) + public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) { if( refAllele.length() != altAllele.length() ) { refInsertLocation++; } - int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation); + int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(hapStartInRefCoords, haplotypeCigar, refInsertLocation); if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype - return getBases().clone(); + return bases.clone(); } - haplotypeInsertLocation += numBasesAddedToStartOfHaplotype; - final byte[] newHaplotype = getBases().clone(); + byte[] newHaplotype; try { if( refAllele.length() == altAllele.length() ) { // SNP or MNP + newHaplotype = bases.clone(); for( int iii = 0; iii < altAllele.length(); iii++ ) { newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; } - } else if( refAllele.length() < altAllele.length() ) { // insertion + } else if( refAllele.length() < altAllele.length() ) { // insertion final int altAlleleLength = altAllele.length(); + newHaplotype = new byte[bases.length + altAlleleLength]; + for( int iii = 0; iii < bases.length; iii++ ) { + newHaplotype[iii] = bases[iii]; + } for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) { newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; } @@ -138,24 +143,17 @@ public class Haplotype { newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; } } else { // deletion - int refHaplotypeOffset = 0; - for( final CigarElement ce : haplotypeCigar.getCigarElements()) { - if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); } - else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); } - } - for( int iii = 0; iii < altAllele.length(); iii++ ) { - newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; - } final int shift = refAllele.length() - altAllele.length(); - for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) { - newHaplotype[iii] = newHaplotype[iii+shift]; + newHaplotype = new byte[bases.length - shift]; + for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) { + newHaplotype[iii] = bases[iii]; } - for( int iii = 0; iii < shift; iii++ ) { - newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii]; + for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) { + newHaplotype[iii] = bases[iii+shift]; } } } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding - return getBases().clone(); + return bases.clone(); } return newHaplotype; diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index c9ab3b58e..a96cbffc5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -29,6 +29,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.math.BigDecimal; @@ -40,14 +41,6 @@ import java.util.*; * @author Kiran Garimella */ public class MathUtils { - /** Public constants - used for the Lanczos approximation to the factorial function - * (for the calculation of the binomial/multinomial probability in logspace) - * @param LANC_SEQ[] - an array holding the constants which correspond to the product - * of Chebyshev Polynomial coefficients, and points on the Gamma function (for interpolation) - * [see A Precision Approximation of the Gamma Function J. SIAM Numer. Anal. Ser. B, Vol. 1 1964. pp. 86-96] - * @param LANC_G - a value for the Lanczos approximation to the gamma function that works to - * high precision - */ /** * Private constructor. No instantiating this class! @@ -55,6 +48,28 @@ public class MathUtils { private MathUtils() { } + public static final double[] log10Cache; + private static final double[] jacobianLogTable; + private static final double JACOBIAN_LOG_TABLE_STEP = 0.001; + private static final double MAX_JACOBIAN_TOLERANCE = 10.0; + private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1; + private static final int MAXN = 11000; + private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients + + static { + log10Cache = new double[LOG10_CACHE_SIZE]; + jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE]; + + log10Cache[0] = Double.NEGATIVE_INFINITY; + for (int k = 1; k < LOG10_CACHE_SIZE; k++) + log10Cache[k] = Math.log10(k); + + for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { + jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); + + } + } + // A fast implementation of the Math.round() method. This method does not perform // under/overflow checking, so this shouldn't be used in the general case (but is fine // if one is already make those checks before calling in to the rounding). @@ -535,7 +550,7 @@ public class MathUtils { // all negative) the largest value; also, we need to convert to normal-space. double maxValue = Utils.findMaxEntry(array); - // we may decide to just normalize in log space with converting to linear space + // we may decide to just normalize in log space without converting to linear space if (keepInLogSpace) { for (int i = 0; i < array.length; i++) array[i] -= maxValue; @@ -562,29 +577,6 @@ public class MathUtils { return normalized; } - public static double[] normalizeFromLog10(List array, boolean takeLog10OfOutput) { - double[] normalized = new double[array.size()]; - - // for precision purposes, we need to add (or really subtract, since they're - // all negative) the largest value; also, we need to convert to normal-space. - double maxValue = MathUtils.arrayMaxDouble(array); - for (int i = 0; i < array.size(); i++) - normalized[i] = Math.pow(10, array.get(i) - maxValue); - - // normalize - double sum = 0.0; - for (int i = 0; i < array.size(); i++) - sum += normalized[i]; - for (int i = 0; i < array.size(); i++) { - double x = normalized[i] / sum; - if (takeLog10OfOutput) - x = Math.log10(x); - normalized[i] = x; - } - - return normalized; - } - /** * normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE). * @@ -595,10 +587,6 @@ public class MathUtils { return normalizeFromLog10(array, false); } - public static double[] normalizeFromLog10(List array) { - return normalizeFromLog10(array, false); - } - public static int maxElementIndex(final double[] array) { return maxElementIndex(array, array.length); } @@ -1206,78 +1194,11 @@ public class MathUtils { return ((double) num) / (Math.max(denom, 1)); } - public static final double[] log10Cache; - public static final double[] jacobianLogTable; - public static final int JACOBIAN_LOG_TABLE_SIZE = 101; - public static final double JACOBIAN_LOG_TABLE_STEP = 0.1; - public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP; - public static final double MAX_JACOBIAN_TOLERANCE = 10.0; - private static final int MAXN = 11000; - private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients - - static { - log10Cache = new double[LOG10_CACHE_SIZE]; - jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE]; - - log10Cache[0] = Double.NEGATIVE_INFINITY; - for (int k = 1; k < LOG10_CACHE_SIZE; k++) - log10Cache[k] = Math.log10(k); - - for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { - jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); - - } - } - - static public double softMax(final double[] vec) { - double acc = vec[0]; - for (int k = 1; k < vec.length; k++) - acc = softMax(acc, vec[k]); - - return acc; - - } - static public double max(double x0, double x1, double x2) { double a = Math.max(x0, x1); return Math.max(a, x2); } - static public double softMax(final double x0, final double x1, final double x2) { - // compute naively log10(10^x[0] + 10^x[1]+...) - // return Math.log10(MathUtils.sumLog10(vec)); - - // better approximation: do Jacobian logarithm function on data pairs - double a = softMax(x0, x1); - return softMax(a, x2); - } - - static public double softMax(final double x, final double y) { - // we need to compute log10(10^x + 10^y) - // By Jacobian logarithm identity, this is equal to - // max(x,y) + log10(1+10^-abs(x-y)) - // we compute the second term as a table lookup - // with integer quantization - - // slow exact version: - // return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y)); - - double diff = x - y; - - if (diff > MAX_JACOBIAN_TOLERANCE) - return x; - else if (diff < -MAX_JACOBIAN_TOLERANCE) - return y; - else if (diff >= 0) { - int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5); - return x + jacobianLogTable[ind]; - } - else { - int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5); - return y + jacobianLogTable[ind]; - } - } - public static double phredScaleToProbability(byte q) { return Math.pow(10, (-q) / 10.0); } @@ -1619,30 +1540,118 @@ public class MathUtils { * @param bitSet the bitset * @return an integer with the bitset representation */ - public static int intFrom(final BitSet bitSet) { - int integer = 0; + public static long intFrom(final BitSet bitSet) { + long number = 0; for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1)) - integer |= 1 << bitIndex; + number |= 1L << bitIndex; - return integer; + return number; } /** * Creates a BitSet representation of a given integer * - * @param integer the number to turn into a bitset + * @param number the number to turn into a bitset * @return a bitset representation of the integer */ - public static BitSet bitSetFrom(int integer) { - BitSet bitSet = new BitSet((int) Math.ceil(Math.sqrt(integer))); + public static BitSet bitSetFrom(long number) { + BitSet bitSet = new BitSet(); int bitIndex = 0; - while (integer > 0) { - if (integer%2 > 0) + while (number > 0) { + if (number%2 > 0) bitSet.set(bitIndex); bitIndex++; - integer /= 2; + number /= 2; } return bitSet; } + /** + * Converts a BitSet into the dna string representation. + * + * Warning: This conversion is limited to long precision, therefore the dna sequence cannot + * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create + * a bitSetFrom(BigNumber) method. + * + * We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the + * base_10 representation of the sequence. This is important for us to know how to bring the number + * to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented + * as 0's and leading 0's are omitted). + * + * quasi-canonical because A is represented by a 0, therefore, + * instead of : 0, 1, 2, 3, 10, 11, 12, ... + * we have : 0, 1, 2, 3, 00, 01, 02, ... + * + * but we can correctly decode it because we know the final length. + * + * @param bitSet the bitset representation of the dna sequence + * @return the dna sequence represented by the bitset + */ + public static String dnaFrom(final BitSet bitSet) { + long number = intFrom(bitSet); // the base_10 representation of the bit set + long preContext = 0; // the number of combinations skipped to get to the quasi-canonical representation (we keep it to subtract later) + long nextContext = 4; // the next context (we advance it so we know which one was preceding it). + int i = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet. + while (nextContext <= number) { // find the length of the dna string (i) + preContext = nextContext; // keep track of the number of combinations in the preceding context + nextContext += Math.pow(4, ++i);// calculate the next context + } + number -= preContext; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation + + String dna = ""; + while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical) + byte base = (byte) (number % 4); + switch (base) { + case 0 : dna = "A" + dna; break; + case 1 : dna = "C" + dna; break; + case 2 : dna = "G" + dna; break; + case 3 : dna = "T" + dna; break; + } + number /= 4; + } + for (int j = dna.length(); j < i; j++) + dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above) + + return dna; + } + + /** + * Creates a BitSet representation of a given dna string. + * + * Warning: This conversion is limited to long precision, therefore the dna sequence cannot + * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create + * a bitSetFrom(BigNumber) method. + * + * The bit representation of a dna string is the simple: + * 0 A 4 AA 8 CA + * 1 C 5 AC ... + * 2 G 6 AG 1343 TTGGT + * 3 T 7 AT 1364 TTTTT + * + * To convert from dna to number, we convert the dna string to base10 and add all combinations that + * preceded the string (with smaller lengths). + * + * @param dna the dna sequence + * @return the bitset representing the dna sequence + */ + public static BitSet bitSetFrom(String dna) { + if (dna.length() > 31) + throw new ReviewedStingException(String.format("DNA Length cannot be bigger than 31. dna: %s (%d)", dna, dna.length())); + + long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set + long preContext = 0; // the sum of all combinations that preceded the length of the dna string + for (int i=0; i0) + preContext += Math.pow(4, i); // each length will have 4^i combinations (e.g 1 = 4, 2 = 16, 3 = 64, ...) + } + + return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length. + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/Median.java b/public/java/src/org/broadinstitute/sting/utils/Median.java new file mode 100644 index 000000000..7ebe8d2d7 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/Median.java @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils; + +import java.util.*; + +/** + * Utility class for calculating median from a data set, potentially limiting the size of data to a + * fixed amount + * + * @author Your Name + * @since Date created + */ +public class Median { + final List values; + final int maxValuesToKeep; + boolean sorted = false; + + public Median() { + this(Integer.MAX_VALUE); + } + + public Median(final int maxValuesToKeep) { + this.maxValuesToKeep = maxValuesToKeep; + this.values = new ArrayList(); + } + + public boolean isFull() { + return values.size() >= maxValuesToKeep; + } + + public int size() { + return values.size(); + } + + public boolean isEmpty() { + return values.isEmpty(); + } + + public T getMedian() { + if ( isEmpty() ) + throw new IllegalStateException("Cannot get median value from empty array"); + return getMedian(null); // note that value null will never be used + } + + /** + * Returns the floor(n + 1 / 2) item from the list of values if the list + * has values, or defaultValue is the list is empty. + */ + public T getMedian(final T defaultValue) { + if ( isEmpty() ) + return defaultValue; + + if ( ! sorted ) { + sorted = true; + Collections.sort(values); + } + + final int offset = (int)Math.floor((values.size() + 1) * 0.5) - 1; + return values.get(offset); + } + + public boolean add(final T value) { + if ( ! isFull() ) { + sorted = false; + return values.add(value); + } + else + return false; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index fb133d902..62a67a1f2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -4,6 +4,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -314,6 +315,15 @@ public class ClippingOp { if (start == 0) hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar)); + if (read.hasBaseIndelQualities()) { + byte[] newBaseInsertionQuals = new byte[newLength]; + byte[] newBaseDeletionQuals = new byte[newLength]; + System.arraycopy(read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength); + System.arraycopy(read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength); + hardClippedRead.setBaseQualities(newBaseInsertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION); + hardClippedRead.setBaseQualities(newBaseDeletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION); + } + return hardClippedRead; } diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java index d280ac804..8652d3c28 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java @@ -34,7 +34,7 @@ import java.util.Map; * Date: Dec 29, 2009 */ -public class NestedHashMap{ +public class NestedHashMap { public final Map data = new HashMap(); diff --git a/public/java/src/org/broadinstitute/sting/utils/crypt/CryptUtils.java b/public/java/src/org/broadinstitute/sting/utils/crypt/CryptUtils.java new file mode 100644 index 000000000..e84b1432e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/crypt/CryptUtils.java @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.io.IOUtils; + +import javax.crypto.Cipher; +import java.io.File; +import java.io.InputStream; +import java.security.*; +import java.security.spec.InvalidKeySpecException; +import java.security.spec.KeySpec; +import java.security.spec.PKCS8EncodedKeySpec; +import java.security.spec.X509EncodedKeySpec; +import java.util.Arrays; + +/** + * A set of cryptographic utility methods and constants. + * + * Contains methods to: + * + * -Create a public/private key pair + * -Read and write public/private keys to/from files/streams + * -Load the GATK master private/public keys + * -Encrypt/decrypt data + * + * Also contains constants that control the cryptographic defaults + * throughout the GATK. + * + * @author David Roazen + */ +public class CryptUtils { + + // --------------------------------------------------------------------------------- + // Constants (these control the default cryptographic settings throughout the GATK): + // --------------------------------------------------------------------------------- + + /** + * Default key length in bits of newly-created keys. 2048 bits provides a good balance between + * security and speed. + */ + public static final int DEFAULT_KEY_LENGTH = 2048; + + /** + * Default encryption algorithm to use, when none is specified. + */ + public static final String DEFAULT_ENCRYPTION_ALGORITHM = "RSA"; + + /** + * Default random-number generation algorithm to use, when none is specified. + */ + public static final String DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM = "SHA1PRNG"; + + /** + * Name of the public key file distributed with the GATK. This file is packaged + * into the GATK jar, and we use the system ClassLoader to find it. + */ + public static final String GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME = "GATK_public.key"; + + /** + * Location of the master copy of the GATK private key. + */ + public static final String GATK_MASTER_PRIVATE_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_private.key"; + + /** + * Location of the master copy of the GATK public key. This file should always be the same as + * the public key file distributed with the GATK (and there are automated tests to ensure that it is). + */ + public static final String GATK_MASTER_PUBLIC_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_public.key"; + + /** + * Directory where generated GATK user keys are stored. See the GATKKey class for more information. + */ + public static final String GATK_USER_KEY_DIRECTORY = "/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/"; + + + // ----------------------- + // Utility Methods: + // ----------------------- + + /** + * Generate a new public/private key pair using the default encryption settings defined above. + * + * @return A new public/private key pair created using the default settings + */ + public static KeyPair generateKeyPair() { + return generateKeyPair(DEFAULT_KEY_LENGTH, DEFAULT_ENCRYPTION_ALGORITHM, DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM); + } + + /** + * Generate a new public/private key pair using custom encryption settings. + * + * @param keyLength Length of the key in bits + * @param encryptionAlgorithm Encryption algorithm to use + * @param randNumberAlgorithm Random-number generation algorithm to use + * @return A new public/private key pair, created according to the specified parameters + */ + public static KeyPair generateKeyPair( int keyLength, String encryptionAlgorithm, String randNumberAlgorithm ) { + try { + KeyPairGenerator keyGen = KeyPairGenerator.getInstance(encryptionAlgorithm); + SecureRandom randomnessSource = createRandomnessSource(randNumberAlgorithm); + + keyGen.initialize(keyLength, randomnessSource); + return keyGen.generateKeyPair(); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e); + } + catch ( Exception e ) { + throw new ReviewedStingException("Error while generating key pair", e); + } + } + + /** + * Create a source of randomness using the default random-number generation algorithm. + * + * @return A randomness source that uses the default algorithm + */ + public static SecureRandom createRandomnessSource() { + return createRandomnessSource(DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM); + } + + /** + * Create a source of randomness using a custom random-number generation algorithm. + * + * @param randAlgorithm The random-number generation algorithm to use + * @return A randomness sources that uses the specified algorithm + */ + public static SecureRandom createRandomnessSource ( String randAlgorithm ) { + try { + return SecureRandom.getInstance(randAlgorithm); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested random-number generation algorithm %s", randAlgorithm), e); + } + } + + /** + * Writes a public/private key pair to disk + * + * @param keyPair The key pair we're writing to disk + * @param privateKeyFile Location to write the private key + * @param publicKeyFile Location to write the public key + */ + public static void writeKeyPair ( KeyPair keyPair, File privateKeyFile, File publicKeyFile ) { + writeKey(keyPair.getPrivate(), privateKeyFile); + writeKey(keyPair.getPublic(), publicKeyFile); + } + + /** + * Writes an arbitrary key to disk + * + * @param key The key to write + * @param destination Location to write the key to + */ + public static void writeKey ( Key key, File destination ) { + IOUtils.writeByteArrayToFile(key.getEncoded(), destination); + } + + /** + * Reads in a public key created using the default encryption algorithm from a file. + * + * @param source File containing the public key + * @return The public key read + */ + public static PublicKey readPublicKey ( File source ) { + return decodePublicKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM); + } + + /** + * Reads in a public key created using the default encryption algorithm from a stream. + * + * @param source Stream attached to the public key + * @return The public key read + */ + public static PublicKey readPublicKey ( InputStream source ) { + return decodePublicKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM); + } + + /** + * Decodes the raw bytes of a public key into a usable object. + * + * @param rawKey The encoded bytes of a public key as read from, eg., a file. The + * key must be in the standard X.509 format for a public key. + * @param encryptionAlgorithm The encryption algorithm used to create the public key + * @return The public key as a usable object + */ + public static PublicKey decodePublicKey ( byte[] rawKey, String encryptionAlgorithm ) { + try { + KeySpec keySpec = new X509EncodedKeySpec(rawKey); + KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm); + return keyFactory.generatePublic(keySpec); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e); + } + catch ( InvalidKeySpecException e ) { + throw new ReviewedStingException("Unable to use X.509 key specification to decode the given key", e); + } + } + + /** + * Reads in a private key created using the default encryption algorithm from a file. + * + * @param source File containing the private key + * @return The private key read + */ + public static PrivateKey readPrivateKey ( File source ) { + return decodePrivateKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM); + } + + /** + * Reads in a private key created using the default encryption algorithm from a stream. + * + * @param source Stream attached to the private key + * @return The private key read + */ + public static PrivateKey readPrivateKey ( InputStream source ) { + return decodePrivateKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM); + } + + /** + * Decodes the raw bytes of a private key into a usable object. + * + * @param rawKey The encoded bytes of a private key as read from, eg., a file. The + * key must be in the standard PKCS #8 format for a private key. + * @param encryptionAlgorithm The encryption algorithm used to create the private key + * @return The private key as a usable object + */ + public static PrivateKey decodePrivateKey ( byte[] rawKey, String encryptionAlgorithm ) { + try { + KeySpec keySpec = new PKCS8EncodedKeySpec(rawKey); + KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm); + return keyFactory.generatePrivate(keySpec); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e); + } + catch ( InvalidKeySpecException e ) { + throw new ReviewedStingException("Unable to use the PKCS #8 key specification to decode the given key", e); + } + } + + /** + * Loads the copy of the GATK public key that is distributed with the GATK. Uses the system + * ClassLoader to locate the public key file, which should be stored at the root of the GATK + * jar file. + * + * @return The GATK public key as a usable object + */ + public static PublicKey loadGATKDistributedPublicKey() { + InputStream publicKeyInputStream = ClassLoader.getSystemResourceAsStream(GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME); + + if ( publicKeyInputStream == null ) { + throw new ReviewedStingException(String.format("Could not locate the GATK public key %s in the classpath", + GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME)); + } + + return readPublicKey(publicKeyInputStream); + } + + /** + * Loads the master copy of the GATK private key. You must have the appropriate UNIX permissions + * to do this! + * + * @return The GATK master private key as a usable object + */ + public static PrivateKey loadGATKMasterPrivateKey() { + return readPrivateKey(new File(GATK_MASTER_PRIVATE_KEY_FILE)); + } + + /** + * Loads the master copy of the GATK public key. This should always be the same as the + * public key distributed with the GATK returned by loadGATKDistributedPublicKey(). + * + * @return The GATK master public key as a usable object + */ + public static PublicKey loadGATKMasterPublicKey() { + return readPublicKey(new File(GATK_MASTER_PUBLIC_KEY_FILE)); + } + + /** + * Encrypts the given data using the key provided. + * + * @param data The data to encrypt, as a byte array + * @param encryptKey The key with which to encrypt the data + * @return The encrypted version of the provided data + */ + public static byte[] encryptData ( byte[] data, Key encryptKey ) { + return transformDataUsingCipher(data, encryptKey, Cipher.ENCRYPT_MODE); + } + + /** + * Decrypts the given data using the key provided. + * + * @param encryptedData Data to decrypt, as a byte array + * @param decryptKey The key with which to decrypt the data + * @return The decrypted version of the provided data + */ + public static byte[] decryptData ( byte[] encryptedData, Key decryptKey ) { + return transformDataUsingCipher(encryptedData, decryptKey, Cipher.DECRYPT_MODE); + } + + /** + * Helper method for encryption/decryption that takes data and processes it using + * the given key + * + * @param data Data to encrypt/decrypt + * @param key Key to use to encrypt/decrypt the data + * @param cipherMode Specifies whether we are encrypting or decrypting + * @return The encrypted/decrypted data + */ + private static byte[] transformDataUsingCipher ( byte[] data, Key key, int cipherMode ) { + try { + Cipher cipher = Cipher.getInstance(key.getAlgorithm()); + cipher.init(cipherMode, key); + return cipher.doFinal(data); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Could not find an implementation of the requested algorithm %s", + key.getAlgorithm()), e); + } + catch ( InvalidKeyException e ) { + throw new ReviewedStingException("Key is invalid", e); + } + catch ( GeneralSecurityException e ) { + throw new ReviewedStingException("Error during encryption", e); + } + } + + /** + * Tests whether the public/private keys provided can each decrypt data encrypted by + * the other key -- ie., tests whether these two keys are part of the same public/private + * key pair. + * + * @param privateKey The private key to test + * @param publicKey The public key to test + * @return True if the keys are part of the same key pair and can decrypt each other's + * encrypted data, otherwise false. + */ + public static boolean keysDecryptEachOther ( PrivateKey privateKey, PublicKey publicKey ) { + byte[] plainText = "Test PlainText".getBytes(); + + byte[] dataEncryptedUsingPrivateKey = CryptUtils.encryptData(plainText, privateKey); + byte[] dataEncryptedUsingPublicKey = CryptUtils.encryptData(plainText, publicKey); + + byte[] privateKeyDataDecryptedWithPublicKey = CryptUtils.decryptData(dataEncryptedUsingPrivateKey, publicKey); + byte[] publicKeyDataDecryptedWithPrivateKey = CryptUtils.decryptData(dataEncryptedUsingPublicKey, privateKey); + + // Make sure we actually transformed the data during encryption: + if ( Arrays.equals(plainText, dataEncryptedUsingPrivateKey) || + Arrays.equals(plainText, dataEncryptedUsingPublicKey) || + Arrays.equals(dataEncryptedUsingPrivateKey, dataEncryptedUsingPublicKey) ) { + return false; + } + + // Make sure that we were able to recreate the original plaintext using + // both the public key on the private-key-encrypted data and the private + // key on the public-key-encrypted data: + if ( ! Arrays.equals(plainText, privateKeyDataDecryptedWithPublicKey) || + ! Arrays.equals(plainText, publicKeyDataDecryptedWithPrivateKey) ) { + return false; + } + + return true; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/crypt/GATKKey.java b/public/java/src/org/broadinstitute/sting/utils/crypt/GATKKey.java new file mode 100644 index 000000000..408cb56aa --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/crypt/GATKKey.java @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.io.IOUtils; + +import java.io.*; +import java.security.*; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +/** + * Class to represent a GATK user key. + * + * A GATK user key contains an email address and a cryptographic signature. + * The signature is the SHA-1 hash of the email address encrypted using + * the GATK master private key. The GATK master public key (distributed + * with the GATK) is used to decrypt the signature and validate the key + * at the start of each GATK run that requires a key. + * + * Keys are cryptographically secure in that valid keys definitely come + * from us and cannot be fabricated, however nothing prevents keys from + * being shared between users. + * + * GATK user keys have the following on-disk format: + * + * GZIP Container: + * Email address + * NUL byte (delimiter) + * Cryptographic Signature (encrypted SHA-1 hash of email address) + * + * The key data is wrapped within a GZIP container to placate over-zealous + * email filters (since keys must often be emailed) and also to provide an + * additional integrity check via the built-in GZIP CRC. + * + * @author David Roazen + */ +public class GATKKey { + + /** + * Private key used to sign the GATK key. Required only when creating a new + * key from scratch, not when loading an existing key from disk. + */ + private PrivateKey privateKey; + + /** + * Public key used to validate the GATK key. + */ + private PublicKey publicKey; + + /** + * The user's email address, stored within the key and signed. + */ + private String emailAddress; + + /** + * The cryptographic signature of the email address. By default, this is + * the SHA-1 hash of the email address encrypted using the RSA algorithm. + */ + private byte[] signature; + + /** + * The combination of hash/encryption algorithms to use to generate the signature. + * By default this is "SHA1withRSA" + */ + private String signingAlgorithm; + + /** + * Default hash/encryption algorithms to use to sign the key. + */ + public static final String DEFAULT_SIGNING_ALGORITHM = "SHA1withRSA"; + + /** + * Byte value used to separate the email address from its signature in the key file. + */ + public static final byte GATK_KEY_SECTIONAL_DELIMITER = 0; + + + // ----------------------- + // Constructors: + // ----------------------- + + /** + * Constructor to create a new GATK key from scratch using an email address + * and public/private key pair. The private key is used for signing, and the + * public key is used to validate the newly-created key. + * + * @param privateKey Private key used to sign the new GATK key + * @param publicKey Public key used to validate the new GATK key + * @param emailAddress The user's email address, which we will store in the key and sign + */ + public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress ) { + this(privateKey, publicKey, emailAddress, DEFAULT_SIGNING_ALGORITHM); + } + + /** + * Constructor to create a new GATK key from scratch using an email address + * and public/private key pair, and additionally specify the signing algorithm + * to use. The private key is used for signing, and the public key is used to + * validate the newly-created key. + * + * @param privateKey Private key used to sign the new GATK key + * @param publicKey Public key used to validate the new GATK key + * @param emailAddress The user's email address, which we will store in the key and sign + * @param signingAlgorithm The combination of hash and encryption algorithms to use to sign the key + */ + public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress, String signingAlgorithm ) { + if ( privateKey == null || publicKey == null || emailAddress == null || emailAddress.length() == 0 || signingAlgorithm == null ) { + throw new ReviewedStingException("Cannot construct GATKKey using null/empty arguments"); + } + + this.privateKey = privateKey; + this.publicKey = publicKey; + this.emailAddress = emailAddress; + this.signingAlgorithm = signingAlgorithm; + + validateEmailAddress(); + generateSignature(); + + if ( ! isValid() ) { + throw new ReviewedStingException("Newly-generated GATK key fails validation -- this should never happen!"); + } + } + + /** + * Constructor to load an existing GATK key from a file. + * + * During loading, the key file is checked for integrity, but not cryptographic + * validity (which must be done through a subsequent call to isValid()). + * + * @param publicKey Public key that will be used to validate the loaded GATK key + * in subsequent calls to isValid() + * @param keyFile File containing the GATK key to load + */ + public GATKKey ( PublicKey publicKey, File keyFile ) { + this(publicKey, keyFile, DEFAULT_SIGNING_ALGORITHM); + } + + /** + * Constructor to load an existing GATK key from a file, and additionally specify + * the signing algorithm used to sign the key being loaded. + * + * During loading, the key file is checked for integrity, but not cryptographic + * validity (which must be done through a subsequent call to isValid()). + * + * @param publicKey Public key that will be used to validate the loaded GATK key + * in subsequent calls to isValid() + * @param keyFile File containing the GATK key to load + * @param signingAlgorithm The combination of hash and encryption algorithms used to sign the key + */ + public GATKKey ( PublicKey publicKey, File keyFile, String signingAlgorithm ) { + if ( publicKey == null || keyFile == null || signingAlgorithm == null ) { + throw new ReviewedStingException("Cannot construct GATKKey using null arguments"); + } + + this.publicKey = publicKey; + this.signingAlgorithm = signingAlgorithm; + + readKey(keyFile); + } + + // ----------------------- + // Public API Methods: + // ----------------------- + + /** + * Writes out this key to a file in the format described at the top of this class, + * encapsulating the key within a GZIP container. + * + * @param destination File to write the key to + */ + public void writeKey ( File destination ) { + try { + byte[] keyBytes = marshalKeyData(); + IOUtils.writeByteArrayToStream(keyBytes, new GZIPOutputStream(new FileOutputStream(destination))); + } + catch ( IOException e ) { + throw new UserException.CouldNotCreateOutputFile(destination, e); + } + } + + /** + * Checks whether the signature of this key is cryptographically valid (ie., can be + * decrypted by the public key to produce a valid SHA-1 hash of the email address + * in the key). + * + * @return True if the key's signature passes validation, otherwise false + */ + public boolean isValid() { + try { + Signature sig = Signature.getInstance(signingAlgorithm); + sig.initVerify(publicKey); + sig.update(emailAddress.getBytes()); + return sig.verify(signature); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e); + } + catch ( InvalidKeyException e ) { + // If the GATK public key is invalid, it's likely our problem, not the user's: + throw new ReviewedStingException(String.format("Public key %s is invalid", publicKey), e); + } + catch ( SignatureException e ) { + throw new UserException.UnreadableKeyException("Signature is invalid or signing algorithm was unable to process the input data", e); + } + } + + // ----------------------- + // Private Helper Methods: + // ----------------------- + + /** + * Helper method that creates a signature for this key using the combination of + * hash/encryption algorithms specified at construction time. + */ + private void generateSignature() { + try { + Signature sig = Signature.getInstance(signingAlgorithm); + sig.initSign(privateKey, CryptUtils.createRandomnessSource()); + sig.update(emailAddress.getBytes()); + signature = sig.sign(); + } + catch ( NoSuchAlgorithmException e ) { + throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e); + } + catch ( InvalidKeyException e ) { + throw new ReviewedStingException(String.format("Private key %s is invalid", privateKey), e); + } + catch ( SignatureException e ) { + throw new ReviewedStingException(String.format("Error creating signature for email address %s", emailAddress), e); + } + } + + /** + * Helper method that reads in a GATK key from a file. Should not be called directly -- + * use the appropriate constructor above. + * + * @param source File to read the key from + */ + private void readKey ( File source ) { + try { + byte[] keyBytes = IOUtils.readStreamIntoByteArray(new GZIPInputStream(new FileInputStream(source))); + + // As a sanity check, compare the number of bytes read to the uncompressed file size + // stored in the GZIP ISIZE field. If they don't match, the key must be corrupt: + if ( keyBytes.length != IOUtils.getGZIPFileUncompressedSize(source) ) { + throw new UserException.UnreadableKeyException("Number of bytes read does not match the uncompressed size specified in the GZIP ISIZE field"); + } + + unmarshalKeyData(keyBytes); + } + catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(source, e); + } + catch ( IOException e ) { + throw new UserException.UnreadableKeyException(source, e); + } + catch ( UserException.CouldNotReadInputFile e ) { + throw new UserException.UnreadableKeyException(source, e); + } + } + + /** + * Helper method that assembles the email address and signature into a format + * suitable for writing to disk. + * + * @return The aggregated key data, ready to be written to disk + */ + private byte[] marshalKeyData() { + byte[] emailAddressBytes = emailAddress.getBytes(); + byte[] assembledKey = new byte[emailAddressBytes.length + 1 + signature.length]; + + System.arraycopy(emailAddressBytes, 0, assembledKey, 0, emailAddressBytes.length); + assembledKey[emailAddressBytes.length] = GATK_KEY_SECTIONAL_DELIMITER; + System.arraycopy(signature, 0, assembledKey, emailAddressBytes.length + 1, signature.length); + + return assembledKey; + } + + /** + * Helper method that parses the raw key data from disk into its component + * email address and signature. Performs some basic validation in the process. + * + * @param keyBytes The raw, uncompressed key data read from disk + */ + private void unmarshalKeyData ( byte[] keyBytes ) { + int delimiterPosition = -1; + + for ( int i = 0; i < keyBytes.length; i++ ) { + if ( keyBytes[i] == GATK_KEY_SECTIONAL_DELIMITER ) { + delimiterPosition = i; + break; + } + } + + if ( delimiterPosition == -1 ) { + throw new UserException.UnreadableKeyException("Malformed GATK key contains no sectional delimiter"); + } + else if ( delimiterPosition == 0 ) { + throw new UserException.UnreadableKeyException("Malformed GATK key contains no email address"); + } + else if ( delimiterPosition == keyBytes.length - 1 ) { + throw new UserException.UnreadableKeyException("Malformed GATK key contains no signature"); + } + + byte[] emailAddressBytes = new byte[delimiterPosition]; + System.arraycopy(keyBytes, 0, emailAddressBytes, 0, delimiterPosition); + emailAddress = new String(emailAddressBytes); + + signature = new byte[keyBytes.length - delimiterPosition - 1]; + System.arraycopy(keyBytes, delimiterPosition + 1, signature, 0, keyBytes.length - delimiterPosition - 1); + } + + /** + * Helper method that ensures that the user's email address does not contain the NUL byte, which we + * reserve as a delimiter within each key file. + */ + private void validateEmailAddress() { + for ( byte b : emailAddress.getBytes() ) { + if ( b == GATK_KEY_SECTIONAL_DELIMITER ) { + throw new UserException(String.format("Email address must not contain a byte with value %d", GATK_KEY_SECTIONAL_DELIMITER)); + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 2ece3b077..6cc8008d2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -132,6 +132,10 @@ public class UserException extends ReviewedStingException { public CouldNotReadInputFile(File file, Exception e) { this(file, e.getMessage()); } + + public CouldNotReadInputFile(String message) { + super(message); + } } @@ -151,6 +155,10 @@ public class UserException extends ReviewedStingException { public CouldNotCreateOutputFile(File file, Exception e) { super(String.format("Couldn't write file %s because exception %s", file.getAbsolutePath(), e.getMessage())); } + + public CouldNotCreateOutputFile(String message, Exception e) { + super(message, e); + } } public static class MissortedBAM extends UserException { @@ -319,4 +327,32 @@ public class UserException extends ReviewedStingException { "and try again.", null); } } + + public static class UnreadableKeyException extends UserException { + public UnreadableKeyException ( File f, Exception e ) { + super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " + + "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", + f.getAbsolutePath(), e.getMessage())); + } + + public UnreadableKeyException ( String message, Exception e ) { + this(String.format("%s. Error was: %s", message, e.getMessage())); + } + + public UnreadableKeyException ( String message ) { + super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " + + "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", + message)); + } + } + + public static class KeySignatureVerificationException extends UserException { + public KeySignatureVerificationException ( File f ) { + super(String.format("The signature in key file %s failed cryptographic verification. " + + "If this key was valid in the past, it's likely been revoked. " + + "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + + "for help.", + f.getAbsolutePath())); + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index 7104b1edd..eea45567f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -151,23 +151,16 @@ public class FragmentUtils { final int numBases = firstReadStop + secondRead.getReadLength(); final byte[] bases = new byte[numBases]; final byte[] quals = new byte[numBases]; - // BUGBUG: too verbose, clean this up. final byte[] insertionQuals = new byte[numBases]; final byte[] deletionQuals = new byte[numBases]; final byte[] firstReadBases = firstRead.getReadBases(); final byte[] firstReadQuals = firstRead.getBaseQualities(); - final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities(); - final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities(); final byte[] secondReadBases = secondRead.getReadBases(); final byte[] secondReadQuals = secondRead.getBaseQualities(); - final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities(); - final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities(); for(int iii = 0; iii < firstReadStop; iii++) { bases[iii] = firstReadBases[iii]; quals[iii] = firstReadQuals[iii]; - insertionQuals[iii] = firstReadInsertionQuals[iii]; - deletionQuals[iii] = firstReadDeletionQuals[iii]; } for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) { if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) { @@ -175,22 +168,16 @@ public class FragmentUtils { } bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] ); quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] ); - insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score - deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score } for(int iii = firstRead.getReadLength(); iii < numBases; iii++) { bases[iii] = secondReadBases[iii-firstReadStop]; quals[iii] = secondReadQuals[iii-firstReadStop]; - insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop]; - deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop]; } final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader()); returnRead.setAlignmentStart(firstRead.getUnclippedStart()); returnRead.setReadBases( bases ); - returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ); - returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION ); - returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION ); + returnRead.setBaseQualities( quals ); returnRead.setReadGroup( firstRead.getReadGroup() ); returnRead.setReferenceName( firstRead.getReferenceName() ); final CigarElement c = new CigarElement(bases.length, CigarOperator.M); @@ -199,6 +186,27 @@ public class FragmentUtils { returnRead.setCigar( new Cigar( cList )); returnRead.setMappingQuality( firstRead.getMappingQuality() ); + if( firstRead.hasBaseIndelQualities() || secondRead.hasBaseIndelQualities() ) { + final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities(); + final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities(); + final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities(); + final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities(); + for(int iii = 0; iii < firstReadStop; iii++) { + insertionQuals[iii] = firstReadInsertionQuals[iii]; + deletionQuals[iii] = firstReadDeletionQuals[iii]; + } + for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) { + insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score + deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score + } + for(int iii = firstRead.getReadLength(); iii < numBases; iii++) { + insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop]; + deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop]; + } + returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION ); + returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION ); + } + final ArrayList returnList = new ArrayList(); returnList.add(returnRead); return returnList; diff --git a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java index a5ba857ef..160df0e51 100644 --- a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java @@ -29,10 +29,13 @@ import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.LineIterator; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.*; public class IOUtils { @@ -400,4 +403,173 @@ public class IOUtils { public static boolean isSpecialFile(File file) { return file != null && (file.getAbsolutePath().startsWith("/dev/") || file.equals(DEV_DIR)); } + + /** + * Reads the entirety of the given file into a byte array. Uses a read buffer size of 4096 bytes. + * + * @param source File to read + * @return The contents of the file as a byte array + */ + public static byte[] readFileIntoByteArray ( File source ) { + return readFileIntoByteArray(source, 4096); + } + + /** + * Reads the entirety of the given file into a byte array using the requested read buffer size. + * + * @param source File to read + * @param readBufferSize Number of bytes to read in at one time + * @return The contents of the file as a byte array + */ + public static byte[] readFileIntoByteArray ( File source, int readBufferSize ) { + if ( source == null ) { + throw new ReviewedStingException("Source file was null"); + } + + byte[] fileContents; + + try { + fileContents = readStreamIntoByteArray(new FileInputStream(source), readBufferSize); + } + catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(source, e); + } + + if ( fileContents.length != source.length() ) { + throw new UserException.CouldNotReadInputFile(String.format("Unable to completely read file %s: read only %d/%d bytes", + source.getAbsolutePath(), fileContents.length, source.length())); + } + + return fileContents; + } + + /** + * Reads all data from the given stream into a byte array. Uses a read buffer size of 4096 bytes. + * + * @param in Stream to read data from + * @return The contents of the stream as a byte array + */ + public static byte[] readStreamIntoByteArray ( InputStream in ) { + return readStreamIntoByteArray(in, 4096); + } + + /** + * Reads all data from the given stream into a byte array using the requested read buffer size. + * + * @param in Stream to read data from + * @param readBufferSize Number of bytes to read in at one time + * @return The contents of the stream as a byte array + */ + public static byte[] readStreamIntoByteArray ( InputStream in, int readBufferSize ) { + if ( in == null ) { + throw new ReviewedStingException("Input stream was null"); + } + else if ( readBufferSize <= 0 ) { + throw new ReviewedStingException("Read buffer size must be > 0"); + } + + // Use a fixed-size buffer for each read, but a dynamically-growing buffer + // to hold the accumulated contents of the file/stream: + byte[] readBuffer = new byte[readBufferSize]; + ByteArrayOutputStream fileBuffer = new ByteArrayOutputStream(readBufferSize * 4); + + try { + try { + int currentBytesRead; + + while ( (currentBytesRead = in.read(readBuffer, 0, readBuffer.length)) >= 0 ) { + fileBuffer.write(readBuffer, 0, currentBytesRead); + } + } + finally { + in.close(); + } + } + catch ( IOException e ) { + throw new UserException.CouldNotReadInputFile("I/O error reading from input stream", e); + } + + return fileBuffer.toByteArray(); + } + + /** + * Writes the given array of bytes to a file + * + * @param bytes Data to write + * @param destination File to write the data to + */ + public static void writeByteArrayToFile ( byte[] bytes, File destination ) { + if ( destination == null ) { + throw new ReviewedStingException("Destination file was null"); + } + + try { + writeByteArrayToStream(bytes, new FileOutputStream(destination)); + } + catch ( FileNotFoundException e ) { + throw new UserException.CouldNotCreateOutputFile(destination, e); + } + } + + /** + * Writes the given array of bytes to a stream + * + * @param bytes Data to write + * @param out Stream to write the data to + */ + public static void writeByteArrayToStream ( byte[] bytes, OutputStream out ) { + if ( bytes == null || out == null ) { + throw new ReviewedStingException("Data to write or output stream was null"); + } + + try { + try { + out.write(bytes); + } + finally { + out.close(); + } + } + catch ( IOException e ) { + throw new UserException.CouldNotCreateOutputFile("I/O error writing to output stream", e); + } + } + + /** + * Determines the uncompressed size of a GZIP file. Uses the GZIP ISIZE field in the last + * 4 bytes of the file to get this information. + * + * @param gzipFile GZIP-format file whose uncompressed size to determine + * @return The uncompressed size (in bytes) of the GZIP file + */ + public static int getGZIPFileUncompressedSize ( File gzipFile ) { + if ( gzipFile == null ) { + throw new ReviewedStingException("GZIP file to examine was null"); + } + + try { + // The GZIP ISIZE field holds the uncompressed size of the compressed data. + // It occupies the last 4 bytes of any GZIP file: + RandomAccessFile in = new RandomAccessFile(gzipFile, "r"); + in.seek(gzipFile.length() - 4); + byte[] sizeBytes = new byte[4]; + in.read(sizeBytes, 0, 4); + + ByteBuffer byteBuf = ByteBuffer.wrap(sizeBytes); + byteBuf.order(ByteOrder.LITTLE_ENDIAN); // The GZIP spec mandates little-endian byte order + int uncompressedSize = byteBuf.getInt(); + + // If the size read in is negative, we've overflowed our signed integer: + if ( uncompressedSize < 0 ) { + throw new UserException.CouldNotReadInputFile(String.format("Cannot accurately determine the uncompressed size of file %s " + + "because it's either larger than %d bytes or the GZIP ISIZE field is corrupt", + gzipFile.getAbsolutePath(), Integer.MAX_VALUE)); + } + + return uncompressedSize; + } + catch ( IOException e ) { + throw new UserException.CouldNotReadInputFile(gzipFile, e); + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index f5a9b2f45..648dafb81 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -194,6 +194,10 @@ public class GATKSAMRecord extends BAMRecord { } } + public boolean hasBaseIndelQualities() { + return getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null || getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null; + } + public byte[] getBaseInsertionQualities() { byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) ); if( quals == null ) { diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 626b91cbf..bc4ce098b 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -6,6 +6,7 @@ import org.apache.log4j.Logger; import org.apache.log4j.PatternLayout; import org.apache.log4j.spi.LoggingEvent; import org.broadinstitute.sting.commandline.CommandLineUtils; +import org.broadinstitute.sting.utils.crypt.CryptUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.io.IOUtils; @@ -87,6 +88,9 @@ public abstract class BaseTest { public static final File testDirFile = new File("public/testdata/"); public static final String testDir = testDirFile.getAbsolutePath() + "/"; + public static final String keysDataLocation = validationDataLocation + "keys/"; + public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key"; + /** before the class starts up */ static { // setup a basic log configuration @@ -141,7 +145,7 @@ public abstract class BaseTest { */ public static class TestDataProvider { private static final Map> tests = new HashMap>(); - private String name; + protected String name; /** * Create a new TestDataProvider instance bound to the class variable C diff --git a/public/java/test/org/broadinstitute/sting/MedianUnitTest.java b/public/java/test/org/broadinstitute/sting/MedianUnitTest.java new file mode 100644 index 000000000..db89aee78 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/MedianUnitTest.java @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.utils.Median; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +public class MedianUnitTest extends BaseTest { + + // -------------------------------------------------------------------------------- + // + // Provider + // + // -------------------------------------------------------------------------------- + + private class MedianTestProvider extends TestDataProvider { + final List values = new ArrayList(); + final int cap; + final Integer expected; + + public MedianTestProvider(int expected, int cap, Integer ... values) { + super(MedianTestProvider.class); + this.expected = expected; + this.cap = cap; + this.values.addAll(Arrays.asList(values)); + this.name = String.format("values=%s expected=%d cap=%d", this.values, expected, cap); + } + } + + @DataProvider(name = "MedianTestProvider") + public Object[][] makeMedianTestProvider() { + new MedianTestProvider(1, 1000, 0, 1, 2); + new MedianTestProvider(1, 1000, 1, 0, 1, 2); + new MedianTestProvider(1, 1000, 0, 1, 2, 3); + new MedianTestProvider(2, 1000, 0, 1, 2, 3, 4); + new MedianTestProvider(2, 1000, 4, 1, 2, 3, 0); + new MedianTestProvider(1, 1000, 1); + new MedianTestProvider(2, 1000, 2); + new MedianTestProvider(1, 1000, 1, 2); + + new MedianTestProvider(1, 3, 1); + new MedianTestProvider(1, 3, 1, 2); + new MedianTestProvider(2, 3, 1, 2, 3); + new MedianTestProvider(2, 3, 1, 2, 3, 4); + new MedianTestProvider(2, 3, 1, 2, 3, 4, 5); + + new MedianTestProvider(1, 3, 1); + new MedianTestProvider(1, 3, 1, 2); + new MedianTestProvider(2, 3, 3, 2, 1); + new MedianTestProvider(3, 3, 4, 3, 2, 1); + new MedianTestProvider(4, 3, 5, 4, 3, 2, 1); + + return MedianTestProvider.getTests(MedianTestProvider.class); + } + + @Test(dataProvider = "MedianTestProvider") + public void testBasicLikelihoods(MedianTestProvider cfg) { + final Median median = new Median(cfg.cap); + + int nAdded = 0; + for ( final int value : cfg.values ) + if ( median.add(value) ) + nAdded++; + + Assert.assertEquals(nAdded, median.size()); + + Assert.assertEquals(cfg.values.isEmpty(), median.isEmpty()); + Assert.assertEquals(cfg.values.size() >= cfg.cap, median.isFull()); + Assert.assertEquals(median.getMedian(), cfg.expected, cfg.toString()); + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testEmptyMedian() { + final Median median = new Median(); + Assert.assertTrue(median.isEmpty()); + final Integer d = 100; + Assert.assertEquals(median.getMedian(d), d); + median.getMedian(); + } + +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index ca7653b58..c9e3b6b1b 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -30,6 +30,7 @@ import org.broad.tribble.FeatureCodec; import org.broad.tribble.Tribble; import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.CommandLineGATK; @@ -45,7 +46,7 @@ import java.io.File; import java.util.*; public class WalkerTest extends BaseTest { - private static final boolean ENABLE_REPORTING = false; + private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false; @BeforeMethod public void initializeRandomGenerator() { @@ -121,11 +122,19 @@ public class WalkerTest extends BaseTest { } public class WalkerTestSpec { + + // Arguments implicitly included in all Walker command lines, unless explicitly + // disabled using the disableImplicitArgs() method below. + final String IMPLICIT_ARGS = ENABLE_PHONE_HOME_FOR_TESTS ? + String.format("-et %s", GATKRunReport.PhoneHomeOption.STANDARD) : + String.format("-et %s -K %s", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile); + String args = ""; int nOutputFiles = -1; List md5s = null; List exts = null; Class expectedException = null; + boolean includeImplicitArgs = true; // the default output path for the integration test private File outputFileLocation = null; @@ -159,6 +168,10 @@ public class WalkerTest extends BaseTest { this.expectedException = expectedException; } + public String getArgsWithImplicitArgs() { + return args + (includeImplicitArgs ? " " + IMPLICIT_ARGS : ""); + } + public void setOutputFileLocation(File outputFileLocation) { this.outputFileLocation = outputFileLocation; } @@ -180,6 +193,9 @@ public class WalkerTest extends BaseTest { auxillaryFiles.put(expectededMD5sum, outputfile); } + public void disableImplicitArgs() { + includeImplicitArgs = false; + } } protected boolean parameterize() { @@ -213,7 +229,7 @@ public class WalkerTest extends BaseTest { tmpFiles.add(fl); } - final String args = String.format(spec.args, tmpFiles.toArray()); + final String args = String.format(spec.getArgsWithImplicitArgs(), tmpFiles.toArray()); System.out.println(Utils.dupString('-', 80)); if ( spec.expectsException() ) { @@ -277,13 +293,10 @@ public class WalkerTest extends BaseTest { * @param args the argument list * @param expectedException the expected exception or null */ - public static void executeTest(String name, String args, Class expectedException) { + private void executeTest(String name, String args, Class expectedException) { CommandLineGATK instance = new CommandLineGATK(); String[] command = Utils.escapeExpressions(args); - // add the logging level to each of the integration test commands - command = Utils.appendArray(command, "-et", ENABLE_REPORTING ? "STANDARD" : "NO_ET"); - // run the executable boolean gotAnException = false; try { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java new file mode 100644 index 000000000..aa6a72ef9 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java @@ -0,0 +1,103 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.BitSet; +import java.util.Random; + +/** + * Short one line description of the walker. + * + *

+ * [Long description of the walker] + *

+ * + * + *

Input

+ *

+ * [Description of the Input] + *

+ * + *

Output

+ *

+ * [Description of the Output] + *

+ * + *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T [walker name]
+ *  
+ * + * @author Mauricio Carneiro + * @since 3/1/12 + */ +public class ContextCovariateUnitTest { + ContextCovariate covariate; + RecalibrationArgumentCollection RAC; + Random random; + + @BeforeClass + public void init() { + RAC = new RecalibrationArgumentCollection(); + covariate = new ContextCovariate(); + random = GenomeAnalysisEngine.getRandomGenerator(); + covariate.initialize(RAC); + + } + + @Test(enabled = true) + public void testSimpleContexts() { + byte [] quals = createRandomReadQuals(101); + byte [] bbases = createRandomReadBases(101); + String bases = stringFrom(bbases); + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M"); + CovariateValues values = covariate.getValues(read); + verifyCovariateArray((BitSet []) values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases); + verifyCovariateArray((BitSet []) values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases); + verifyCovariateArray((BitSet []) values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases); + } + + private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) { + for (int i=0; i= contextSize) + Assert.assertEquals(MathUtils.dnaFrom(values[i]), bases.substring(i-contextSize, i)); + else + Assert.assertNull(values[i]); + } + } + + private String stringFrom(byte [] array) { + String s = ""; + for (byte value : array) + s += (char) value; + return s; + } + + private byte [] createRandomReadQuals(int length) { + byte [] quals = new byte[length]; + for (int i=0; i e = new HashMap(); - e.put( 0.01, "2cb2544739e01f6c08fd820112914317" ); - e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" ); + e.put( 0.01, "926b58038dd4989bf7eda697a847eea9" ); + e.put( 1.0 / 1850, "93f44105b43b65730a3b821e27b0fa16" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -204,7 +204,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("f0fbe472f155baf594b1eeb58166edef")); + Arrays.asList("a1b75a7e12b160b0be823228c958573f")); executeTest(String.format("test multiple technologies"), spec); } @@ -223,7 +223,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272")); + Arrays.asList("3bda1279cd6dcb47885f3e19466f11b9")); executeTest(String.format("test calling with BAQ"), spec); } @@ -242,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("bd9d3d50a1f49605d7cd592a0f446899")); + Arrays.asList("d9fc3ba94a0d46029778c7b457e7292a")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -257,7 +257,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6")); + Arrays.asList("b2e30ae3e5ffa6108f9f6178b1d2e679")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("91cd6d2e3972b0b8e4064bb35a33241f")); + Arrays.asList("2cd182a84613fa91a6020466d2d327e2")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd")); + Arrays.asList("9cd08dc412a007933381e9c76c073899")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); } @@ -290,7 +290,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("320f61c87253aba77d6dc782242cba8b")); + Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } @@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("c9897b80615c53a4ea10a4b193d56d9c")); + Arrays.asList("2609675a356f2dfc86f8a1d911210978")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("5282fdb1711a532d726c13507bf80a21")); + Arrays.asList("4fdd8da77167881b71b3547da5c13f94")); executeTest("test MultiSample Phase1 indels with complicated records", spec4); } diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java new file mode 100644 index 000000000..25bd7a2eb --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils; + + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Basic unit test for Haplotype Class + */ +public class HaplotypeUnitTest extends BaseTest { + @BeforeClass + public void init() { + } + + @Test + public void testSimpleInsertionAllele() { + final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA"; + basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testSimpleDeletionAllele() { + final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA"; + basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testSimpleSNPAllele() { + final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "AGTGGTCAACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("C", "G", 1, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCTACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("A", "T", 7, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAACTGGTCAAATGGTCAACTGGTCA"; + basicInsertTest("C", "A", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testComplexInsertionAllele() { + final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + h1CigarList.add(new CigarElement(10, CigarOperator.I)); + h1CigarList.add(new CigarElement(8, CigarOperator.M)); + h1CigarList.add(new CigarElement(3, CigarOperator.D)); + h1CigarList.add(new CigarElement(7, CigarOperator.M)); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC"; + basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testComplexDeletionAllele() { + final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + h1CigarList.add(new CigarElement(10, CigarOperator.I)); + h1CigarList.add(new CigarElement(8, CigarOperator.M)); + h1CigarList.add(new CigarElement(3, CigarOperator.D)); + h1CigarList.add(new CigarElement(7, CigarOperator.M)); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC"; + basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + } + + @Test + public void testComplexSNPAllele() { + final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + + final ArrayList h1CigarList = new ArrayList(); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + h1CigarList.add(new CigarElement(10, CigarOperator.I)); + h1CigarList.add(new CigarElement(8, CigarOperator.M)); + h1CigarList.add(new CigarElement(3, CigarOperator.D)); + h1CigarList.add(new CigarElement(7, CigarOperator.M)); + h1CigarList.add(new CigarElement(4, CigarOperator.M)); + final Cigar h1Cigar = new Cigar(h1CigarList); + String h1bases = "AGCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("T", "G", 1, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCTATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("G", "T", 7, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGCGGGA" + "AGGC"; + basicInsertTest("G", "C", 17, h1Cigar, bases, h1bases); + } + + private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) { + final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1); + final Haplotype h = new Haplotype(hap.getBytes()); + final Allele h1refAllele = Allele.create(ref, true); + final Allele h1altAllele = Allele.create(alt, false); + final Haplotype h1 = new Haplotype( h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE, 0, cigar) ); + final Haplotype h1expected = new Haplotype(newHap.getBytes()); + Assert.assertEquals(h1, h1expected); + + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index 5b50c91a6..1ba6c74d4 100755 --- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -205,16 +205,9 @@ public class MathUtilsUnitTest extends BaseTest { } } - @Test(enabled = true) - public void testIntAndBitSetConversion() { - Assert.assertEquals(428, MathUtils.intFrom(MathUtils.bitSetFrom(428))); - Assert.assertEquals(239847, MathUtils.intFrom(MathUtils.bitSetFrom(239847))); - Assert.assertEquals(12726, MathUtils.intFrom(MathUtils.bitSetFrom(12726))); - Assert.assertEquals(0, MathUtils.intFrom(MathUtils.bitSetFrom(0))); - Assert.assertEquals(1, MathUtils.intFrom(MathUtils.bitSetFrom(1))); - Assert.assertEquals(65536, MathUtils.intFrom(MathUtils.bitSetFrom(65536))); - } - + /** + * Private functions used by testArrayShuffle() + */ private boolean hasUniqueElements(Object[] x) { for (int i = 0; i < x.length; i++) for (int j = i + 1; j < x.length; j++) @@ -230,10 +223,100 @@ public class MathUtilsUnitTest extends BaseTest { return set.isEmpty(); } - private void p (Object []x) { - for (Object v: x) - System.out.print((Integer) v + " "); - System.out.println(); + @Test(enabled = true) + public void testIntAndBitSetConversion() { + Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428); + Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(239847)), 239847); + Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(12726)), 12726); + Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(0)), 0); + Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(1)), 1); + Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(65536)), 65536); + Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(Long.MAX_VALUE)), Long.MAX_VALUE); } + @Test(enabled = true) + public void testDNAAndBitSetConversion() { + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("ACGT")), "ACGT"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AGGTGTTGT")), "AGGTGTTGT"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("A")), "A"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("C")), "C"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("G")), "G"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("T")), "T"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CC")), "CC"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AA")), "AA"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AAAA")), "AAAA"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CCCCCCCCCCCCCC")), "CCCCCCCCCCCCCC"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GGGGGGGGGGGGGG")), "GGGGGGGGGGGGGG"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("TTTTTTTTTTTTTT")), "TTTTTTTTTTTTTT"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GTAGACCGATCTCAGCTAGT")), "GTAGACCGATCTCAGCTAGT"); + Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31) + } + + @Test + public void testApproximateLog10SumLog10() { + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3); + + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3); + + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3); + } + + @Test + public void testNormalizeFromLog10() { + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9})); + + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211})); + } + + /** + * Private function used by testNormalizeFromLog10() + */ + private boolean compareDoubleArrays(double[] b1, double[] b2) { + if( b1.length != b2.length ) { + return false; // sanity check + } + + for( int i=0; i < b1.length; i++ ){ + if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 ) + return false; + } + return true; + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java new file mode 100644 index 000000000..eae4486c6 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; +import org.testng.Assert; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.security.Key; +import java.security.KeyPair; +import java.security.PrivateKey; +import java.security.PublicKey; +import java.util.Arrays; + +public class CryptUtilsUnitTest extends BaseTest { + + @Test + public void testGenerateValidKeyPairWithDefaultSettings() { + KeyPair keyPair = CryptUtils.generateKeyPair(); + Assert.assertTrue(CryptUtils.keysDecryptEachOther(keyPair.getPrivate(), keyPair.getPublic())); + } + + @DataProvider( name = "InvalidKeyPairSettings" ) + public Object[][] invalidKeyPairSettingsDataProvider() { + return new Object[][] { + { -1, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM}, + { CryptUtils.DEFAULT_KEY_LENGTH, "Made-up algorithm", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM}, + { CryptUtils.DEFAULT_KEY_LENGTH, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, "Made-up algorithm"} + }; + } + + @Test( dataProvider = "InvalidKeyPairSettings", expectedExceptions = ReviewedStingException.class ) + public void testGenerateKeyPairWithInvalidSettings( int keyLength, String encryptionAlgorithm, String randomNumberGenerationAlgorithm ) { + KeyPair keyPair = CryptUtils.generateKeyPair(keyLength, encryptionAlgorithm, randomNumberGenerationAlgorithm); + } + + @Test + public void testGATKMasterKeyPairMutualDecryption() { + Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey())); + } + + @Test + public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() { + Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey())); + } + + @Test + public void testKeyPairWriteThenRead() { + KeyPair keyPair = CryptUtils.generateKeyPair(); + File privateKeyFile = createTempFile("testKeyPairWriteThenRead_private", "key"); + File publicKeyFile = createTempFile("testKeyPairWriteThenRead_public", "key"); + + CryptUtils.writeKeyPair(keyPair, privateKeyFile, publicKeyFile); + + assertKeysAreEqual(keyPair.getPrivate(), CryptUtils.readPrivateKey(privateKeyFile)); + assertKeysAreEqual(keyPair.getPublic(), CryptUtils.readPublicKey(publicKeyFile)); + } + + @Test + public void testPublicKeyWriteThenReadFromFile() { + File keyFile = createTempFile("testPublicKeyWriteThenReadFromFile", "key"); + PublicKey publicKey = CryptUtils.generateKeyPair().getPublic(); + + CryptUtils.writeKey(publicKey, keyFile); + + assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(keyFile)); + } + + @Test + public void testPublicKeyWriteThenReadFromStream() throws IOException { + File keyFile = createTempFile("testPublicKeyWriteThenReadFromStream", "key"); + PublicKey publicKey = CryptUtils.generateKeyPair().getPublic(); + + CryptUtils.writeKey(publicKey, keyFile); + + assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(new FileInputStream(keyFile))); + } + + @Test + public void testPrivateKeyWriteThenReadFromFile() { + File keyFile = createTempFile("testPrivateKeyWriteThenReadFromFile", "key"); + PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate(); + + CryptUtils.writeKey(privateKey, keyFile); + + assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(keyFile)); + } + + @Test + public void testPrivateKeyWriteThenReadFromStream() throws IOException { + File keyFile = createTempFile("testPrivateKeyWriteThenReadFromStream", "key"); + PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate(); + + CryptUtils.writeKey(privateKey, keyFile); + + assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(new FileInputStream(keyFile))); + } + + @Test( expectedExceptions = UserException.CouldNotReadInputFile.class ) + public void testReadNonExistentPublicKey() { + File nonExistentFile = new File("jdshgkdfhg.key"); + Assert.assertFalse(nonExistentFile.exists()); + + CryptUtils.readPublicKey(nonExistentFile); + } + + @Test( expectedExceptions = UserException.CouldNotReadInputFile.class ) + public void testReadNonExistentPrivateKey() { + File nonExistentFile = new File("jdshgkdfhg.key"); + Assert.assertFalse(nonExistentFile.exists()); + + CryptUtils.readPrivateKey(nonExistentFile); + } + + @Test + public void testDecodePublicKey() { + PublicKey originalKey = CryptUtils.generateKeyPair().getPublic(); + PublicKey decodedKey = CryptUtils.decodePublicKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM); + assertKeysAreEqual(originalKey, decodedKey); + } + + @Test + public void testDecodePrivateKey() { + PrivateKey originalKey = CryptUtils.generateKeyPair().getPrivate(); + PrivateKey decodedKey = CryptUtils.decodePrivateKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM); + assertKeysAreEqual(originalKey, decodedKey); + } + + @Test + public void testLoadGATKMasterPrivateKey() { + PrivateKey gatkMasterPrivateKey = CryptUtils.loadGATKMasterPrivateKey(); + } + + @Test + public void testLoadGATKMasterPublicKey() { + PublicKey gatkMasterPublicKey = CryptUtils.loadGATKMasterPublicKey(); + } + + @Test + public void testLoadGATKDistributedPublicKey() { + PublicKey gatkDistributedPublicKey = CryptUtils.loadGATKDistributedPublicKey(); + } + + private void assertKeysAreEqual( Key originalKey, Key keyFromDisk ) { + Assert.assertTrue(Arrays.equals(originalKey.getEncoded(), keyFromDisk.getEncoded())); + Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm()); + Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat()); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java new file mode 100644 index 000000000..8fb75ef38 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class GATKKeyIntegrationTest extends WalkerTest { + + public static final String BASE_COMMAND = String.format("-T PrintReads -R %s -I %s -o %%s", + testDir + "exampleFASTA.fasta", + testDir + "exampleBAM.bam"); + public static final String MD5_UPON_SUCCESSFUL_RUN = "b9dc5bf6753ca2819e70b056eaf61258"; + + + private void runGATKKeyTest ( String testName, String etArg, String keyArg, Class expectedException, String md5 ) { + String command = BASE_COMMAND + String.format(" %s %s", etArg, keyArg); + + WalkerTestSpec spec = expectedException != null ? + new WalkerTestSpec(command, 1, expectedException) : + new WalkerTestSpec(command, 1, Arrays.asList(md5)); + + spec.disableImplicitArgs(); // Turn off automatic inclusion of -et/-K args by WalkerTest + executeTest(testName, spec); + } + + @Test + public void testValidKeyNoET() { + runGATKKeyTest("testValidKeyNoET", + "-et " + GATKRunReport.PhoneHomeOption.NO_ET, + "-K " + keysDataLocation + "valid.key", + null, + MD5_UPON_SUCCESSFUL_RUN); + } + + @Test + public void testValidKeyETStdout() { + runGATKKeyTest("testValidKeyETStdout", + "-et " + GATKRunReport.PhoneHomeOption.STDOUT, + "-K " + keysDataLocation + "valid.key", + null, + MD5_UPON_SUCCESSFUL_RUN); + } + + @Test + public void testValidKeyETStandard() { + runGATKKeyTest("testValidKeyETStandard", + "", + "-K " + keysDataLocation + "valid.key", + null, + MD5_UPON_SUCCESSFUL_RUN); + } + + @Test + public void testNoKeyNoET() { + runGATKKeyTest("testNoKeyNoET", + "-et " + GATKRunReport.PhoneHomeOption.NO_ET, + "", + UserException.class, + null); + } + + @Test + public void testNoKeyETStdout() { + runGATKKeyTest("testNoKeyETStdout", + "-et " + GATKRunReport.PhoneHomeOption.STDOUT, + "", + UserException.class, + null); + } + + @Test + public void testNoKeyETStandard() { + runGATKKeyTest("testNoKeyETStandard", + "", + "", + null, + MD5_UPON_SUCCESSFUL_RUN); + } + + @Test + public void testRevokedKey() { + runGATKKeyTest("testRevokedKey", + "-et " + GATKRunReport.PhoneHomeOption.NO_ET, + "-K " + keysDataLocation + "revoked.key", + UserException.KeySignatureVerificationException.class, + null); + } + + @DataProvider(name = "CorruptKeyTestData") + public Object[][] corruptKeyDataProvider() { + return new Object[][] { + { "corrupt_empty.key", UserException.UnreadableKeyException.class }, + { "corrupt_single_byte_file.key", UserException.UnreadableKeyException.class }, + { "corrupt_random_contents.key", UserException.UnreadableKeyException.class }, + { "corrupt_single_byte_deletion.key", UserException.UnreadableKeyException.class }, + { "corrupt_single_byte_insertion.key", UserException.UnreadableKeyException.class }, + { "corrupt_single_byte_change.key", UserException.UnreadableKeyException.class }, + { "corrupt_multi_byte_deletion.key", UserException.UnreadableKeyException.class }, + { "corrupt_multi_byte_insertion.key", UserException.UnreadableKeyException.class }, + { "corrupt_multi_byte_change.key", UserException.UnreadableKeyException.class }, + { "corrupt_bad_isize_field.key", UserException.UnreadableKeyException.class }, + { "corrupt_bad_crc.key", UserException.UnreadableKeyException.class }, + { "corrupt_no_email_address.key", UserException.UnreadableKeyException.class }, + { "corrupt_no_sectional_delimiter.key", UserException.KeySignatureVerificationException.class }, + { "corrupt_no_signature.key", UserException.UnreadableKeyException.class }, + { "corrupt_bad_signature.key", UserException.KeySignatureVerificationException.class }, + { "corrupt_non_gzipped_valid_key.key", UserException.UnreadableKeyException.class } + }; + } + + @Test(dataProvider = "CorruptKeyTestData") + public void testCorruptKey ( String corruptKeyName, Class expectedException ) { + runGATKKeyTest(String.format("testCorruptKey (%s)", corruptKeyName), + "-et " + GATKRunReport.PhoneHomeOption.NO_ET, + "-K " + keysDataLocation + corruptKeyName, + expectedException, + null); + } + + @Test + public void testCorruptButNonRequiredKey() { + runGATKKeyTest("testCorruptButNonRequiredKey", + "", + "-K " + keysDataLocation + "corrupt_random_contents.key", + null, + MD5_UPON_SUCCESSFUL_RUN); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java new file mode 100644 index 000000000..5e7b07a1e --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.crypt; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.Test; +import org.testng.Assert; + +import java.io.File; +import java.security.KeyPair; +import java.security.PrivateKey; +import java.security.PublicKey; + +public class GATKKeyUnitTest extends BaseTest { + + @Test + public void testCreateGATKKeyUsingMasterKeyPair() { + PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey(); + PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey(); + + // We should be able to create a valid GATKKey using our master key pair: + GATKKey key = new GATKKey(masterPrivateKey, masterPublicKey, "foo@bar.com"); + Assert.assertTrue(key.isValid()); + } + + @Test + public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() { + PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey(); + PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey(); + + // We should also be able to create a valid GATKKey using our master private + // key and the public key we distribute with the GATK: + GATKKey key = new GATKKey(masterPrivateKey, distributedPublicKey, "foo@bar.com"); + Assert.assertTrue(key.isValid()); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testKeyPairMismatch() { + KeyPair firstKeyPair = CryptUtils.generateKeyPair(); + KeyPair secondKeyPair = CryptUtils.generateKeyPair(); + + // Attempting to create a GATK Key with private and public keys that aren't part of the + // same key pair should immediately trigger a validation failure: + GATKKey key = new GATKKey(firstKeyPair.getPrivate(), secondKeyPair.getPublic(), "foo@bar.com"); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testEncryptionAlgorithmMismatch() { + KeyPair keyPair = CryptUtils.generateKeyPair(CryptUtils.DEFAULT_KEY_LENGTH, "DSA", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM); + + // Attempting to use a DSA private key to create an RSA signature should throw an error: + GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), "foo@bar.com", "SHA1withRSA"); + } + + @Test( expectedExceptions = UserException.class ) + public void testInvalidEmailAddress() { + String emailAddressWithNulByte = new String(new byte[] { 0 }); + KeyPair keyPair = CryptUtils.generateKeyPair(); + + // Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file: + GATKKey key = new GATKKey(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey(), + emailAddressWithNulByte); + } + + @Test + public void testCreateGATKKeyFromValidKeyFile() { + GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "valid.key")); + Assert.assertTrue(key.isValid()); + } + + @Test( expectedExceptions = UserException.UnreadableKeyException.class ) + public void testCreateGATKKeyFromCorruptKeyFile() { + GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "corrupt_random_contents.key")); + } + + @Test + public void testCreateGATKKeyFromRevokedKeyFile() { + GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "revoked.key")); + Assert.assertFalse(key.isValid()); + } + + @Test( expectedExceptions = UserException.CouldNotReadInputFile.class ) + public void testCreateGATKKeyFromNonExistentFile() { + File nonExistentFile = new File("ghfdkgsdhg.key"); + Assert.assertFalse(nonExistentFile.exists()); + + GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java index 757e6efdf..941d2b14c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java @@ -27,12 +27,18 @@ package org.broadinstitute.sting.utils.io; import org.apache.commons.io.FileUtils; import org.broadinstitute.sting.BaseTest; import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.Random; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; public class IOUtilsUnitTest extends BaseTest { @@ -230,4 +236,90 @@ public class IOUtilsUnitTest extends BaseTest { Assert.assertFalse(IOUtils.isSpecialFile(new File("/home/user/my.file"))); Assert.assertFalse(IOUtils.isSpecialFile(new File("/devfake/null"))); } + + @DataProvider( name = "ByteArrayIOTestData") + public Object[][] byteArrayIOTestDataProvider() { + return new Object[][] { + // file size, read buffer size + { 0, 4096 }, + { 1, 4096 }, + { 2000, 4096 }, + { 4095, 4096 }, + { 4096, 4096 }, + { 4097, 4096 }, + { 6000, 4096 }, + { 8191, 4096 }, + { 8192, 4096 }, + { 8193, 4096 }, + { 10000, 4096 } + }; + } + + @Test( dataProvider = "ByteArrayIOTestData" ) + public void testWriteThenReadFileIntoByteArray ( int fileSize, int readBufferSize ) throws Exception { + File tempFile = createTempFile(String.format("testWriteThenReadFileIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp"); + + byte[] dataWritten = getDeterministicRandomData(fileSize); + IOUtils.writeByteArrayToFile(dataWritten, tempFile); + byte[] dataRead = IOUtils.readFileIntoByteArray(tempFile, readBufferSize); + + Assert.assertEquals(dataRead.length, dataWritten.length); + Assert.assertTrue(Arrays.equals(dataRead, dataWritten)); + } + + @Test( dataProvider = "ByteArrayIOTestData" ) + public void testWriteThenReadStreamIntoByteArray ( int fileSize, int readBufferSize ) throws Exception { + File tempFile = createTempFile(String.format("testWriteThenReadStreamIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp"); + + byte[] dataWritten = getDeterministicRandomData(fileSize); + IOUtils.writeByteArrayToStream(dataWritten, new FileOutputStream(tempFile)); + byte[] dataRead = IOUtils.readStreamIntoByteArray(new FileInputStream(tempFile), readBufferSize); + + Assert.assertEquals(dataRead.length, dataWritten.length); + Assert.assertTrue(Arrays.equals(dataRead, dataWritten)); + } + + @Test( expectedExceptions = UserException.CouldNotReadInputFile.class ) + public void testReadNonExistentFileIntoByteArray() { + File nonExistentFile = new File("djfhsdkjghdfk"); + Assert.assertFalse(nonExistentFile.exists()); + + IOUtils.readFileIntoByteArray(nonExistentFile); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testReadNullStreamIntoByteArray() { + IOUtils.readStreamIntoByteArray(null); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testReadStreamIntoByteArrayInvalidBufferSize() throws Exception { + IOUtils.readStreamIntoByteArray(new FileInputStream(createTempFile("testReadStreamIntoByteArrayInvalidBufferSize", "tmp")), + -1); + } + + @Test( expectedExceptions = UserException.CouldNotCreateOutputFile.class ) + public void testWriteByteArrayToUncreatableFile() { + IOUtils.writeByteArrayToFile(new byte[]{0}, new File("/dev/foo/bar")); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testWriteNullByteArrayToFile() { + IOUtils.writeByteArrayToFile(null, createTempFile("testWriteNullByteArrayToFile", "tmp")); + } + + @Test( expectedExceptions = ReviewedStingException.class ) + public void testWriteByteArrayToNullStream() { + IOUtils.writeByteArrayToStream(new byte[]{0}, null); + } + + private byte[] getDeterministicRandomData ( int size ) { + GenomeAnalysisEngine.resetRandomGenerator(); + Random rand = GenomeAnalysisEngine.getRandomGenerator(); + + byte[] randomData = new byte[size]; + rand.nextBytes(randomData); + + return randomData; + } } diff --git a/public/keys/GATK_public.key b/public/keys/GATK_public.key new file mode 100644 index 000000000..05cdde1c2 Binary files /dev/null and b/public/keys/GATK_public.key differ diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml index 283b5eabf..68459f6d2 100644 --- a/public/packages/GATKEngine.xml +++ b/public/packages/GATKEngine.xml @@ -36,6 +36,8 @@ + +