Merge branch 'master' of ssh://ni.broadinstitute.org/humgen/gsa-scr1/chartl/dev/unstable

This commit is contained in:
Christopher Hartl 2012-03-06 14:23:14 -05:00
commit 67def6acc8
44 changed files with 2746 additions and 274 deletions

View File

@ -47,6 +47,7 @@
<property name="R.package.path" value="org/broadinstitute/sting/utils/R" />
<property name="resource.file" value="StingText.properties" />
<property name="resource.path" value="${java.classes}/StingText.properties" />
<property name="key.dir" value="${public.dir}/keys" />
<property name="scala.public.source.dir" value="${public.dir}/scala/src" />
<property name="scala.private.source.dir" value="${private.dir}/scala/src" />
@ -567,6 +568,7 @@
</fileset>
<fileset dir="${java.classes}" includes="**/commandline/**/*.class"/>
<fileset dir="${java.classes}" includes="**/sting/pipeline/**/*.class"/>
<fileset dir="${java.classes}" includes="**/sting/tools/**/*.class"/>
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
@ -615,6 +617,9 @@
<include name="**/gatk/**/*.R"/>
<include name="**/alignment/**/*.R"/>
</fileset>
<fileset dir="${key.dir}">
<include name="**/*.key"/>
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" />
</manifest>
@ -879,6 +884,7 @@
<pathelement location="${R.tar.dir}" />
<pathelement location="${R.public.scripts.dir}" />
<pathelement location="${R.private.scripts.dir}" />
<pathelement location="${key.dir}" />
<path refid="external.dependencies" />
</path>

View File

@ -4,11 +4,9 @@
colnames(d) = tableHeader;
for (i in 1:ncol(d)) {
v = suppressWarnings(as.numeric(d[,i]));
if (length(na.omit(as.numeric(v))) == length(d[,i])) {
d[,i] = v;
}
# use the general type.convert infrastructure of read.table to convert column data to R types
v = type.convert(d[,i])
d[,i] = v;
}
usedNames = ls(envir=tableEnv, pattern=tableName);

View File

@ -35,9 +35,12 @@ import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.crypt.CryptUtils;
import org.broadinstitute.sting.utils.crypt.GATKKey;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.ListFileUtils;
import java.security.PublicKey;
import java.util.*;
/**
@ -78,6 +81,9 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
Walker<?,?> walker = engine.getWalkerByName(getAnalysisName());
try {
// Make sure a valid GATK user key is present, if required.
authorizeGATKRun();
engine.setArguments(getArgumentCollection());
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
@ -130,6 +136,28 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
return 0;
}
/**
* Authorizes this run of the GATK by checking for a valid GATK user key, if required.
* Currently, a key is required only if running with the -et NO_ET or -et STDOUT options.
*/
private void authorizeGATKRun() {
if ( getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.NO_ET ||
getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) {
if ( getArgumentCollection().gatkKeyFile == null ) {
throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
"for more information and instructions on how to obtain a key.");
}
else {
PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();
GATKKey gatkUserKey = new GATKKey(gatkPublicKey, getArgumentCollection().gatkKeyFile);
if ( ! gatkUserKey.isValid() ) {
throw new UserException.KeySignatureVerificationException(getArgumentCollection().gatkKeyFile);
}
}
}
}
/**
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.

View File

@ -65,9 +65,12 @@ public class GATKArgumentCollection {
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null;
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
public File gatkKeyFile = null;
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
public List<String> readFilters = new ArrayList<String>();

View File

@ -154,9 +154,7 @@ public class GATKRunReport {
/** Standard option. Writes to local repository if it can be found, or S3 otherwise */
STANDARD,
/** Force output to STDOUT. For debugging only */
STDOUT,
/** Force output to S3. For debugging only */
AWS_S3 // todo -- remove me -- really just for testing purposes
STDOUT
}
private static final DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss");
@ -239,11 +237,8 @@ public class GATKRunReport {
case STDOUT:
postReportToStream(System.out);
break;
case AWS_S3:
postReportToAWSS3();
break;
default:
exceptDuringRunReport("BUG: unexcepted PhoneHomeOption ");
exceptDuringRunReport("BUG: unexpected PhoneHomeOption ");
break;
}
}

View File

@ -43,15 +43,15 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
final ArrayList<Double> altQuals = new ArrayList<Double>();
if ( vc.isSNP() ) {
final List<Byte> altAlleles = new ArrayList<Byte>();
for ( final Allele a : vc.getAlternateAlleles() )
altAlleles.add(a.getBases()[0]);
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
if ( context == null )
continue;
final List<Byte> altAlleles = new ArrayList<Byte>();
for ( final Allele a : vc.getAlternateAlleles() )
altAlleles.add(a.getBases()[0]);
fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals);
}
} else if ( vc.isIndel() || vc.isMixed() ) {

View File

@ -26,10 +26,12 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
import java.util.BitSet;
/**
* Created by IntelliJ IDEA.
@ -43,10 +45,6 @@ public class ContextCovariate implements StandardCovariate {
private int insertionsContextSize;
private int deletionsContextSize;
private String mismatchesNoContext = "";
private String insertionsNoContext = "";
private String deletionsNoContext = "";
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize(final RecalibrationArgumentCollection RAC) {
@ -57,29 +55,26 @@ public class ContextCovariate implements StandardCovariate {
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
// initialize no context strings given the size of the context for each covariate type
mismatchesNoContext = makeAllNStringWithLength(mismatchesContextSize);
insertionsNoContext = makeAllNStringWithLength(insertionsContextSize);
deletionsNoContext = makeAllNStringWithLength( deletionsContextSize);
}
@Override
public CovariateValues getValues(final GATKSAMRecord read) {
int l = read.getReadLength();
String[] mismatches = new String [l];
String[] insertions = new String [l];
String[] deletions = new String [l];
BitSet[] mismatches = new BitSet[l];
BitSet[] insertions = new BitSet[l];
BitSet[] deletions = new BitSet[l];
final boolean negativeStrand = read.getReadNegativeStrandFlag();
byte[] bases = read.getReadBases();
if (negativeStrand) {
bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
}
if (negativeStrand)
bases = BaseUtils.simpleReverseComplement(bases);
for (int i = 0; i < read.getReadLength(); i++) {
mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext);
insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext);
deletions[i] = contextWith(bases, i, deletionsContextSize, deletionsNoContext);
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
insertions[i] = contextWith(bases, i, insertionsContextSize);
deletions[i] = contextWith(bases, i, deletionsContextSize);
}
if (negativeStrand) {
reverse(mismatches);
reverse(insertions);
@ -90,7 +85,7 @@ public class ContextCovariate implements StandardCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override
public final Comparable getValue(final String str) {
public final Object getValue(final String str) {
return str;
}
@ -100,29 +95,28 @@ public class ContextCovariate implements StandardCovariate {
* @param bases the bases in the read to build the context from
* @param offset the position in the read to calculate the context for
* @param contextSize context size to use building the context
* @param noContextString string to return if the position is not far enough in the read to have a full context before.
* @return
*/
private String contextWith(byte [] bases, int offset, int contextSize, String noContextString) {
return (offset < contextSize) ? noContextString : new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
private BitSet contextWith(byte [] bases, int offset, int contextSize) {
if (offset < contextSize)
return null;
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
if (context.contains("N"))
return null;
return MathUtils.bitSetFrom(context);
}
private String makeAllNStringWithLength(int length) {
String s = "";
for (int i=0; i<length; i++)
s += "N";
return s;
}
/**
* Reverses the given array in place.
*
* @param array any array
*/
private static void reverse(final Comparable[] array) {
private static void reverse(final Object[] array) {
final int arrayLength = array.length;
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
final Comparable temp = array[l];
final Object temp = array[l];
array[l] = array[r];
array[r] = temp;
}

View File

@ -53,7 +53,7 @@ public interface Covariate {
*/
public CovariateValues getValues(GATKSAMRecord read);
public Comparable getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
public Object getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
}
interface RequiredCovariate extends Covariate {}

View File

@ -12,25 +12,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* @since 2/8/12
*/
public class CovariateValues {
private Comparable[] mismatches;
private Comparable[] insertions;
private Comparable[] deletions;
private Object[] mismatches;
private Object[] insertions;
private Object[] deletions;
public CovariateValues(Comparable[] mismatch, Comparable[] insertion, Comparable[] deletion) {
public CovariateValues(Object[] mismatch, Object[] insertion, Object[] deletion) {
this.mismatches = mismatch;
this.insertions = insertion;
this.deletions = deletion;
}
public Comparable[] getMismatches() {
public Object[] getMismatches() {
return mismatches;
}
public Comparable[] getInsertions() {
public Object[] getInsertions() {
return insertions;
}
public Comparable[] getDeletions() {
public Object[] getDeletions() {
return deletions;
}

View File

@ -198,7 +198,7 @@ public class CycleCovariate implements StandardCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override
public final Comparable getValue(final String str) {
public final Object getValue(final String str) {
return Integer.parseInt(str);
}
}

View File

@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -67,7 +65,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override
public final Comparable getValue(final String str) {
public final Object getValue(final String str) {
return Integer.parseInt(str);
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
import java.util.HashMap;
/*
* Copyright (c) 2009 The Broad Institute
@ -38,6 +39,10 @@ import java.util.Arrays;
*/
public class ReadGroupCovariate implements RequiredCovariate {
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
private short nextId = 0;
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
@ -48,16 +53,29 @@ public class ReadGroupCovariate implements RequiredCovariate {
public CovariateValues getValues(final GATKSAMRecord read) {
final int l = read.getReadLength();
final String readGroupId = read.getReadGroup().getReadGroupId();
String [] readGroups = new String[l];
Arrays.fill(readGroups, readGroupId);
short shortId;
if (readGroupLookupTable.containsKey(readGroupId))
shortId = readGroupLookupTable.get(readGroupId);
else {
shortId = nextId;
readGroupLookupTable.put(readGroupId, nextId);
readGroupReverseLookupTable.put(nextId, readGroupId);
nextId++;
}
Short [] readGroups = new Short[l];
Arrays.fill(readGroups, shortId);
return new CovariateValues(readGroups, readGroups, readGroups);
}
// Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override
public final Comparable getValue(final String str) {
public final Object getValue(final String str) {
return str;
}
public final String decodeReadGroup(final short id) {
return readGroupReverseLookupTable.get(id);
}
}

View File

@ -25,8 +25,14 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -39,6 +45,63 @@ import org.broadinstitute.sting.commandline.Hidden;
public class RecalibrationArgumentCollection {
/**
* This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference,
* so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites. This tool accepts any number of RodBindings (VCF, Bed, etc.)
* for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
* Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument.
*/
@Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
protected List<RodBinding<Feature>> knownSites = Collections.emptyList();
/**
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
*/
@Gather(CountCovariatesGatherer.class)
@Output
protected PrintStream RECAL_FILE;
/**
* List all implemented covariates.
*/
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
protected boolean LIST_ONLY = false;
/**
* Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list.
*/
@Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false)
protected String[] COVARIATES = null;
/*
* Use the standard set of covariates in addition to the ones listed using the -cov argument
*/
@Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false)
protected boolean USE_STANDARD_COVARIATES = true;
/////////////////////////////
// Debugging-only Arguments
/////////////////////////////
/**
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
*/
@Hidden
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
protected boolean RUN_WITHOUT_DBSNP = false;
/////////////////////////////
// protected Member Variables
/////////////////////////////
protected final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables)
protected final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();// A list to hold the covariate objects that were requested
protected final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
/**
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
* reads which have had the reference inserted because of color space inconsistencies.

View File

@ -0,0 +1,226 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diagnostics;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.Median;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
import java.text.DateFormat;
import java.util.HashMap;
import java.util.Map;
/**
* Emits a GATKReport containing read group, sample, library, platform, center, sequencing data,
* paired end status, simple read type name (e.g. 2x76) median insert size and median read length
* for each read group in every provided BAM file
*
* Note that this walker stops when all read groups have been observed at least a few thousand times so that
* the median statistics are well determined. It is safe to run it WG and it'll finish in an appropriate
* timeframe.
*
* <h2>Input</h2>
* <p>
* Any number of BAM files
* </p>
*
* <h2>Output</h2>
* <p>
* GATKReport containing read group, sample, library, platform, center, median insert size and median read length.
*
* For example, running this tool on the NA12878 data sets:
*
* <pre>
* ##:GATKReport.v0.2 ReadGroupProperties : Table of read group properties
* readgroup sample library platform center date has.any.reads is.paired.end n.reads.analyzed simple.read.type median.read.length median.insert.size
* 20FUK.1 NA12878 Solexa-18483 illumina BI 2/2/10 true true 498 2x101 101 386
* 20FUK.2 NA12878 Solexa-18484 illumina BI 2/2/10 true true 476 2x101 101 417
* 20FUK.3 NA12878 Solexa-18483 illumina BI 2/2/10 true true 407 2x101 101 387
* 20FUK.4 NA12878 Solexa-18484 illumina BI 2/2/10 true true 389 2x101 101 415
* 20FUK.5 NA12878 Solexa-18483 illumina BI 2/2/10 true true 433 2x101 101 386
* 20FUK.6 NA12878 Solexa-18484 illumina BI 2/2/10 true true 480 2x101 101 418
* 20FUK.7 NA12878 Solexa-18483 illumina BI 2/2/10 true true 450 2x101 101 386
* 20FUK.8 NA12878 Solexa-18484 illumina BI 2/2/10 true true 438 2x101 101 418
* 20GAV.1 NA12878 Solexa-18483 illumina BI 1/26/10 true true 490 2x101 101 391
* 20GAV.2 NA12878 Solexa-18484 illumina BI 1/26/10 true true 485 2x101 101 417
* 20GAV.3 NA12878 Solexa-18483 illumina BI 1/26/10 true true 460 2x101 101 392
* 20GAV.4 NA12878 Solexa-18484 illumina BI 1/26/10 true true 434 2x101 101 415
* 20GAV.5 NA12878 Solexa-18483 illumina BI 1/26/10 true true 479 2x101 101 389
* 20GAV.6 NA12878 Solexa-18484 illumina BI 1/26/10 true true 461 2x101 101 416
* 20GAV.7 NA12878 Solexa-18483 illumina BI 1/26/10 true true 509 2x101 101 386
* 20GAV.8 NA12878 Solexa-18484 illumina BI 1/26/10 true true 476 2x101 101 410 101 414
* </pre>
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T ReadGroupProperties
* -I example1.bam -I example2.bam etc
* -R reference.fasta
* -o example.gatkreport.txt
* </pre>
*
* @author Mark DePristo
*/
public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
@Output
public PrintStream out;
@Argument(shortName="maxElementsForMedian", doc="Calculate median from the first maxElementsForMedian values observed", required=false)
public int MAX_VALUES_FOR_MEDIAN = 10000;
private final static String TABLE_NAME = "ReadGroupProperties";
private final Map<String, PerReadGroupInfo> readGroupInfo = new HashMap<String, PerReadGroupInfo>();
private class PerReadGroupInfo {
public final Median<Integer> readLength = new Median<Integer>(MAX_VALUES_FOR_MEDIAN);
public final Median<Integer> insertSize = new Median<Integer>(MAX_VALUES_FOR_MEDIAN);
public int nReadsSeen = 0, nReadsPaired = 0;
public boolean needsMoreData() {
return ! readLength.isFull() || ! insertSize.isFull();
}
}
@Override
public void initialize() {
for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
readGroupInfo.put(rg.getId(), new PerReadGroupInfo());
}
}
@Override
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
return ! (read.getReadFailsVendorQualityCheckFlag() || read.getReadUnmappedFlag());
}
@Override
public boolean isDone() {
for ( PerReadGroupInfo info : readGroupInfo.values() ) {
if ( info.needsMoreData() )
return false;
}
return true;
}
@Override
public Integer map(ReferenceContext referenceContext, GATKSAMRecord read, ReadMetaDataTracker readMetaDataTracker) {
final String rgID = read.getReadGroup().getId();
final PerReadGroupInfo info = readGroupInfo.get(rgID);
if ( info.needsMoreData() ) {
info.readLength.add(read.getReadLength());
info.nReadsSeen++;
if ( read.getReadPairedFlag() ) {
info.nReadsPaired++;
if ( read.getInferredInsertSize() != 0) {
info.insertSize.add(Math.abs(read.getInferredInsertSize()));
}
}
}
return null;
}
@Override
public Integer reduceInit() {
return null;
}
@Override
public Integer reduce(Integer integer, Integer integer1) {
return null;
}
@Override
public void onTraversalDone(Integer sum) {
final GATKReport report = new GATKReport();
report.addTable(TABLE_NAME, "Table of read group properties");
GATKReportTable table = report.getTable(TABLE_NAME);
DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT);
table.addPrimaryKey("readgroup");
//* Emits a GATKReport containing read group, sample, library, platform, center, median insert size and
//* median read length for each read group in every BAM file.
table.addColumn("sample", "NA");
table.addColumn("library", "NA");
table.addColumn("platform", "NA");
table.addColumn("center", "NA");
table.addColumn("date", "NA");
table.addColumn("has.any.reads", "false");
table.addColumn("is.paired.end", "false");
table.addColumn("n.reads.analyzed", "NA");
table.addColumn("simple.read.type", "NA");
table.addColumn("median.read.length", Integer.valueOf(0));
table.addColumn("median.insert.size", Integer.valueOf(0));
for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
final String rgID = rg.getId();
PerReadGroupInfo info = readGroupInfo.get(rgID);
// we are paired if > 25% of reads are paired
final boolean isPaired = info.nReadsPaired / (1.0 * (info.nReadsSeen+1)) > 0.25;
final boolean hasAnyReads = info.nReadsSeen > 0;
final int readLength = info.readLength.getMedian(0);
setTableValue(table, rgID, "sample", rg.getSample());
setTableValue(table, rgID, "library", rg.getLibrary());
setTableValue(table, rgID, "platform", rg.getPlatform());
setTableValue(table, rgID, "center", rg.getSequencingCenter());
try {
setTableValue(table, rgID, "date", rg.getRunDate() != null ? dateFormatter.format(rg.getRunDate()) : "NA");
} catch ( NullPointerException e ) {
// TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present
setTableValue(table, rgID, "date", "NA");
}
setTableValue(table, rgID, "has.any.reads", hasAnyReads);
setTableValue(table, rgID, "is.paired.end", isPaired);
setTableValue(table, rgID, "n.reads.analyzed", info.nReadsSeen);
setTableValue(table, rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA");
setTableValue(table, rgID, "median.read.length", hasAnyReads ? readLength : "NA" );
setTableValue(table, rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA" );
}
report.print(out);
}
private final void setTableValue(GATKReportTable table, final String rgID, final String key, final Object value) {
table.set(rgID, key, value == null ? "NA" : value);
}
}

View File

@ -454,8 +454,7 @@ public class HaplotypeIndelErrorModel {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
// Second term is a constant added to both likelihoods so will be ignored
haplotypeLikehoodMatrix[i][j] += MathUtils.softMax(readLikelihood[0],
readLikelihood[1]);
haplotypeLikehoodMatrix[i][j] += MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]);
}

View File

@ -166,18 +166,17 @@ public class PairHMMIndelErrorModel {
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
YMetricArray[im1][jm1] + pBaseRead);
matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]});
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
// update Y array
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
}
}
@ -316,9 +315,7 @@ public class PairHMMIndelErrorModel {
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
XMetricArray[bestI][bestJ],
YMetricArray[bestI][bestJ]);
final double bestMetric = MathUtils.approximateLog10SumLog10(new double[]{ matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ] });
/*
if (DEBUG) {
@ -651,7 +648,7 @@ public class PairHMMIndelErrorModel {
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplied to just a single loop without the intermediate NxN matrix
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix
for (int i=0; i < numHaplotypes; i++) {
for (int j=i; j < numHaplotypes; j++){
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
@ -665,7 +662,7 @@ public class PairHMMIndelErrorModel {
final double li = readLikelihoods[readIdx][i];
final double lj = readLikelihoods[readIdx][j];
final int readCount = readCounts[readIdx];
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF);
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.approximateLog10SumLog10(li, lj) + LOG_ONE_HALF);
}
}
}
@ -678,7 +675,7 @@ public class PairHMMIndelErrorModel {
}
}
// renormalize so that max element is zero.
// renormalize so that max element is zero.
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
}
}

View File

@ -110,6 +110,13 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
@Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
boolean lowerCaseSNPs = false;
/**
* If onlyOutputValidAmplicons is true, the output fasta file will contain only valid sequences.
* Useful for producing delivery-ready files.
*/
@Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false)
boolean onlyOutputValidAmplicons = false;
/**
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
* This changes the size of the k-mer used for alignment.
@ -486,14 +493,16 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
valid = "Valid";
}
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
if (!sequenomOutput)
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
else {
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
probeName = probeName.replace("amplicon_","a");
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
if (!sequenomOutput)
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
else {
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
probeName = probeName.replace("amplicon_","a");
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
}
}
}
}

View File

@ -140,15 +140,16 @@ public class GaussianMixtureModel {
}
for( final VariantDatum datum : data ) {
final ArrayList<Double> pVarInGaussianLog10 = new ArrayList<Double>( gaussians.size() );
final double[] pVarInGaussianLog10 = new double[gaussians.size()];
int gaussianIndex = 0;
for( final MultivariateGaussian gaussian : gaussians ) {
final double pVarLog10 = gaussian.evaluateDatumLog10( datum );
pVarInGaussianLog10.add( pVarLog10 );
pVarInGaussianLog10[gaussianIndex++] = pVarLog10;
}
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10 );
int iii = 0;
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10, false );
gaussianIndex = 0;
for( final MultivariateGaussian gaussian : gaussians ) {
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[iii++] ); //BUGBUG: to clean up
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[gaussianIndex++] );
}
}
}

View File

@ -372,6 +372,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
stream.println("library(ggplot2)");
// For compactPDF in R 2.13+
stream.println("library(tools)");
// For graphical functions R 2.14.2+
stream.println("library(grid)");
createArrangeFunction( stream );

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
@ -113,24 +114,28 @@ public class Haplotype {
return isReference;
}
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final byte[] paddedRef, final int refStart,
final Cigar haplotypeCigar, final int numBasesAddedToStartOfHaplotype, final int refHaplotypeLength ) {
@Requires({"refInsertLocation >= 0", "hapStartInRefCoords >= 0"})
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) {
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation);
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(hapStartInRefCoords, haplotypeCigar, refInsertLocation);
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
return getBases().clone();
return bases.clone();
}
haplotypeInsertLocation += numBasesAddedToStartOfHaplotype;
final byte[] newHaplotype = getBases().clone();
byte[] newHaplotype;
try {
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
newHaplotype = bases.clone();
for( int iii = 0; iii < altAllele.length(); iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else if( refAllele.length() < altAllele.length() ) { // insertion
} else if( refAllele.length() < altAllele.length() ) { // insertion
final int altAlleleLength = altAllele.length();
newHaplotype = new byte[bases.length + altAlleleLength];
for( int iii = 0; iii < bases.length; iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
}
@ -138,24 +143,17 @@ public class Haplotype {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else { // deletion
int refHaplotypeOffset = 0;
for( final CigarElement ce : haplotypeCigar.getCigarElements()) {
if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); }
else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); }
}
for( int iii = 0; iii < altAllele.length(); iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
final int shift = refAllele.length() - altAllele.length();
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) {
newHaplotype[iii] = newHaplotype[iii+shift];
newHaplotype = new byte[bases.length - shift];
for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = 0; iii < shift; iii++ ) {
newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii];
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) {
newHaplotype[iii] = bases[iii+shift];
}
}
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
return getBases().clone();
return bases.clone();
}
return newHaplotype;

View File

@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.math.BigDecimal;
@ -40,14 +41,6 @@ import java.util.*;
* @author Kiran Garimella
*/
public class MathUtils {
/** Public constants - used for the Lanczos approximation to the factorial function
* (for the calculation of the binomial/multinomial probability in logspace)
* @param LANC_SEQ[] - an array holding the constants which correspond to the product
* of Chebyshev Polynomial coefficients, and points on the Gamma function (for interpolation)
* [see A Precision Approximation of the Gamma Function J. SIAM Numer. Anal. Ser. B, Vol. 1 1964. pp. 86-96]
* @param LANC_G - a value for the Lanczos approximation to the gamma function that works to
* high precision
*/
/**
* Private constructor. No instantiating this class!
@ -55,6 +48,28 @@ public class MathUtils {
private MathUtils() {
}
public static final double[] log10Cache;
private static final double[] jacobianLogTable;
private static final double JACOBIAN_LOG_TABLE_STEP = 0.001;
private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1;
private static final int MAXN = 11000;
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients
static {
log10Cache = new double[LOG10_CACHE_SIZE];
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
log10Cache[0] = Double.NEGATIVE_INFINITY;
for (int k = 1; k < LOG10_CACHE_SIZE; k++)
log10Cache[k] = Math.log10(k);
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
}
}
// A fast implementation of the Math.round() method. This method does not perform
// under/overflow checking, so this shouldn't be used in the general case (but is fine
// if one is already make those checks before calling in to the rounding).
@ -535,7 +550,7 @@ public class MathUtils {
// all negative) the largest value; also, we need to convert to normal-space.
double maxValue = Utils.findMaxEntry(array);
// we may decide to just normalize in log space with converting to linear space
// we may decide to just normalize in log space without converting to linear space
if (keepInLogSpace) {
for (int i = 0; i < array.length; i++)
array[i] -= maxValue;
@ -562,29 +577,6 @@ public class MathUtils {
return normalized;
}
public static double[] normalizeFromLog10(List<Double> array, boolean takeLog10OfOutput) {
double[] normalized = new double[array.size()];
// for precision purposes, we need to add (or really subtract, since they're
// all negative) the largest value; also, we need to convert to normal-space.
double maxValue = MathUtils.arrayMaxDouble(array);
for (int i = 0; i < array.size(); i++)
normalized[i] = Math.pow(10, array.get(i) - maxValue);
// normalize
double sum = 0.0;
for (int i = 0; i < array.size(); i++)
sum += normalized[i];
for (int i = 0; i < array.size(); i++) {
double x = normalized[i] / sum;
if (takeLog10OfOutput)
x = Math.log10(x);
normalized[i] = x;
}
return normalized;
}
/**
* normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
*
@ -595,10 +587,6 @@ public class MathUtils {
return normalizeFromLog10(array, false);
}
public static double[] normalizeFromLog10(List<Double> array) {
return normalizeFromLog10(array, false);
}
public static int maxElementIndex(final double[] array) {
return maxElementIndex(array, array.length);
}
@ -1206,78 +1194,11 @@ public class MathUtils {
return ((double) num) / (Math.max(denom, 1));
}
public static final double[] log10Cache;
public static final double[] jacobianLogTable;
public static final int JACOBIAN_LOG_TABLE_SIZE = 101;
public static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP;
public static final double MAX_JACOBIAN_TOLERANCE = 10.0;
private static final int MAXN = 11000;
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients
static {
log10Cache = new double[LOG10_CACHE_SIZE];
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
log10Cache[0] = Double.NEGATIVE_INFINITY;
for (int k = 1; k < LOG10_CACHE_SIZE; k++)
log10Cache[k] = Math.log10(k);
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
}
}
static public double softMax(final double[] vec) {
double acc = vec[0];
for (int k = 1; k < vec.length; k++)
acc = softMax(acc, vec[k]);
return acc;
}
static public double max(double x0, double x1, double x2) {
double a = Math.max(x0, x1);
return Math.max(a, x2);
}
static public double softMax(final double x0, final double x1, final double x2) {
// compute naively log10(10^x[0] + 10^x[1]+...)
// return Math.log10(MathUtils.sumLog10(vec));
// better approximation: do Jacobian logarithm function on data pairs
double a = softMax(x0, x1);
return softMax(a, x2);
}
static public double softMax(final double x, final double y) {
// we need to compute log10(10^x + 10^y)
// By Jacobian logarithm identity, this is equal to
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup
// with integer quantization
// slow exact version:
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
double diff = x - y;
if (diff > MAX_JACOBIAN_TOLERANCE)
return x;
else if (diff < -MAX_JACOBIAN_TOLERANCE)
return y;
else if (diff >= 0) {
int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
return x + jacobianLogTable[ind];
}
else {
int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
return y + jacobianLogTable[ind];
}
}
public static double phredScaleToProbability(byte q) {
return Math.pow(10, (-q) / 10.0);
}
@ -1619,30 +1540,118 @@ public class MathUtils {
* @param bitSet the bitset
* @return an integer with the bitset representation
*/
public static int intFrom(final BitSet bitSet) {
int integer = 0;
public static long intFrom(final BitSet bitSet) {
long number = 0;
for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1))
integer |= 1 << bitIndex;
number |= 1L << bitIndex;
return integer;
return number;
}
/**
* Creates a BitSet representation of a given integer
*
* @param integer the number to turn into a bitset
* @param number the number to turn into a bitset
* @return a bitset representation of the integer
*/
public static BitSet bitSetFrom(int integer) {
BitSet bitSet = new BitSet((int) Math.ceil(Math.sqrt(integer)));
public static BitSet bitSetFrom(long number) {
BitSet bitSet = new BitSet();
int bitIndex = 0;
while (integer > 0) {
if (integer%2 > 0)
while (number > 0) {
if (number%2 > 0)
bitSet.set(bitIndex);
bitIndex++;
integer /= 2;
number /= 2;
}
return bitSet;
}
/**
* Converts a BitSet into the dna string representation.
*
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
* a bitSetFrom(BigNumber) method.
*
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
* base_10 representation of the sequence. This is important for us to know how to bring the number
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
* as 0's and leading 0's are omitted).
*
* quasi-canonical because A is represented by a 0, therefore,
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
* we have : 0, 1, 2, 3, 00, 01, 02, ...
*
* but we can correctly decode it because we know the final length.
*
* @param bitSet the bitset representation of the dna sequence
* @return the dna sequence represented by the bitset
*/
public static String dnaFrom(final BitSet bitSet) {
long number = intFrom(bitSet); // the base_10 representation of the bit set
long preContext = 0; // the number of combinations skipped to get to the quasi-canonical representation (we keep it to subtract later)
long nextContext = 4; // the next context (we advance it so we know which one was preceding it).
int i = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
while (nextContext <= number) { // find the length of the dna string (i)
preContext = nextContext; // keep track of the number of combinations in the preceding context
nextContext += Math.pow(4, ++i);// calculate the next context
}
number -= preContext; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
String dna = "";
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
byte base = (byte) (number % 4);
switch (base) {
case 0 : dna = "A" + dna; break;
case 1 : dna = "C" + dna; break;
case 2 : dna = "G" + dna; break;
case 3 : dna = "T" + dna; break;
}
number /= 4;
}
for (int j = dna.length(); j < i; j++)
dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above)
return dna;
}
/**
* Creates a BitSet representation of a given dna string.
*
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
* a bitSetFrom(BigNumber) method.
*
* The bit representation of a dna string is the simple:
* 0 A 4 AA 8 CA
* 1 C 5 AC ...
* 2 G 6 AG 1343 TTGGT
* 3 T 7 AT 1364 TTTTT
*
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
* preceded the string (with smaller lengths).
*
* @param dna the dna sequence
* @return the bitset representing the dna sequence
*/
public static BitSet bitSetFrom(String dna) {
if (dna.length() > 31)
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than 31. dna: %s (%d)", dna, dna.length()));
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
long preContext = 0; // the sum of all combinations that preceded the length of the dna string
for (int i=0; i<dna.length(); i++) {
baseTen *= 4;
switch(dna.charAt(i)) {
case 'A': baseTen += 0; break;
case 'C': baseTen += 1; break;
case 'G': baseTen += 2; break;
case 'T': baseTen += 3; break;
}
if (i>0)
preContext += Math.pow(4, i); // each length will have 4^i combinations (e.g 1 = 4, 2 = 16, 3 = 64, ...)
}
return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
}
}

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import java.util.*;
/**
* Utility class for calculating median from a data set, potentially limiting the size of data to a
* fixed amount
*
* @author Your Name
* @since Date created
*/
public class Median<T extends Comparable> {
final List<T> values;
final int maxValuesToKeep;
boolean sorted = false;
public Median() {
this(Integer.MAX_VALUE);
}
public Median(final int maxValuesToKeep) {
this.maxValuesToKeep = maxValuesToKeep;
this.values = new ArrayList<T>();
}
public boolean isFull() {
return values.size() >= maxValuesToKeep;
}
public int size() {
return values.size();
}
public boolean isEmpty() {
return values.isEmpty();
}
public T getMedian() {
if ( isEmpty() )
throw new IllegalStateException("Cannot get median value from empty array");
return getMedian(null); // note that value null will never be used
}
/**
* Returns the floor(n + 1 / 2) item from the list of values if the list
* has values, or defaultValue is the list is empty.
*/
public T getMedian(final T defaultValue) {
if ( isEmpty() )
return defaultValue;
if ( ! sorted ) {
sorted = true;
Collections.sort(values);
}
final int offset = (int)Math.floor((values.size() + 1) * 0.5) - 1;
return values.get(offset);
}
public boolean add(final T value) {
if ( ! isFull() ) {
sorted = false;
return values.add(value);
}
else
return false;
}
}

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -314,6 +315,15 @@ public class ClippingOp {
if (start == 0)
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
if (read.hasBaseIndelQualities()) {
byte[] newBaseInsertionQuals = new byte[newLength];
byte[] newBaseDeletionQuals = new byte[newLength];
System.arraycopy(read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength);
System.arraycopy(read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength);
hardClippedRead.setBaseQualities(newBaseInsertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
hardClippedRead.setBaseQualities(newBaseDeletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
}
return hardClippedRead;
}

View File

@ -34,7 +34,7 @@ import java.util.Map;
* Date: Dec 29, 2009
*/
public class NestedHashMap{
public class NestedHashMap {
public final Map data = new HashMap<Object, Object>();

View File

@ -0,0 +1,390 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.io.IOUtils;
import javax.crypto.Cipher;
import java.io.File;
import java.io.InputStream;
import java.security.*;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.KeySpec;
import java.security.spec.PKCS8EncodedKeySpec;
import java.security.spec.X509EncodedKeySpec;
import java.util.Arrays;
/**
* A set of cryptographic utility methods and constants.
*
* Contains methods to:
*
* -Create a public/private key pair
* -Read and write public/private keys to/from files/streams
* -Load the GATK master private/public keys
* -Encrypt/decrypt data
*
* Also contains constants that control the cryptographic defaults
* throughout the GATK.
*
* @author David Roazen
*/
public class CryptUtils {
// ---------------------------------------------------------------------------------
// Constants (these control the default cryptographic settings throughout the GATK):
// ---------------------------------------------------------------------------------
/**
* Default key length in bits of newly-created keys. 2048 bits provides a good balance between
* security and speed.
*/
public static final int DEFAULT_KEY_LENGTH = 2048;
/**
* Default encryption algorithm to use, when none is specified.
*/
public static final String DEFAULT_ENCRYPTION_ALGORITHM = "RSA";
/**
* Default random-number generation algorithm to use, when none is specified.
*/
public static final String DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM = "SHA1PRNG";
/**
* Name of the public key file distributed with the GATK. This file is packaged
* into the GATK jar, and we use the system ClassLoader to find it.
*/
public static final String GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME = "GATK_public.key";
/**
* Location of the master copy of the GATK private key.
*/
public static final String GATK_MASTER_PRIVATE_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_private.key";
/**
* Location of the master copy of the GATK public key. This file should always be the same as
* the public key file distributed with the GATK (and there are automated tests to ensure that it is).
*/
public static final String GATK_MASTER_PUBLIC_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_public.key";
/**
* Directory where generated GATK user keys are stored. See the GATKKey class for more information.
*/
public static final String GATK_USER_KEY_DIRECTORY = "/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/";
// -----------------------
// Utility Methods:
// -----------------------
/**
* Generate a new public/private key pair using the default encryption settings defined above.
*
* @return A new public/private key pair created using the default settings
*/
public static KeyPair generateKeyPair() {
return generateKeyPair(DEFAULT_KEY_LENGTH, DEFAULT_ENCRYPTION_ALGORITHM, DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
}
/**
* Generate a new public/private key pair using custom encryption settings.
*
* @param keyLength Length of the key in bits
* @param encryptionAlgorithm Encryption algorithm to use
* @param randNumberAlgorithm Random-number generation algorithm to use
* @return A new public/private key pair, created according to the specified parameters
*/
public static KeyPair generateKeyPair( int keyLength, String encryptionAlgorithm, String randNumberAlgorithm ) {
try {
KeyPairGenerator keyGen = KeyPairGenerator.getInstance(encryptionAlgorithm);
SecureRandom randomnessSource = createRandomnessSource(randNumberAlgorithm);
keyGen.initialize(keyLength, randomnessSource);
return keyGen.generateKeyPair();
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
}
catch ( Exception e ) {
throw new ReviewedStingException("Error while generating key pair", e);
}
}
/**
* Create a source of randomness using the default random-number generation algorithm.
*
* @return A randomness source that uses the default algorithm
*/
public static SecureRandom createRandomnessSource() {
return createRandomnessSource(DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
}
/**
* Create a source of randomness using a custom random-number generation algorithm.
*
* @param randAlgorithm The random-number generation algorithm to use
* @return A randomness sources that uses the specified algorithm
*/
public static SecureRandom createRandomnessSource ( String randAlgorithm ) {
try {
return SecureRandom.getInstance(randAlgorithm);
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Could not find an implementation of the requested random-number generation algorithm %s", randAlgorithm), e);
}
}
/**
* Writes a public/private key pair to disk
*
* @param keyPair The key pair we're writing to disk
* @param privateKeyFile Location to write the private key
* @param publicKeyFile Location to write the public key
*/
public static void writeKeyPair ( KeyPair keyPair, File privateKeyFile, File publicKeyFile ) {
writeKey(keyPair.getPrivate(), privateKeyFile);
writeKey(keyPair.getPublic(), publicKeyFile);
}
/**
* Writes an arbitrary key to disk
*
* @param key The key to write
* @param destination Location to write the key to
*/
public static void writeKey ( Key key, File destination ) {
IOUtils.writeByteArrayToFile(key.getEncoded(), destination);
}
/**
* Reads in a public key created using the default encryption algorithm from a file.
*
* @param source File containing the public key
* @return The public key read
*/
public static PublicKey readPublicKey ( File source ) {
return decodePublicKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
}
/**
* Reads in a public key created using the default encryption algorithm from a stream.
*
* @param source Stream attached to the public key
* @return The public key read
*/
public static PublicKey readPublicKey ( InputStream source ) {
return decodePublicKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
}
/**
* Decodes the raw bytes of a public key into a usable object.
*
* @param rawKey The encoded bytes of a public key as read from, eg., a file. The
* key must be in the standard X.509 format for a public key.
* @param encryptionAlgorithm The encryption algorithm used to create the public key
* @return The public key as a usable object
*/
public static PublicKey decodePublicKey ( byte[] rawKey, String encryptionAlgorithm ) {
try {
KeySpec keySpec = new X509EncodedKeySpec(rawKey);
KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm);
return keyFactory.generatePublic(keySpec);
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
}
catch ( InvalidKeySpecException e ) {
throw new ReviewedStingException("Unable to use X.509 key specification to decode the given key", e);
}
}
/**
* Reads in a private key created using the default encryption algorithm from a file.
*
* @param source File containing the private key
* @return The private key read
*/
public static PrivateKey readPrivateKey ( File source ) {
return decodePrivateKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
}
/**
* Reads in a private key created using the default encryption algorithm from a stream.
*
* @param source Stream attached to the private key
* @return The private key read
*/
public static PrivateKey readPrivateKey ( InputStream source ) {
return decodePrivateKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
}
/**
* Decodes the raw bytes of a private key into a usable object.
*
* @param rawKey The encoded bytes of a private key as read from, eg., a file. The
* key must be in the standard PKCS #8 format for a private key.
* @param encryptionAlgorithm The encryption algorithm used to create the private key
* @return The private key as a usable object
*/
public static PrivateKey decodePrivateKey ( byte[] rawKey, String encryptionAlgorithm ) {
try {
KeySpec keySpec = new PKCS8EncodedKeySpec(rawKey);
KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm);
return keyFactory.generatePrivate(keySpec);
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
}
catch ( InvalidKeySpecException e ) {
throw new ReviewedStingException("Unable to use the PKCS #8 key specification to decode the given key", e);
}
}
/**
* Loads the copy of the GATK public key that is distributed with the GATK. Uses the system
* ClassLoader to locate the public key file, which should be stored at the root of the GATK
* jar file.
*
* @return The GATK public key as a usable object
*/
public static PublicKey loadGATKDistributedPublicKey() {
InputStream publicKeyInputStream = ClassLoader.getSystemResourceAsStream(GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME);
if ( publicKeyInputStream == null ) {
throw new ReviewedStingException(String.format("Could not locate the GATK public key %s in the classpath",
GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME));
}
return readPublicKey(publicKeyInputStream);
}
/**
* Loads the master copy of the GATK private key. You must have the appropriate UNIX permissions
* to do this!
*
* @return The GATK master private key as a usable object
*/
public static PrivateKey loadGATKMasterPrivateKey() {
return readPrivateKey(new File(GATK_MASTER_PRIVATE_KEY_FILE));
}
/**
* Loads the master copy of the GATK public key. This should always be the same as the
* public key distributed with the GATK returned by loadGATKDistributedPublicKey().
*
* @return The GATK master public key as a usable object
*/
public static PublicKey loadGATKMasterPublicKey() {
return readPublicKey(new File(GATK_MASTER_PUBLIC_KEY_FILE));
}
/**
* Encrypts the given data using the key provided.
*
* @param data The data to encrypt, as a byte array
* @param encryptKey The key with which to encrypt the data
* @return The encrypted version of the provided data
*/
public static byte[] encryptData ( byte[] data, Key encryptKey ) {
return transformDataUsingCipher(data, encryptKey, Cipher.ENCRYPT_MODE);
}
/**
* Decrypts the given data using the key provided.
*
* @param encryptedData Data to decrypt, as a byte array
* @param decryptKey The key with which to decrypt the data
* @return The decrypted version of the provided data
*/
public static byte[] decryptData ( byte[] encryptedData, Key decryptKey ) {
return transformDataUsingCipher(encryptedData, decryptKey, Cipher.DECRYPT_MODE);
}
/**
* Helper method for encryption/decryption that takes data and processes it using
* the given key
*
* @param data Data to encrypt/decrypt
* @param key Key to use to encrypt/decrypt the data
* @param cipherMode Specifies whether we are encrypting or decrypting
* @return The encrypted/decrypted data
*/
private static byte[] transformDataUsingCipher ( byte[] data, Key key, int cipherMode ) {
try {
Cipher cipher = Cipher.getInstance(key.getAlgorithm());
cipher.init(cipherMode, key);
return cipher.doFinal(data);
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Could not find an implementation of the requested algorithm %s",
key.getAlgorithm()), e);
}
catch ( InvalidKeyException e ) {
throw new ReviewedStingException("Key is invalid", e);
}
catch ( GeneralSecurityException e ) {
throw new ReviewedStingException("Error during encryption", e);
}
}
/**
* Tests whether the public/private keys provided can each decrypt data encrypted by
* the other key -- ie., tests whether these two keys are part of the same public/private
* key pair.
*
* @param privateKey The private key to test
* @param publicKey The public key to test
* @return True if the keys are part of the same key pair and can decrypt each other's
* encrypted data, otherwise false.
*/
public static boolean keysDecryptEachOther ( PrivateKey privateKey, PublicKey publicKey ) {
byte[] plainText = "Test PlainText".getBytes();
byte[] dataEncryptedUsingPrivateKey = CryptUtils.encryptData(plainText, privateKey);
byte[] dataEncryptedUsingPublicKey = CryptUtils.encryptData(plainText, publicKey);
byte[] privateKeyDataDecryptedWithPublicKey = CryptUtils.decryptData(dataEncryptedUsingPrivateKey, publicKey);
byte[] publicKeyDataDecryptedWithPrivateKey = CryptUtils.decryptData(dataEncryptedUsingPublicKey, privateKey);
// Make sure we actually transformed the data during encryption:
if ( Arrays.equals(plainText, dataEncryptedUsingPrivateKey) ||
Arrays.equals(plainText, dataEncryptedUsingPublicKey) ||
Arrays.equals(dataEncryptedUsingPrivateKey, dataEncryptedUsingPublicKey) ) {
return false;
}
// Make sure that we were able to recreate the original plaintext using
// both the public key on the private-key-encrypted data and the private
// key on the public-key-encrypted data:
if ( ! Arrays.equals(plainText, privateKeyDataDecryptedWithPublicKey) ||
! Arrays.equals(plainText, publicKeyDataDecryptedWithPrivateKey) ) {
return false;
}
return true;
}
}

View File

@ -0,0 +1,349 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.io.IOUtils;
import java.io.*;
import java.security.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
* Class to represent a GATK user key.
*
* A GATK user key contains an email address and a cryptographic signature.
* The signature is the SHA-1 hash of the email address encrypted using
* the GATK master private key. The GATK master public key (distributed
* with the GATK) is used to decrypt the signature and validate the key
* at the start of each GATK run that requires a key.
*
* Keys are cryptographically secure in that valid keys definitely come
* from us and cannot be fabricated, however nothing prevents keys from
* being shared between users.
*
* GATK user keys have the following on-disk format:
*
* GZIP Container:
* Email address
* NUL byte (delimiter)
* Cryptographic Signature (encrypted SHA-1 hash of email address)
*
* The key data is wrapped within a GZIP container to placate over-zealous
* email filters (since keys must often be emailed) and also to provide an
* additional integrity check via the built-in GZIP CRC.
*
* @author David Roazen
*/
public class GATKKey {
/**
* Private key used to sign the GATK key. Required only when creating a new
* key from scratch, not when loading an existing key from disk.
*/
private PrivateKey privateKey;
/**
* Public key used to validate the GATK key.
*/
private PublicKey publicKey;
/**
* The user's email address, stored within the key and signed.
*/
private String emailAddress;
/**
* The cryptographic signature of the email address. By default, this is
* the SHA-1 hash of the email address encrypted using the RSA algorithm.
*/
private byte[] signature;
/**
* The combination of hash/encryption algorithms to use to generate the signature.
* By default this is "SHA1withRSA"
*/
private String signingAlgorithm;
/**
* Default hash/encryption algorithms to use to sign the key.
*/
public static final String DEFAULT_SIGNING_ALGORITHM = "SHA1withRSA";
/**
* Byte value used to separate the email address from its signature in the key file.
*/
public static final byte GATK_KEY_SECTIONAL_DELIMITER = 0;
// -----------------------
// Constructors:
// -----------------------
/**
* Constructor to create a new GATK key from scratch using an email address
* and public/private key pair. The private key is used for signing, and the
* public key is used to validate the newly-created key.
*
* @param privateKey Private key used to sign the new GATK key
* @param publicKey Public key used to validate the new GATK key
* @param emailAddress The user's email address, which we will store in the key and sign
*/
public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress ) {
this(privateKey, publicKey, emailAddress, DEFAULT_SIGNING_ALGORITHM);
}
/**
* Constructor to create a new GATK key from scratch using an email address
* and public/private key pair, and additionally specify the signing algorithm
* to use. The private key is used for signing, and the public key is used to
* validate the newly-created key.
*
* @param privateKey Private key used to sign the new GATK key
* @param publicKey Public key used to validate the new GATK key
* @param emailAddress The user's email address, which we will store in the key and sign
* @param signingAlgorithm The combination of hash and encryption algorithms to use to sign the key
*/
public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress, String signingAlgorithm ) {
if ( privateKey == null || publicKey == null || emailAddress == null || emailAddress.length() == 0 || signingAlgorithm == null ) {
throw new ReviewedStingException("Cannot construct GATKKey using null/empty arguments");
}
this.privateKey = privateKey;
this.publicKey = publicKey;
this.emailAddress = emailAddress;
this.signingAlgorithm = signingAlgorithm;
validateEmailAddress();
generateSignature();
if ( ! isValid() ) {
throw new ReviewedStingException("Newly-generated GATK key fails validation -- this should never happen!");
}
}
/**
* Constructor to load an existing GATK key from a file.
*
* During loading, the key file is checked for integrity, but not cryptographic
* validity (which must be done through a subsequent call to isValid()).
*
* @param publicKey Public key that will be used to validate the loaded GATK key
* in subsequent calls to isValid()
* @param keyFile File containing the GATK key to load
*/
public GATKKey ( PublicKey publicKey, File keyFile ) {
this(publicKey, keyFile, DEFAULT_SIGNING_ALGORITHM);
}
/**
* Constructor to load an existing GATK key from a file, and additionally specify
* the signing algorithm used to sign the key being loaded.
*
* During loading, the key file is checked for integrity, but not cryptographic
* validity (which must be done through a subsequent call to isValid()).
*
* @param publicKey Public key that will be used to validate the loaded GATK key
* in subsequent calls to isValid()
* @param keyFile File containing the GATK key to load
* @param signingAlgorithm The combination of hash and encryption algorithms used to sign the key
*/
public GATKKey ( PublicKey publicKey, File keyFile, String signingAlgorithm ) {
if ( publicKey == null || keyFile == null || signingAlgorithm == null ) {
throw new ReviewedStingException("Cannot construct GATKKey using null arguments");
}
this.publicKey = publicKey;
this.signingAlgorithm = signingAlgorithm;
readKey(keyFile);
}
// -----------------------
// Public API Methods:
// -----------------------
/**
* Writes out this key to a file in the format described at the top of this class,
* encapsulating the key within a GZIP container.
*
* @param destination File to write the key to
*/
public void writeKey ( File destination ) {
try {
byte[] keyBytes = marshalKeyData();
IOUtils.writeByteArrayToStream(keyBytes, new GZIPOutputStream(new FileOutputStream(destination)));
}
catch ( IOException e ) {
throw new UserException.CouldNotCreateOutputFile(destination, e);
}
}
/**
* Checks whether the signature of this key is cryptographically valid (ie., can be
* decrypted by the public key to produce a valid SHA-1 hash of the email address
* in the key).
*
* @return True if the key's signature passes validation, otherwise false
*/
public boolean isValid() {
try {
Signature sig = Signature.getInstance(signingAlgorithm);
sig.initVerify(publicKey);
sig.update(emailAddress.getBytes());
return sig.verify(signature);
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e);
}
catch ( InvalidKeyException e ) {
// If the GATK public key is invalid, it's likely our problem, not the user's:
throw new ReviewedStingException(String.format("Public key %s is invalid", publicKey), e);
}
catch ( SignatureException e ) {
throw new UserException.UnreadableKeyException("Signature is invalid or signing algorithm was unable to process the input data", e);
}
}
// -----------------------
// Private Helper Methods:
// -----------------------
/**
* Helper method that creates a signature for this key using the combination of
* hash/encryption algorithms specified at construction time.
*/
private void generateSignature() {
try {
Signature sig = Signature.getInstance(signingAlgorithm);
sig.initSign(privateKey, CryptUtils.createRandomnessSource());
sig.update(emailAddress.getBytes());
signature = sig.sign();
}
catch ( NoSuchAlgorithmException e ) {
throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e);
}
catch ( InvalidKeyException e ) {
throw new ReviewedStingException(String.format("Private key %s is invalid", privateKey), e);
}
catch ( SignatureException e ) {
throw new ReviewedStingException(String.format("Error creating signature for email address %s", emailAddress), e);
}
}
/**
* Helper method that reads in a GATK key from a file. Should not be called directly --
* use the appropriate constructor above.
*
* @param source File to read the key from
*/
private void readKey ( File source ) {
try {
byte[] keyBytes = IOUtils.readStreamIntoByteArray(new GZIPInputStream(new FileInputStream(source)));
// As a sanity check, compare the number of bytes read to the uncompressed file size
// stored in the GZIP ISIZE field. If they don't match, the key must be corrupt:
if ( keyBytes.length != IOUtils.getGZIPFileUncompressedSize(source) ) {
throw new UserException.UnreadableKeyException("Number of bytes read does not match the uncompressed size specified in the GZIP ISIZE field");
}
unmarshalKeyData(keyBytes);
}
catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(source, e);
}
catch ( IOException e ) {
throw new UserException.UnreadableKeyException(source, e);
}
catch ( UserException.CouldNotReadInputFile e ) {
throw new UserException.UnreadableKeyException(source, e);
}
}
/**
* Helper method that assembles the email address and signature into a format
* suitable for writing to disk.
*
* @return The aggregated key data, ready to be written to disk
*/
private byte[] marshalKeyData() {
byte[] emailAddressBytes = emailAddress.getBytes();
byte[] assembledKey = new byte[emailAddressBytes.length + 1 + signature.length];
System.arraycopy(emailAddressBytes, 0, assembledKey, 0, emailAddressBytes.length);
assembledKey[emailAddressBytes.length] = GATK_KEY_SECTIONAL_DELIMITER;
System.arraycopy(signature, 0, assembledKey, emailAddressBytes.length + 1, signature.length);
return assembledKey;
}
/**
* Helper method that parses the raw key data from disk into its component
* email address and signature. Performs some basic validation in the process.
*
* @param keyBytes The raw, uncompressed key data read from disk
*/
private void unmarshalKeyData ( byte[] keyBytes ) {
int delimiterPosition = -1;
for ( int i = 0; i < keyBytes.length; i++ ) {
if ( keyBytes[i] == GATK_KEY_SECTIONAL_DELIMITER ) {
delimiterPosition = i;
break;
}
}
if ( delimiterPosition == -1 ) {
throw new UserException.UnreadableKeyException("Malformed GATK key contains no sectional delimiter");
}
else if ( delimiterPosition == 0 ) {
throw new UserException.UnreadableKeyException("Malformed GATK key contains no email address");
}
else if ( delimiterPosition == keyBytes.length - 1 ) {
throw new UserException.UnreadableKeyException("Malformed GATK key contains no signature");
}
byte[] emailAddressBytes = new byte[delimiterPosition];
System.arraycopy(keyBytes, 0, emailAddressBytes, 0, delimiterPosition);
emailAddress = new String(emailAddressBytes);
signature = new byte[keyBytes.length - delimiterPosition - 1];
System.arraycopy(keyBytes, delimiterPosition + 1, signature, 0, keyBytes.length - delimiterPosition - 1);
}
/**
* Helper method that ensures that the user's email address does not contain the NUL byte, which we
* reserve as a delimiter within each key file.
*/
private void validateEmailAddress() {
for ( byte b : emailAddress.getBytes() ) {
if ( b == GATK_KEY_SECTIONAL_DELIMITER ) {
throw new UserException(String.format("Email address must not contain a byte with value %d", GATK_KEY_SECTIONAL_DELIMITER));
}
}
}
}

View File

@ -132,6 +132,10 @@ public class UserException extends ReviewedStingException {
public CouldNotReadInputFile(File file, Exception e) {
this(file, e.getMessage());
}
public CouldNotReadInputFile(String message) {
super(message);
}
}
@ -151,6 +155,10 @@ public class UserException extends ReviewedStingException {
public CouldNotCreateOutputFile(File file, Exception e) {
super(String.format("Couldn't write file %s because exception %s", file.getAbsolutePath(), e.getMessage()));
}
public CouldNotCreateOutputFile(String message, Exception e) {
super(message, e);
}
}
public static class MissortedBAM extends UserException {
@ -319,4 +327,32 @@ public class UserException extends ReviewedStingException {
"and try again.", null);
}
}
public static class UnreadableKeyException extends UserException {
public UnreadableKeyException ( File f, Exception e ) {
super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
f.getAbsolutePath(), e.getMessage()));
}
public UnreadableKeyException ( String message, Exception e ) {
this(String.format("%s. Error was: %s", message, e.getMessage()));
}
public UnreadableKeyException ( String message ) {
super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
message));
}
}
public static class KeySignatureVerificationException extends UserException {
public KeySignatureVerificationException ( File f ) {
super(String.format("The signature in key file %s failed cryptographic verification. " +
"If this key was valid in the past, it's likely been revoked. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
"for help.",
f.getAbsolutePath()));
}
}
}

View File

@ -151,23 +151,16 @@ public class FragmentUtils {
final int numBases = firstReadStop + secondRead.getReadLength();
final byte[] bases = new byte[numBases];
final byte[] quals = new byte[numBases];
// BUGBUG: too verbose, clean this up.
final byte[] insertionQuals = new byte[numBases];
final byte[] deletionQuals = new byte[numBases];
final byte[] firstReadBases = firstRead.getReadBases();
final byte[] firstReadQuals = firstRead.getBaseQualities();
final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
final byte[] secondReadBases = secondRead.getReadBases();
final byte[] secondReadQuals = secondRead.getBaseQualities();
final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
for(int iii = 0; iii < firstReadStop; iii++) {
bases[iii] = firstReadBases[iii];
quals[iii] = firstReadQuals[iii];
insertionQuals[iii] = firstReadInsertionQuals[iii];
deletionQuals[iii] = firstReadDeletionQuals[iii];
}
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
@ -175,22 +168,16 @@ public class FragmentUtils {
}
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
}
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
bases[iii] = secondReadBases[iii-firstReadStop];
quals[iii] = secondReadQuals[iii-firstReadStop];
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
}
final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
returnRead.setAlignmentStart(firstRead.getUnclippedStart());
returnRead.setReadBases( bases );
returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION );
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
returnRead.setBaseQualities( quals );
returnRead.setReadGroup( firstRead.getReadGroup() );
returnRead.setReferenceName( firstRead.getReferenceName() );
final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
@ -199,6 +186,27 @@ public class FragmentUtils {
returnRead.setCigar( new Cigar( cList ));
returnRead.setMappingQuality( firstRead.getMappingQuality() );
if( firstRead.hasBaseIndelQualities() || secondRead.hasBaseIndelQualities() ) {
final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
for(int iii = 0; iii < firstReadStop; iii++) {
insertionQuals[iii] = firstReadInsertionQuals[iii];
deletionQuals[iii] = firstReadDeletionQuals[iii];
}
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score
deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score
}
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
}
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
}
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>();
returnList.add(returnRead);
return returnList;

View File

@ -29,10 +29,13 @@ import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.*;
public class IOUtils {
@ -400,4 +403,173 @@ public class IOUtils {
public static boolean isSpecialFile(File file) {
return file != null && (file.getAbsolutePath().startsWith("/dev/") || file.equals(DEV_DIR));
}
/**
* Reads the entirety of the given file into a byte array. Uses a read buffer size of 4096 bytes.
*
* @param source File to read
* @return The contents of the file as a byte array
*/
public static byte[] readFileIntoByteArray ( File source ) {
return readFileIntoByteArray(source, 4096);
}
/**
* Reads the entirety of the given file into a byte array using the requested read buffer size.
*
* @param source File to read
* @param readBufferSize Number of bytes to read in at one time
* @return The contents of the file as a byte array
*/
public static byte[] readFileIntoByteArray ( File source, int readBufferSize ) {
if ( source == null ) {
throw new ReviewedStingException("Source file was null");
}
byte[] fileContents;
try {
fileContents = readStreamIntoByteArray(new FileInputStream(source), readBufferSize);
}
catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(source, e);
}
if ( fileContents.length != source.length() ) {
throw new UserException.CouldNotReadInputFile(String.format("Unable to completely read file %s: read only %d/%d bytes",
source.getAbsolutePath(), fileContents.length, source.length()));
}
return fileContents;
}
/**
* Reads all data from the given stream into a byte array. Uses a read buffer size of 4096 bytes.
*
* @param in Stream to read data from
* @return The contents of the stream as a byte array
*/
public static byte[] readStreamIntoByteArray ( InputStream in ) {
return readStreamIntoByteArray(in, 4096);
}
/**
* Reads all data from the given stream into a byte array using the requested read buffer size.
*
* @param in Stream to read data from
* @param readBufferSize Number of bytes to read in at one time
* @return The contents of the stream as a byte array
*/
public static byte[] readStreamIntoByteArray ( InputStream in, int readBufferSize ) {
if ( in == null ) {
throw new ReviewedStingException("Input stream was null");
}
else if ( readBufferSize <= 0 ) {
throw new ReviewedStingException("Read buffer size must be > 0");
}
// Use a fixed-size buffer for each read, but a dynamically-growing buffer
// to hold the accumulated contents of the file/stream:
byte[] readBuffer = new byte[readBufferSize];
ByteArrayOutputStream fileBuffer = new ByteArrayOutputStream(readBufferSize * 4);
try {
try {
int currentBytesRead;
while ( (currentBytesRead = in.read(readBuffer, 0, readBuffer.length)) >= 0 ) {
fileBuffer.write(readBuffer, 0, currentBytesRead);
}
}
finally {
in.close();
}
}
catch ( IOException e ) {
throw new UserException.CouldNotReadInputFile("I/O error reading from input stream", e);
}
return fileBuffer.toByteArray();
}
/**
* Writes the given array of bytes to a file
*
* @param bytes Data to write
* @param destination File to write the data to
*/
public static void writeByteArrayToFile ( byte[] bytes, File destination ) {
if ( destination == null ) {
throw new ReviewedStingException("Destination file was null");
}
try {
writeByteArrayToStream(bytes, new FileOutputStream(destination));
}
catch ( FileNotFoundException e ) {
throw new UserException.CouldNotCreateOutputFile(destination, e);
}
}
/**
* Writes the given array of bytes to a stream
*
* @param bytes Data to write
* @param out Stream to write the data to
*/
public static void writeByteArrayToStream ( byte[] bytes, OutputStream out ) {
if ( bytes == null || out == null ) {
throw new ReviewedStingException("Data to write or output stream was null");
}
try {
try {
out.write(bytes);
}
finally {
out.close();
}
}
catch ( IOException e ) {
throw new UserException.CouldNotCreateOutputFile("I/O error writing to output stream", e);
}
}
/**
* Determines the uncompressed size of a GZIP file. Uses the GZIP ISIZE field in the last
* 4 bytes of the file to get this information.
*
* @param gzipFile GZIP-format file whose uncompressed size to determine
* @return The uncompressed size (in bytes) of the GZIP file
*/
public static int getGZIPFileUncompressedSize ( File gzipFile ) {
if ( gzipFile == null ) {
throw new ReviewedStingException("GZIP file to examine was null");
}
try {
// The GZIP ISIZE field holds the uncompressed size of the compressed data.
// It occupies the last 4 bytes of any GZIP file:
RandomAccessFile in = new RandomAccessFile(gzipFile, "r");
in.seek(gzipFile.length() - 4);
byte[] sizeBytes = new byte[4];
in.read(sizeBytes, 0, 4);
ByteBuffer byteBuf = ByteBuffer.wrap(sizeBytes);
byteBuf.order(ByteOrder.LITTLE_ENDIAN); // The GZIP spec mandates little-endian byte order
int uncompressedSize = byteBuf.getInt();
// If the size read in is negative, we've overflowed our signed integer:
if ( uncompressedSize < 0 ) {
throw new UserException.CouldNotReadInputFile(String.format("Cannot accurately determine the uncompressed size of file %s " +
"because it's either larger than %d bytes or the GZIP ISIZE field is corrupt",
gzipFile.getAbsolutePath(), Integer.MAX_VALUE));
}
return uncompressedSize;
}
catch ( IOException e ) {
throw new UserException.CouldNotReadInputFile(gzipFile, e);
}
}
}

View File

@ -194,6 +194,10 @@ public class GATKSAMRecord extends BAMRecord {
}
}
public boolean hasBaseIndelQualities() {
return getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null || getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null;
}
public byte[] getBaseInsertionQualities() {
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) );
if( quals == null ) {

View File

@ -6,6 +6,7 @@ import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.apache.log4j.spi.LoggingEvent;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.utils.crypt.CryptUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.io.IOUtils;
@ -87,6 +88,9 @@ public abstract class BaseTest {
public static final File testDirFile = new File("public/testdata/");
public static final String testDir = testDirFile.getAbsolutePath() + "/";
public static final String keysDataLocation = validationDataLocation + "keys/";
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
/** before the class starts up */
static {
// setup a basic log configuration
@ -141,7 +145,7 @@ public abstract class BaseTest {
*/
public static class TestDataProvider {
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
private String name;
protected String name;
/**
* Create a new TestDataProvider instance bound to the class variable C

View File

@ -0,0 +1,116 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// our package
package org.broadinstitute.sting;
// the imports for unit testing.
import org.broadinstitute.sting.utils.Median;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class MedianUnitTest extends BaseTest {
// --------------------------------------------------------------------------------
//
// Provider
//
// --------------------------------------------------------------------------------
private class MedianTestProvider extends TestDataProvider {
final List<Integer> values = new ArrayList<Integer>();
final int cap;
final Integer expected;
public MedianTestProvider(int expected, int cap, Integer ... values) {
super(MedianTestProvider.class);
this.expected = expected;
this.cap = cap;
this.values.addAll(Arrays.asList(values));
this.name = String.format("values=%s expected=%d cap=%d", this.values, expected, cap);
}
}
@DataProvider(name = "MedianTestProvider")
public Object[][] makeMedianTestProvider() {
new MedianTestProvider(1, 1000, 0, 1, 2);
new MedianTestProvider(1, 1000, 1, 0, 1, 2);
new MedianTestProvider(1, 1000, 0, 1, 2, 3);
new MedianTestProvider(2, 1000, 0, 1, 2, 3, 4);
new MedianTestProvider(2, 1000, 4, 1, 2, 3, 0);
new MedianTestProvider(1, 1000, 1);
new MedianTestProvider(2, 1000, 2);
new MedianTestProvider(1, 1000, 1, 2);
new MedianTestProvider(1, 3, 1);
new MedianTestProvider(1, 3, 1, 2);
new MedianTestProvider(2, 3, 1, 2, 3);
new MedianTestProvider(2, 3, 1, 2, 3, 4);
new MedianTestProvider(2, 3, 1, 2, 3, 4, 5);
new MedianTestProvider(1, 3, 1);
new MedianTestProvider(1, 3, 1, 2);
new MedianTestProvider(2, 3, 3, 2, 1);
new MedianTestProvider(3, 3, 4, 3, 2, 1);
new MedianTestProvider(4, 3, 5, 4, 3, 2, 1);
return MedianTestProvider.getTests(MedianTestProvider.class);
}
@Test(dataProvider = "MedianTestProvider")
public void testBasicLikelihoods(MedianTestProvider cfg) {
final Median<Integer> median = new Median<Integer>(cfg.cap);
int nAdded = 0;
for ( final int value : cfg.values )
if ( median.add(value) )
nAdded++;
Assert.assertEquals(nAdded, median.size());
Assert.assertEquals(cfg.values.isEmpty(), median.isEmpty());
Assert.assertEquals(cfg.values.size() >= cfg.cap, median.isFull());
Assert.assertEquals(median.getMedian(), cfg.expected, cfg.toString());
}
@Test(expectedExceptions = IllegalStateException.class)
public void testEmptyMedian() {
final Median<Integer> median = new Median<Integer>();
Assert.assertTrue(median.isEmpty());
final Integer d = 100;
Assert.assertEquals(median.getMedian(d), d);
median.getMedian();
}
}

View File

@ -30,6 +30,7 @@ import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.CommandLineGATK;
@ -45,7 +46,7 @@ import java.io.File;
import java.util.*;
public class WalkerTest extends BaseTest {
private static final boolean ENABLE_REPORTING = false;
private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false;
@BeforeMethod
public void initializeRandomGenerator() {
@ -121,11 +122,19 @@ public class WalkerTest extends BaseTest {
}
public class WalkerTestSpec {
// Arguments implicitly included in all Walker command lines, unless explicitly
// disabled using the disableImplicitArgs() method below.
final String IMPLICIT_ARGS = ENABLE_PHONE_HOME_FOR_TESTS ?
String.format("-et %s", GATKRunReport.PhoneHomeOption.STANDARD) :
String.format("-et %s -K %s", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile);
String args = "";
int nOutputFiles = -1;
List<String> md5s = null;
List<String> exts = null;
Class expectedException = null;
boolean includeImplicitArgs = true;
// the default output path for the integration test
private File outputFileLocation = null;
@ -159,6 +168,10 @@ public class WalkerTest extends BaseTest {
this.expectedException = expectedException;
}
public String getArgsWithImplicitArgs() {
return args + (includeImplicitArgs ? " " + IMPLICIT_ARGS : "");
}
public void setOutputFileLocation(File outputFileLocation) {
this.outputFileLocation = outputFileLocation;
}
@ -180,6 +193,9 @@ public class WalkerTest extends BaseTest {
auxillaryFiles.put(expectededMD5sum, outputfile);
}
public void disableImplicitArgs() {
includeImplicitArgs = false;
}
}
protected boolean parameterize() {
@ -213,7 +229,7 @@ public class WalkerTest extends BaseTest {
tmpFiles.add(fl);
}
final String args = String.format(spec.args, tmpFiles.toArray());
final String args = String.format(spec.getArgsWithImplicitArgs(), tmpFiles.toArray());
System.out.println(Utils.dupString('-', 80));
if ( spec.expectsException() ) {
@ -277,13 +293,10 @@ public class WalkerTest extends BaseTest {
* @param args the argument list
* @param expectedException the expected exception or null
*/
public static void executeTest(String name, String args, Class expectedException) {
private void executeTest(String name, String args, Class expectedException) {
CommandLineGATK instance = new CommandLineGATK();
String[] command = Utils.escapeExpressions(args);
// add the logging level to each of the integration test commands
command = Utils.appendArray(command, "-et", ENABLE_REPORTING ? "STANDARD" : "NO_ET");
// run the executable
boolean gotAnException = false;
try {

View File

@ -0,0 +1,103 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.BitSet;
import java.util.Random;
/**
* Short one line description of the walker.
*
* <p>
* [Long description of the walker]
* </p>
*
*
* <h2>Input</h2>
* <p>
* [Description of the Input]
* </p>
*
* <h2>Output</h2>
* <p>
* [Description of the Output]
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T [walker name]
* </pre>
*
* @author Mauricio Carneiro
* @since 3/1/12
*/
public class ContextCovariateUnitTest {
ContextCovariate covariate;
RecalibrationArgumentCollection RAC;
Random random;
@BeforeClass
public void init() {
RAC = new RecalibrationArgumentCollection();
covariate = new ContextCovariate();
random = GenomeAnalysisEngine.getRandomGenerator();
covariate.initialize(RAC);
}
@Test(enabled = true)
public void testSimpleContexts() {
byte [] quals = createRandomReadQuals(101);
byte [] bbases = createRandomReadBases(101);
String bases = stringFrom(bbases);
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
CovariateValues values = covariate.getValues(read);
verifyCovariateArray((BitSet []) values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
verifyCovariateArray((BitSet []) values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
verifyCovariateArray((BitSet []) values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
}
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
for (int i=0; i<values.length; i++) {
if (i >= contextSize)
Assert.assertEquals(MathUtils.dnaFrom(values[i]), bases.substring(i-contextSize, i));
else
Assert.assertNull(values[i]);
}
}
private String stringFrom(byte [] array) {
String s = "";
for (byte value : array)
s += (char) value;
return s;
}
private byte [] createRandomReadQuals(int length) {
byte [] quals = new byte[length];
for (int i=0; i<length; i++)
quals[i] = (byte) random.nextInt(50);
return quals;
}
private byte [] createRandomReadBases(int length) {
byte [] bases = new byte[length];
for (int i=0; i<length; i++) {
switch(random.nextInt(4)) {
case 0: bases[i] = 'A'; break;
case 1: bases[i] = 'C'; break;
case 2: bases[i] = 'G'; break;
case 3: bases[i] = 'T'; break;
}
}
return bases;
}
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diagnostics;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
* Tests ReadGroupProperties
*/
public class ReadGroupPropertiesIntegrationTest extends WalkerTest {
@Test
public void basicTest() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s",
1,
Arrays.asList("6b8cce223af28cbadcfe87a3b841fc56"));
executeTest("ReadGroupProperties:", spec);
}
}

View File

@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("202b337ebbea3def1be8495eb363dfa8"));
Arrays.asList("8f81a14fffc1a59b4b066f8595dc1232"));
executeTest("test MultiSample Pilot1", spec);
}
@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("ae29b9c9aacce8046dc780430540cd62"));
Arrays.asList("c5b53231f4f6d9524bc4ec8115f44f5c"));
executeTest("test SingleSample Pilot2", spec);
}
@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
Arrays.asList("10027d13befaa07b7900a7af0ae0791c"));
Arrays.asList("5af005255240a2186f04cb50851b8b6f"));
executeTest("test Multiple SNP alleles", spec);
}
@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65";
private final static String COMPRESSED_OUTPUT_MD5 = "a08df9aea2b3df09cf90ff8e6e3be3ea";
@Test
public void testCompressedOutput() {
@ -91,7 +91,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "32a34362dff51d8b73a3335048516d82";
String md5 = "6358934c1c26345013a38261b8c45aa4";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -179,8 +179,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "2cb2544739e01f6c08fd820112914317" );
e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" );
e.put( 0.01, "926b58038dd4989bf7eda697a847eea9" );
e.put( 1.0 / 1850, "93f44105b43b65730a3b821e27b0fa16" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -204,7 +204,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
Arrays.asList("a1b75a7e12b160b0be823228c958573f"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -223,7 +223,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
Arrays.asList("3bda1279cd6dcb47885f3e19466f11b9"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -242,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("bd9d3d50a1f49605d7cd592a0f446899"));
Arrays.asList("d9fc3ba94a0d46029778c7b457e7292a"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -257,7 +257,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6"));
Arrays.asList("b2e30ae3e5ffa6108f9f6178b1d2e679"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("91cd6d2e3972b0b8e4064bb35a33241f"));
Arrays.asList("2cd182a84613fa91a6020466d2d327e2"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd"));
Arrays.asList("9cd08dc412a007933381e9c76c073899"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
}
@ -290,7 +290,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("320f61c87253aba77d6dc782242cba8b"));
Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("c9897b80615c53a4ea10a4b193d56d9c"));
Arrays.asList("2609675a356f2dfc86f8a1d911210978"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("5282fdb1711a532d726c13507bf80a21"));
Arrays.asList("4fdd8da77167881b71b3547da5c13f94"));
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
}

View File

@ -0,0 +1,163 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.*;
/**
* Basic unit test for Haplotype Class
*/
public class HaplotypeUnitTest extends BaseTest {
@BeforeClass
public void init() {
}
@Test
public void testSimpleInsertionAllele() {
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
}
@Test
public void testSimpleDeletionAllele() {
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
}
@Test
public void testSimpleSNPAllele() {
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AGTGGTCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("C", "G", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCTACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "T", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAAATGGTCAACTGGTCA";
basicInsertTest("C", "A", 17, h1Cigar, bases, h1bases);
}
@Test
public void testComplexInsertionAllele() {
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
h1CigarList.add(new CigarElement(4, CigarOperator.M));
h1CigarList.add(new CigarElement(10, CigarOperator.I));
h1CigarList.add(new CigarElement(8, CigarOperator.M));
h1CigarList.add(new CigarElement(3, CigarOperator.D));
h1CigarList.add(new CigarElement(7, CigarOperator.M));
h1CigarList.add(new CigarElement(4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
}
@Test
public void testComplexDeletionAllele() {
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
h1CigarList.add(new CigarElement(4, CigarOperator.M));
h1CigarList.add(new CigarElement(10, CigarOperator.I));
h1CigarList.add(new CigarElement(8, CigarOperator.M));
h1CigarList.add(new CigarElement(3, CigarOperator.D));
h1CigarList.add(new CigarElement(7, CigarOperator.M));
h1CigarList.add(new CigarElement(4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
}
@Test
public void testComplexSNPAllele() {
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
h1CigarList.add(new CigarElement(4, CigarOperator.M));
h1CigarList.add(new CigarElement(10, CigarOperator.I));
h1CigarList.add(new CigarElement(8, CigarOperator.M));
h1CigarList.add(new CigarElement(3, CigarOperator.D));
h1CigarList.add(new CigarElement(7, CigarOperator.M));
h1CigarList.add(new CigarElement(4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AGCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("T", "G", 1, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCTATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("G", "T", 7, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGCGGGA" + "AGGC";
basicInsertTest("G", "C", 17, h1Cigar, bases, h1bases);
}
private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1);
final Haplotype h = new Haplotype(hap.getBytes());
final Allele h1refAllele = Allele.create(ref, true);
final Allele h1altAllele = Allele.create(alt, false);
final Haplotype h1 = new Haplotype( h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE, 0, cigar) );
final Haplotype h1expected = new Haplotype(newHap.getBytes());
Assert.assertEquals(h1, h1expected);
}
}

View File

@ -205,16 +205,9 @@ public class MathUtilsUnitTest extends BaseTest {
}
}
@Test(enabled = true)
public void testIntAndBitSetConversion() {
Assert.assertEquals(428, MathUtils.intFrom(MathUtils.bitSetFrom(428)));
Assert.assertEquals(239847, MathUtils.intFrom(MathUtils.bitSetFrom(239847)));
Assert.assertEquals(12726, MathUtils.intFrom(MathUtils.bitSetFrom(12726)));
Assert.assertEquals(0, MathUtils.intFrom(MathUtils.bitSetFrom(0)));
Assert.assertEquals(1, MathUtils.intFrom(MathUtils.bitSetFrom(1)));
Assert.assertEquals(65536, MathUtils.intFrom(MathUtils.bitSetFrom(65536)));
}
/**
* Private functions used by testArrayShuffle()
*/
private boolean hasUniqueElements(Object[] x) {
for (int i = 0; i < x.length; i++)
for (int j = i + 1; j < x.length; j++)
@ -230,10 +223,100 @@ public class MathUtilsUnitTest extends BaseTest {
return set.isEmpty();
}
private void p (Object []x) {
for (Object v: x)
System.out.print((Integer) v + " ");
System.out.println();
@Test(enabled = true)
public void testIntAndBitSetConversion() {
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(239847)), 239847);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(12726)), 12726);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(0)), 0);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(1)), 1);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(65536)), 65536);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(Long.MAX_VALUE)), Long.MAX_VALUE);
}
@Test(enabled = true)
public void testDNAAndBitSetConversion() {
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("ACGT")), "ACGT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AGGTGTTGT")), "AGGTGTTGT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("A")), "A");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("C")), "C");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("G")), "G");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("T")), "T");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CC")), "CC");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AA")), "AA");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AAAA")), "AAAA");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CCCCCCCCCCCCCC")), "CCCCCCCCCCCCCC");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GGGGGGGGGGGGGG")), "GGGGGGGGGGGGGG");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("TTTTTTTTTTTTTT")), "TTTTTTTTTTTTTT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GTAGACCGATCTCAGCTAGT")), "GTAGACCGATCTCAGCTAGT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31)
}
@Test
public void testApproximateLog10SumLog10() {
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
}
@Test
public void testNormalizeFromLog10() {
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
}
/**
* Private function used by testNormalizeFromLog10()
*/
private boolean compareDoubleArrays(double[] b1, double[] b2) {
if( b1.length != b2.length ) {
return false; // sanity check
}
for( int i=0; i < b1.length; i++ ){
if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 )
return false;
}
return true;
}
}

View File

@ -0,0 +1,177 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.security.Key;
import java.security.KeyPair;
import java.security.PrivateKey;
import java.security.PublicKey;
import java.util.Arrays;
public class CryptUtilsUnitTest extends BaseTest {
@Test
public void testGenerateValidKeyPairWithDefaultSettings() {
KeyPair keyPair = CryptUtils.generateKeyPair();
Assert.assertTrue(CryptUtils.keysDecryptEachOther(keyPair.getPrivate(), keyPair.getPublic()));
}
@DataProvider( name = "InvalidKeyPairSettings" )
public Object[][] invalidKeyPairSettingsDataProvider() {
return new Object[][] {
{ -1, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM},
{ CryptUtils.DEFAULT_KEY_LENGTH, "Made-up algorithm", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM},
{ CryptUtils.DEFAULT_KEY_LENGTH, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, "Made-up algorithm"}
};
}
@Test( dataProvider = "InvalidKeyPairSettings", expectedExceptions = ReviewedStingException.class )
public void testGenerateKeyPairWithInvalidSettings( int keyLength, String encryptionAlgorithm, String randomNumberGenerationAlgorithm ) {
KeyPair keyPair = CryptUtils.generateKeyPair(keyLength, encryptionAlgorithm, randomNumberGenerationAlgorithm);
}
@Test
public void testGATKMasterKeyPairMutualDecryption() {
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey()));
}
@Test
public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() {
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey()));
}
@Test
public void testKeyPairWriteThenRead() {
KeyPair keyPair = CryptUtils.generateKeyPair();
File privateKeyFile = createTempFile("testKeyPairWriteThenRead_private", "key");
File publicKeyFile = createTempFile("testKeyPairWriteThenRead_public", "key");
CryptUtils.writeKeyPair(keyPair, privateKeyFile, publicKeyFile);
assertKeysAreEqual(keyPair.getPrivate(), CryptUtils.readPrivateKey(privateKeyFile));
assertKeysAreEqual(keyPair.getPublic(), CryptUtils.readPublicKey(publicKeyFile));
}
@Test
public void testPublicKeyWriteThenReadFromFile() {
File keyFile = createTempFile("testPublicKeyWriteThenReadFromFile", "key");
PublicKey publicKey = CryptUtils.generateKeyPair().getPublic();
CryptUtils.writeKey(publicKey, keyFile);
assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(keyFile));
}
@Test
public void testPublicKeyWriteThenReadFromStream() throws IOException {
File keyFile = createTempFile("testPublicKeyWriteThenReadFromStream", "key");
PublicKey publicKey = CryptUtils.generateKeyPair().getPublic();
CryptUtils.writeKey(publicKey, keyFile);
assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(new FileInputStream(keyFile)));
}
@Test
public void testPrivateKeyWriteThenReadFromFile() {
File keyFile = createTempFile("testPrivateKeyWriteThenReadFromFile", "key");
PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate();
CryptUtils.writeKey(privateKey, keyFile);
assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(keyFile));
}
@Test
public void testPrivateKeyWriteThenReadFromStream() throws IOException {
File keyFile = createTempFile("testPrivateKeyWriteThenReadFromStream", "key");
PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate();
CryptUtils.writeKey(privateKey, keyFile);
assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(new FileInputStream(keyFile)));
}
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
public void testReadNonExistentPublicKey() {
File nonExistentFile = new File("jdshgkdfhg.key");
Assert.assertFalse(nonExistentFile.exists());
CryptUtils.readPublicKey(nonExistentFile);
}
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
public void testReadNonExistentPrivateKey() {
File nonExistentFile = new File("jdshgkdfhg.key");
Assert.assertFalse(nonExistentFile.exists());
CryptUtils.readPrivateKey(nonExistentFile);
}
@Test
public void testDecodePublicKey() {
PublicKey originalKey = CryptUtils.generateKeyPair().getPublic();
PublicKey decodedKey = CryptUtils.decodePublicKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM);
assertKeysAreEqual(originalKey, decodedKey);
}
@Test
public void testDecodePrivateKey() {
PrivateKey originalKey = CryptUtils.generateKeyPair().getPrivate();
PrivateKey decodedKey = CryptUtils.decodePrivateKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM);
assertKeysAreEqual(originalKey, decodedKey);
}
@Test
public void testLoadGATKMasterPrivateKey() {
PrivateKey gatkMasterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
}
@Test
public void testLoadGATKMasterPublicKey() {
PublicKey gatkMasterPublicKey = CryptUtils.loadGATKMasterPublicKey();
}
@Test
public void testLoadGATKDistributedPublicKey() {
PublicKey gatkDistributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
}
private void assertKeysAreEqual( Key originalKey, Key keyFromDisk ) {
Assert.assertTrue(Arrays.equals(originalKey.getEncoded(), keyFromDisk.getEncoded()));
Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm());
Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat());
}
}

View File

@ -0,0 +1,156 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
public class GATKKeyIntegrationTest extends WalkerTest {
public static final String BASE_COMMAND = String.format("-T PrintReads -R %s -I %s -o %%s",
testDir + "exampleFASTA.fasta",
testDir + "exampleBAM.bam");
public static final String MD5_UPON_SUCCESSFUL_RUN = "b9dc5bf6753ca2819e70b056eaf61258";
private void runGATKKeyTest ( String testName, String etArg, String keyArg, Class expectedException, String md5 ) {
String command = BASE_COMMAND + String.format(" %s %s", etArg, keyArg);
WalkerTestSpec spec = expectedException != null ?
new WalkerTestSpec(command, 1, expectedException) :
new WalkerTestSpec(command, 1, Arrays.asList(md5));
spec.disableImplicitArgs(); // Turn off automatic inclusion of -et/-K args by WalkerTest
executeTest(testName, spec);
}
@Test
public void testValidKeyNoET() {
runGATKKeyTest("testValidKeyNoET",
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
"-K " + keysDataLocation + "valid.key",
null,
MD5_UPON_SUCCESSFUL_RUN);
}
@Test
public void testValidKeyETStdout() {
runGATKKeyTest("testValidKeyETStdout",
"-et " + GATKRunReport.PhoneHomeOption.STDOUT,
"-K " + keysDataLocation + "valid.key",
null,
MD5_UPON_SUCCESSFUL_RUN);
}
@Test
public void testValidKeyETStandard() {
runGATKKeyTest("testValidKeyETStandard",
"",
"-K " + keysDataLocation + "valid.key",
null,
MD5_UPON_SUCCESSFUL_RUN);
}
@Test
public void testNoKeyNoET() {
runGATKKeyTest("testNoKeyNoET",
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
"",
UserException.class,
null);
}
@Test
public void testNoKeyETStdout() {
runGATKKeyTest("testNoKeyETStdout",
"-et " + GATKRunReport.PhoneHomeOption.STDOUT,
"",
UserException.class,
null);
}
@Test
public void testNoKeyETStandard() {
runGATKKeyTest("testNoKeyETStandard",
"",
"",
null,
MD5_UPON_SUCCESSFUL_RUN);
}
@Test
public void testRevokedKey() {
runGATKKeyTest("testRevokedKey",
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
"-K " + keysDataLocation + "revoked.key",
UserException.KeySignatureVerificationException.class,
null);
}
@DataProvider(name = "CorruptKeyTestData")
public Object[][] corruptKeyDataProvider() {
return new Object[][] {
{ "corrupt_empty.key", UserException.UnreadableKeyException.class },
{ "corrupt_single_byte_file.key", UserException.UnreadableKeyException.class },
{ "corrupt_random_contents.key", UserException.UnreadableKeyException.class },
{ "corrupt_single_byte_deletion.key", UserException.UnreadableKeyException.class },
{ "corrupt_single_byte_insertion.key", UserException.UnreadableKeyException.class },
{ "corrupt_single_byte_change.key", UserException.UnreadableKeyException.class },
{ "corrupt_multi_byte_deletion.key", UserException.UnreadableKeyException.class },
{ "corrupt_multi_byte_insertion.key", UserException.UnreadableKeyException.class },
{ "corrupt_multi_byte_change.key", UserException.UnreadableKeyException.class },
{ "corrupt_bad_isize_field.key", UserException.UnreadableKeyException.class },
{ "corrupt_bad_crc.key", UserException.UnreadableKeyException.class },
{ "corrupt_no_email_address.key", UserException.UnreadableKeyException.class },
{ "corrupt_no_sectional_delimiter.key", UserException.KeySignatureVerificationException.class },
{ "corrupt_no_signature.key", UserException.UnreadableKeyException.class },
{ "corrupt_bad_signature.key", UserException.KeySignatureVerificationException.class },
{ "corrupt_non_gzipped_valid_key.key", UserException.UnreadableKeyException.class }
};
}
@Test(dataProvider = "CorruptKeyTestData")
public void testCorruptKey ( String corruptKeyName, Class expectedException ) {
runGATKKeyTest(String.format("testCorruptKey (%s)", corruptKeyName),
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
"-K " + keysDataLocation + corruptKeyName,
expectedException,
null);
}
@Test
public void testCorruptButNonRequiredKey() {
runGATKKeyTest("testCorruptButNonRequiredKey",
"",
"-K " + keysDataLocation + "corrupt_random_contents.key",
null,
MD5_UPON_SUCCESSFUL_RUN);
}
}

View File

@ -0,0 +1,113 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.crypt;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.io.File;
import java.security.KeyPair;
import java.security.PrivateKey;
import java.security.PublicKey;
public class GATKKeyUnitTest extends BaseTest {
@Test
public void testCreateGATKKeyUsingMasterKeyPair() {
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey();
// We should be able to create a valid GATKKey using our master key pair:
GATKKey key = new GATKKey(masterPrivateKey, masterPublicKey, "foo@bar.com");
Assert.assertTrue(key.isValid());
}
@Test
public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() {
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
// We should also be able to create a valid GATKKey using our master private
// key and the public key we distribute with the GATK:
GATKKey key = new GATKKey(masterPrivateKey, distributedPublicKey, "foo@bar.com");
Assert.assertTrue(key.isValid());
}
@Test( expectedExceptions = ReviewedStingException.class )
public void testKeyPairMismatch() {
KeyPair firstKeyPair = CryptUtils.generateKeyPair();
KeyPair secondKeyPair = CryptUtils.generateKeyPair();
// Attempting to create a GATK Key with private and public keys that aren't part of the
// same key pair should immediately trigger a validation failure:
GATKKey key = new GATKKey(firstKeyPair.getPrivate(), secondKeyPair.getPublic(), "foo@bar.com");
}
@Test( expectedExceptions = ReviewedStingException.class )
public void testEncryptionAlgorithmMismatch() {
KeyPair keyPair = CryptUtils.generateKeyPair(CryptUtils.DEFAULT_KEY_LENGTH, "DSA", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
// Attempting to use a DSA private key to create an RSA signature should throw an error:
GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), "foo@bar.com", "SHA1withRSA");
}
@Test( expectedExceptions = UserException.class )
public void testInvalidEmailAddress() {
String emailAddressWithNulByte = new String(new byte[] { 0 });
KeyPair keyPair = CryptUtils.generateKeyPair();
// Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file:
GATKKey key = new GATKKey(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey(),
emailAddressWithNulByte);
}
@Test
public void testCreateGATKKeyFromValidKeyFile() {
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "valid.key"));
Assert.assertTrue(key.isValid());
}
@Test( expectedExceptions = UserException.UnreadableKeyException.class )
public void testCreateGATKKeyFromCorruptKeyFile() {
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "corrupt_random_contents.key"));
}
@Test
public void testCreateGATKKeyFromRevokedKeyFile() {
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "revoked.key"));
Assert.assertFalse(key.isValid());
}
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
public void testCreateGATKKeyFromNonExistentFile() {
File nonExistentFile = new File("ghfdkgsdhg.key");
Assert.assertFalse(nonExistentFile.exists());
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile);
}
}

View File

@ -27,12 +27,18 @@ package org.broadinstitute.sting.utils.io;
import org.apache.commons.io.FileUtils;
import org.broadinstitute.sting.BaseTest;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
public class IOUtilsUnitTest extends BaseTest {
@ -230,4 +236,90 @@ public class IOUtilsUnitTest extends BaseTest {
Assert.assertFalse(IOUtils.isSpecialFile(new File("/home/user/my.file")));
Assert.assertFalse(IOUtils.isSpecialFile(new File("/devfake/null")));
}
@DataProvider( name = "ByteArrayIOTestData")
public Object[][] byteArrayIOTestDataProvider() {
return new Object[][] {
// file size, read buffer size
{ 0, 4096 },
{ 1, 4096 },
{ 2000, 4096 },
{ 4095, 4096 },
{ 4096, 4096 },
{ 4097, 4096 },
{ 6000, 4096 },
{ 8191, 4096 },
{ 8192, 4096 },
{ 8193, 4096 },
{ 10000, 4096 }
};
}
@Test( dataProvider = "ByteArrayIOTestData" )
public void testWriteThenReadFileIntoByteArray ( int fileSize, int readBufferSize ) throws Exception {
File tempFile = createTempFile(String.format("testWriteThenReadFileIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp");
byte[] dataWritten = getDeterministicRandomData(fileSize);
IOUtils.writeByteArrayToFile(dataWritten, tempFile);
byte[] dataRead = IOUtils.readFileIntoByteArray(tempFile, readBufferSize);
Assert.assertEquals(dataRead.length, dataWritten.length);
Assert.assertTrue(Arrays.equals(dataRead, dataWritten));
}
@Test( dataProvider = "ByteArrayIOTestData" )
public void testWriteThenReadStreamIntoByteArray ( int fileSize, int readBufferSize ) throws Exception {
File tempFile = createTempFile(String.format("testWriteThenReadStreamIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp");
byte[] dataWritten = getDeterministicRandomData(fileSize);
IOUtils.writeByteArrayToStream(dataWritten, new FileOutputStream(tempFile));
byte[] dataRead = IOUtils.readStreamIntoByteArray(new FileInputStream(tempFile), readBufferSize);
Assert.assertEquals(dataRead.length, dataWritten.length);
Assert.assertTrue(Arrays.equals(dataRead, dataWritten));
}
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
public void testReadNonExistentFileIntoByteArray() {
File nonExistentFile = new File("djfhsdkjghdfk");
Assert.assertFalse(nonExistentFile.exists());
IOUtils.readFileIntoByteArray(nonExistentFile);
}
@Test( expectedExceptions = ReviewedStingException.class )
public void testReadNullStreamIntoByteArray() {
IOUtils.readStreamIntoByteArray(null);
}
@Test( expectedExceptions = ReviewedStingException.class )
public void testReadStreamIntoByteArrayInvalidBufferSize() throws Exception {
IOUtils.readStreamIntoByteArray(new FileInputStream(createTempFile("testReadStreamIntoByteArrayInvalidBufferSize", "tmp")),
-1);
}
@Test( expectedExceptions = UserException.CouldNotCreateOutputFile.class )
public void testWriteByteArrayToUncreatableFile() {
IOUtils.writeByteArrayToFile(new byte[]{0}, new File("/dev/foo/bar"));
}
@Test( expectedExceptions = ReviewedStingException.class )
public void testWriteNullByteArrayToFile() {
IOUtils.writeByteArrayToFile(null, createTempFile("testWriteNullByteArrayToFile", "tmp"));
}
@Test( expectedExceptions = ReviewedStingException.class )
public void testWriteByteArrayToNullStream() {
IOUtils.writeByteArrayToStream(new byte[]{0}, null);
}
private byte[] getDeterministicRandomData ( int size ) {
GenomeAnalysisEngine.resetRandomGenerator();
Random rand = GenomeAnalysisEngine.getRandomGenerator();
byte[] randomData = new byte[size];
rand.nextBytes(randomData);
return randomData;
}
}

Binary file not shown.

View File

@ -36,6 +36,8 @@
<dir name="org/broadinstitute/sting/utils/R" includes="**/*.tar.gz" />
<!-- All R scripts in org.broadinstitute.sting -->
<dir name="org/broadinstitute/sting" includes="**/*.R" />
<!-- The GATK public key -->
<file path="GATK_public.key" />
</dependencies>
</executable>
<resources>