Merge branch 'master' of ssh://ni.broadinstitute.org/humgen/gsa-scr1/chartl/dev/unstable
This commit is contained in:
commit
67def6acc8
|
|
@ -47,6 +47,7 @@
|
|||
<property name="R.package.path" value="org/broadinstitute/sting/utils/R" />
|
||||
<property name="resource.file" value="StingText.properties" />
|
||||
<property name="resource.path" value="${java.classes}/StingText.properties" />
|
||||
<property name="key.dir" value="${public.dir}/keys" />
|
||||
|
||||
<property name="scala.public.source.dir" value="${public.dir}/scala/src" />
|
||||
<property name="scala.private.source.dir" value="${private.dir}/scala/src" />
|
||||
|
|
@ -567,6 +568,7 @@
|
|||
</fileset>
|
||||
<fileset dir="${java.classes}" includes="**/commandline/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="**/sting/pipeline/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="**/sting/tools/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
|
||||
|
|
@ -615,6 +617,9 @@
|
|||
<include name="**/gatk/**/*.R"/>
|
||||
<include name="**/alignment/**/*.R"/>
|
||||
</fileset>
|
||||
<fileset dir="${key.dir}">
|
||||
<include name="**/*.key"/>
|
||||
</fileset>
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
||||
</manifest>
|
||||
|
|
@ -879,6 +884,7 @@
|
|||
<pathelement location="${R.tar.dir}" />
|
||||
<pathelement location="${R.public.scripts.dir}" />
|
||||
<pathelement location="${R.private.scripts.dir}" />
|
||||
<pathelement location="${key.dir}" />
|
||||
<path refid="external.dependencies" />
|
||||
</path>
|
||||
|
||||
|
|
|
|||
|
|
@ -4,11 +4,9 @@
|
|||
colnames(d) = tableHeader;
|
||||
|
||||
for (i in 1:ncol(d)) {
|
||||
v = suppressWarnings(as.numeric(d[,i]));
|
||||
|
||||
if (length(na.omit(as.numeric(v))) == length(d[,i])) {
|
||||
d[,i] = v;
|
||||
}
|
||||
# use the general type.convert infrastructure of read.table to convert column data to R types
|
||||
v = type.convert(d[,i])
|
||||
d[,i] = v;
|
||||
}
|
||||
|
||||
usedNames = ls(envir=tableEnv, pattern=tableName);
|
||||
|
|
|
|||
|
|
@ -35,9 +35,12 @@ import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
|||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.crypt.CryptUtils;
|
||||
import org.broadinstitute.sting.utils.crypt.GATKKey;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||
|
||||
import java.security.PublicKey;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -78,6 +81,9 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
Walker<?,?> walker = engine.getWalkerByName(getAnalysisName());
|
||||
|
||||
try {
|
||||
// Make sure a valid GATK user key is present, if required.
|
||||
authorizeGATKRun();
|
||||
|
||||
engine.setArguments(getArgumentCollection());
|
||||
|
||||
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
|
||||
|
|
@ -130,6 +136,28 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Authorizes this run of the GATK by checking for a valid GATK user key, if required.
|
||||
* Currently, a key is required only if running with the -et NO_ET or -et STDOUT options.
|
||||
*/
|
||||
private void authorizeGATKRun() {
|
||||
if ( getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.NO_ET ||
|
||||
getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) {
|
||||
if ( getArgumentCollection().gatkKeyFile == null ) {
|
||||
throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
|
||||
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
|
||||
"for more information and instructions on how to obtain a key.");
|
||||
}
|
||||
else {
|
||||
PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
||||
GATKKey gatkUserKey = new GATKKey(gatkPublicKey, getArgumentCollection().gatkKeyFile);
|
||||
|
||||
if ( ! gatkUserKey.isValid() ) {
|
||||
throw new UserException.KeySignatureVerificationException(getArgumentCollection().gatkKeyFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
|
||||
|
|
|
|||
|
|
@ -65,9 +65,12 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
|
||||
public Integer readBufferSize = null;
|
||||
|
||||
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
|
||||
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
|
||||
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
|
||||
|
||||
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
|
||||
public File gatkKeyFile = null;
|
||||
|
||||
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
|
||||
public List<String> readFilters = new ArrayList<String>();
|
||||
|
||||
|
|
|
|||
|
|
@ -154,9 +154,7 @@ public class GATKRunReport {
|
|||
/** Standard option. Writes to local repository if it can be found, or S3 otherwise */
|
||||
STANDARD,
|
||||
/** Force output to STDOUT. For debugging only */
|
||||
STDOUT,
|
||||
/** Force output to S3. For debugging only */
|
||||
AWS_S3 // todo -- remove me -- really just for testing purposes
|
||||
STDOUT
|
||||
}
|
||||
|
||||
private static final DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss");
|
||||
|
|
@ -239,11 +237,8 @@ public class GATKRunReport {
|
|||
case STDOUT:
|
||||
postReportToStream(System.out);
|
||||
break;
|
||||
case AWS_S3:
|
||||
postReportToAWSS3();
|
||||
break;
|
||||
default:
|
||||
exceptDuringRunReport("BUG: unexcepted PhoneHomeOption ");
|
||||
exceptDuringRunReport("BUG: unexpected PhoneHomeOption ");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,15 +43,15 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
|||
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
||||
|
||||
if ( vc.isSNP() ) {
|
||||
final List<Byte> altAlleles = new ArrayList<Byte>();
|
||||
for ( final Allele a : vc.getAlternateAlleles() )
|
||||
altAlleles.add(a.getBases()[0]);
|
||||
|
||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
final List<Byte> altAlleles = new ArrayList<Byte>();
|
||||
for ( final Allele a : vc.getAlternateAlleles() )
|
||||
altAlleles.add(a.getBases()[0]);
|
||||
|
||||
fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals);
|
||||
}
|
||||
} else if ( vc.isIndel() || vc.isMixed() ) {
|
||||
|
|
|
|||
|
|
@ -26,10 +26,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -43,10 +45,6 @@ public class ContextCovariate implements StandardCovariate {
|
|||
private int insertionsContextSize;
|
||||
private int deletionsContextSize;
|
||||
|
||||
private String mismatchesNoContext = "";
|
||||
private String insertionsNoContext = "";
|
||||
private String deletionsNoContext = "";
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
|
|
@ -57,29 +55,26 @@ public class ContextCovariate implements StandardCovariate {
|
|||
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
|
||||
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
|
||||
|
||||
// initialize no context strings given the size of the context for each covariate type
|
||||
mismatchesNoContext = makeAllNStringWithLength(mismatchesContextSize);
|
||||
insertionsNoContext = makeAllNStringWithLength(insertionsContextSize);
|
||||
deletionsNoContext = makeAllNStringWithLength( deletionsContextSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
int l = read.getReadLength();
|
||||
String[] mismatches = new String [l];
|
||||
String[] insertions = new String [l];
|
||||
String[] deletions = new String [l];
|
||||
BitSet[] mismatches = new BitSet[l];
|
||||
BitSet[] insertions = new BitSet[l];
|
||||
BitSet[] deletions = new BitSet[l];
|
||||
|
||||
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||
byte[] bases = read.getReadBases();
|
||||
if (negativeStrand) {
|
||||
bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
|
||||
}
|
||||
if (negativeStrand)
|
||||
bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext);
|
||||
insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext);
|
||||
deletions[i] = contextWith(bases, i, deletionsContextSize, deletionsNoContext);
|
||||
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
|
||||
insertions[i] = contextWith(bases, i, insertionsContextSize);
|
||||
deletions[i] = contextWith(bases, i, deletionsContextSize);
|
||||
}
|
||||
|
||||
if (negativeStrand) {
|
||||
reverse(mismatches);
|
||||
reverse(insertions);
|
||||
|
|
@ -90,7 +85,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Object getValue(final String str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
|
|
@ -100,29 +95,28 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @param bases the bases in the read to build the context from
|
||||
* @param offset the position in the read to calculate the context for
|
||||
* @param contextSize context size to use building the context
|
||||
* @param noContextString string to return if the position is not far enough in the read to have a full context before.
|
||||
* @return
|
||||
*/
|
||||
private String contextWith(byte [] bases, int offset, int contextSize, String noContextString) {
|
||||
return (offset < contextSize) ? noContextString : new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
|
||||
private BitSet contextWith(byte [] bases, int offset, int contextSize) {
|
||||
if (offset < contextSize)
|
||||
return null;
|
||||
|
||||
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
|
||||
if (context.contains("N"))
|
||||
return null;
|
||||
|
||||
return MathUtils.bitSetFrom(context);
|
||||
}
|
||||
|
||||
private String makeAllNStringWithLength(int length) {
|
||||
String s = "";
|
||||
for (int i=0; i<length; i++)
|
||||
s += "N";
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverses the given array in place.
|
||||
*
|
||||
* @param array any array
|
||||
*/
|
||||
private static void reverse(final Comparable[] array) {
|
||||
private static void reverse(final Object[] array) {
|
||||
final int arrayLength = array.length;
|
||||
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
||||
final Comparable temp = array[l];
|
||||
final Object temp = array[l];
|
||||
array[l] = array[r];
|
||||
array[r] = temp;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ public interface Covariate {
|
|||
*/
|
||||
public CovariateValues getValues(GATKSAMRecord read);
|
||||
|
||||
public Comparable getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
public Object getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
}
|
||||
|
||||
interface RequiredCovariate extends Covariate {}
|
||||
|
|
|
|||
|
|
@ -12,25 +12,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class CovariateValues {
|
||||
private Comparable[] mismatches;
|
||||
private Comparable[] insertions;
|
||||
private Comparable[] deletions;
|
||||
private Object[] mismatches;
|
||||
private Object[] insertions;
|
||||
private Object[] deletions;
|
||||
|
||||
public CovariateValues(Comparable[] mismatch, Comparable[] insertion, Comparable[] deletion) {
|
||||
public CovariateValues(Object[] mismatch, Object[] insertion, Object[] deletion) {
|
||||
this.mismatches = mismatch;
|
||||
this.insertions = insertion;
|
||||
this.deletions = deletion;
|
||||
}
|
||||
|
||||
public Comparable[] getMismatches() {
|
||||
public Object[] getMismatches() {
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
public Comparable[] getInsertions() {
|
||||
public Object[] getInsertions() {
|
||||
return insertions;
|
||||
}
|
||||
|
||||
public Comparable[] getDeletions() {
|
||||
public Object[] getDeletions() {
|
||||
return deletions;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -198,7 +198,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Object getValue(final String str) {
|
||||
return Integer.parseInt(str);
|
||||
}
|
||||
}
|
||||
|
|
@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -67,7 +65,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Object getValue(final String str) {
|
||||
return Integer.parseInt(str);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -38,6 +39,10 @@ import java.util.Arrays;
|
|||
*/
|
||||
|
||||
public class ReadGroupCovariate implements RequiredCovariate {
|
||||
|
||||
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
|
||||
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
|
||||
private short nextId = 0;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
|
|
@ -48,16 +53,29 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
final int l = read.getReadLength();
|
||||
final String readGroupId = read.getReadGroup().getReadGroupId();
|
||||
String [] readGroups = new String[l];
|
||||
Arrays.fill(readGroups, readGroupId);
|
||||
short shortId;
|
||||
if (readGroupLookupTable.containsKey(readGroupId))
|
||||
shortId = readGroupLookupTable.get(readGroupId);
|
||||
else {
|
||||
shortId = nextId;
|
||||
readGroupLookupTable.put(readGroupId, nextId);
|
||||
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||
nextId++;
|
||||
}
|
||||
Short [] readGroups = new Short[l];
|
||||
Arrays.fill(readGroups, shortId);
|
||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Object getValue(final String str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
public final String decodeReadGroup(final short id) {
|
||||
return readGroupReverseLookupTable.get(id);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -25,8 +25,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -39,6 +45,63 @@ import org.broadinstitute.sting.commandline.Hidden;
|
|||
|
||||
public class RecalibrationArgumentCollection {
|
||||
|
||||
/**
|
||||
* This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference,
|
||||
* so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites. This tool accepts any number of RodBindings (VCF, Bed, etc.)
|
||||
* for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
|
||||
* Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument.
|
||||
*/
|
||||
@Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
|
||||
protected List<RodBinding<Feature>> knownSites = Collections.emptyList();
|
||||
|
||||
/**
|
||||
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
|
||||
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
|
||||
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||
*/
|
||||
@Gather(CountCovariatesGatherer.class)
|
||||
@Output
|
||||
protected PrintStream RECAL_FILE;
|
||||
|
||||
/**
|
||||
* List all implemented covariates.
|
||||
*/
|
||||
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
|
||||
protected boolean LIST_ONLY = false;
|
||||
|
||||
/**
|
||||
* Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list.
|
||||
*/
|
||||
@Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false)
|
||||
protected String[] COVARIATES = null;
|
||||
|
||||
/*
|
||||
* Use the standard set of covariates in addition to the ones listed using the -cov argument
|
||||
*/
|
||||
@Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false)
|
||||
protected boolean USE_STANDARD_COVARIATES = true;
|
||||
|
||||
/////////////////////////////
|
||||
// Debugging-only Arguments
|
||||
/////////////////////////////
|
||||
/**
|
||||
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
|
||||
protected boolean RUN_WITHOUT_DBSNP = false;
|
||||
|
||||
/////////////////////////////
|
||||
// protected Member Variables
|
||||
/////////////////////////////
|
||||
protected final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables)
|
||||
protected final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();// A list to hold the covariate objects that were requested
|
||||
|
||||
protected final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
|
||||
protected final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
|
||||
|
||||
|
||||
/**
|
||||
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
||||
* reads which have had the reference inserted because of color space inconsistencies.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,226 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.Median;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.text.DateFormat;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Emits a GATKReport containing read group, sample, library, platform, center, sequencing data,
|
||||
* paired end status, simple read type name (e.g. 2x76) median insert size and median read length
|
||||
* for each read group in every provided BAM file
|
||||
*
|
||||
* Note that this walker stops when all read groups have been observed at least a few thousand times so that
|
||||
* the median statistics are well determined. It is safe to run it WG and it'll finish in an appropriate
|
||||
* timeframe.
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* Any number of BAM files
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* GATKReport containing read group, sample, library, platform, center, median insert size and median read length.
|
||||
*
|
||||
* For example, running this tool on the NA12878 data sets:
|
||||
*
|
||||
* <pre>
|
||||
* ##:GATKReport.v0.2 ReadGroupProperties : Table of read group properties
|
||||
* readgroup sample library platform center date has.any.reads is.paired.end n.reads.analyzed simple.read.type median.read.length median.insert.size
|
||||
* 20FUK.1 NA12878 Solexa-18483 illumina BI 2/2/10 true true 498 2x101 101 386
|
||||
* 20FUK.2 NA12878 Solexa-18484 illumina BI 2/2/10 true true 476 2x101 101 417
|
||||
* 20FUK.3 NA12878 Solexa-18483 illumina BI 2/2/10 true true 407 2x101 101 387
|
||||
* 20FUK.4 NA12878 Solexa-18484 illumina BI 2/2/10 true true 389 2x101 101 415
|
||||
* 20FUK.5 NA12878 Solexa-18483 illumina BI 2/2/10 true true 433 2x101 101 386
|
||||
* 20FUK.6 NA12878 Solexa-18484 illumina BI 2/2/10 true true 480 2x101 101 418
|
||||
* 20FUK.7 NA12878 Solexa-18483 illumina BI 2/2/10 true true 450 2x101 101 386
|
||||
* 20FUK.8 NA12878 Solexa-18484 illumina BI 2/2/10 true true 438 2x101 101 418
|
||||
* 20GAV.1 NA12878 Solexa-18483 illumina BI 1/26/10 true true 490 2x101 101 391
|
||||
* 20GAV.2 NA12878 Solexa-18484 illumina BI 1/26/10 true true 485 2x101 101 417
|
||||
* 20GAV.3 NA12878 Solexa-18483 illumina BI 1/26/10 true true 460 2x101 101 392
|
||||
* 20GAV.4 NA12878 Solexa-18484 illumina BI 1/26/10 true true 434 2x101 101 415
|
||||
* 20GAV.5 NA12878 Solexa-18483 illumina BI 1/26/10 true true 479 2x101 101 389
|
||||
* 20GAV.6 NA12878 Solexa-18484 illumina BI 1/26/10 true true 461 2x101 101 416
|
||||
* 20GAV.7 NA12878 Solexa-18483 illumina BI 1/26/10 true true 509 2x101 101 386
|
||||
* 20GAV.8 NA12878 Solexa-18484 illumina BI 1/26/10 true true 476 2x101 101 410 101 414
|
||||
* </pre>
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T ReadGroupProperties
|
||||
* -I example1.bam -I example2.bam etc
|
||||
* -R reference.fasta
|
||||
* -o example.gatkreport.txt
|
||||
* </pre>
|
||||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
|
||||
|
||||
|
||||
public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
||||
@Output
|
||||
public PrintStream out;
|
||||
|
||||
@Argument(shortName="maxElementsForMedian", doc="Calculate median from the first maxElementsForMedian values observed", required=false)
|
||||
public int MAX_VALUES_FOR_MEDIAN = 10000;
|
||||
|
||||
private final static String TABLE_NAME = "ReadGroupProperties";
|
||||
private final Map<String, PerReadGroupInfo> readGroupInfo = new HashMap<String, PerReadGroupInfo>();
|
||||
|
||||
private class PerReadGroupInfo {
|
||||
public final Median<Integer> readLength = new Median<Integer>(MAX_VALUES_FOR_MEDIAN);
|
||||
public final Median<Integer> insertSize = new Median<Integer>(MAX_VALUES_FOR_MEDIAN);
|
||||
public int nReadsSeen = 0, nReadsPaired = 0;
|
||||
|
||||
public boolean needsMoreData() {
|
||||
return ! readLength.isFull() || ! insertSize.isFull();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
|
||||
readGroupInfo.put(rg.getId(), new PerReadGroupInfo());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
return ! (read.getReadFailsVendorQualityCheckFlag() || read.getReadUnmappedFlag());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isDone() {
|
||||
for ( PerReadGroupInfo info : readGroupInfo.values() ) {
|
||||
if ( info.needsMoreData() )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ReferenceContext referenceContext, GATKSAMRecord read, ReadMetaDataTracker readMetaDataTracker) {
|
||||
final String rgID = read.getReadGroup().getId();
|
||||
final PerReadGroupInfo info = readGroupInfo.get(rgID);
|
||||
|
||||
if ( info.needsMoreData() ) {
|
||||
info.readLength.add(read.getReadLength());
|
||||
info.nReadsSeen++;
|
||||
if ( read.getReadPairedFlag() ) {
|
||||
info.nReadsPaired++;
|
||||
if ( read.getInferredInsertSize() != 0) {
|
||||
info.insertSize.add(Math.abs(read.getInferredInsertSize()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer integer, Integer integer1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer sum) {
|
||||
final GATKReport report = new GATKReport();
|
||||
report.addTable(TABLE_NAME, "Table of read group properties");
|
||||
GATKReportTable table = report.getTable(TABLE_NAME);
|
||||
DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT);
|
||||
|
||||
table.addPrimaryKey("readgroup");
|
||||
//* Emits a GATKReport containing read group, sample, library, platform, center, median insert size and
|
||||
//* median read length for each read group in every BAM file.
|
||||
table.addColumn("sample", "NA");
|
||||
table.addColumn("library", "NA");
|
||||
table.addColumn("platform", "NA");
|
||||
table.addColumn("center", "NA");
|
||||
table.addColumn("date", "NA");
|
||||
table.addColumn("has.any.reads", "false");
|
||||
table.addColumn("is.paired.end", "false");
|
||||
table.addColumn("n.reads.analyzed", "NA");
|
||||
table.addColumn("simple.read.type", "NA");
|
||||
table.addColumn("median.read.length", Integer.valueOf(0));
|
||||
table.addColumn("median.insert.size", Integer.valueOf(0));
|
||||
|
||||
for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
|
||||
final String rgID = rg.getId();
|
||||
PerReadGroupInfo info = readGroupInfo.get(rgID);
|
||||
|
||||
// we are paired if > 25% of reads are paired
|
||||
final boolean isPaired = info.nReadsPaired / (1.0 * (info.nReadsSeen+1)) > 0.25;
|
||||
final boolean hasAnyReads = info.nReadsSeen > 0;
|
||||
final int readLength = info.readLength.getMedian(0);
|
||||
|
||||
setTableValue(table, rgID, "sample", rg.getSample());
|
||||
setTableValue(table, rgID, "library", rg.getLibrary());
|
||||
setTableValue(table, rgID, "platform", rg.getPlatform());
|
||||
setTableValue(table, rgID, "center", rg.getSequencingCenter());
|
||||
try {
|
||||
setTableValue(table, rgID, "date", rg.getRunDate() != null ? dateFormatter.format(rg.getRunDate()) : "NA");
|
||||
} catch ( NullPointerException e ) {
|
||||
// TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present
|
||||
setTableValue(table, rgID, "date", "NA");
|
||||
}
|
||||
setTableValue(table, rgID, "has.any.reads", hasAnyReads);
|
||||
setTableValue(table, rgID, "is.paired.end", isPaired);
|
||||
setTableValue(table, rgID, "n.reads.analyzed", info.nReadsSeen);
|
||||
setTableValue(table, rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA");
|
||||
setTableValue(table, rgID, "median.read.length", hasAnyReads ? readLength : "NA" );
|
||||
setTableValue(table, rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA" );
|
||||
}
|
||||
|
||||
report.print(out);
|
||||
}
|
||||
|
||||
private final void setTableValue(GATKReportTable table, final String rgID, final String key, final Object value) {
|
||||
table.set(rgID, key, value == null ? "NA" : value);
|
||||
}
|
||||
}
|
||||
|
|
@ -454,8 +454,7 @@ public class HaplotypeIndelErrorModel {
|
|||
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2)
|
||||
// First term is approximated by Jacobian log with table lookup.
|
||||
// Second term is a constant added to both likelihoods so will be ignored
|
||||
haplotypeLikehoodMatrix[i][j] += MathUtils.softMax(readLikelihood[0],
|
||||
readLikelihood[1]);
|
||||
haplotypeLikehoodMatrix[i][j] += MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -166,18 +166,17 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
|
||||
|
||||
matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
|
||||
YMetricArray[im1][jm1] + pBaseRead);
|
||||
matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]});
|
||||
|
||||
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
||||
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
||||
|
||||
XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
|
||||
XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
|
||||
|
||||
// update Y array
|
||||
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
||||
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
||||
YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
|
||||
YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -316,9 +315,7 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
|
||||
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
|
||||
final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
|
||||
XMetricArray[bestI][bestJ],
|
||||
YMetricArray[bestI][bestJ]);
|
||||
final double bestMetric = MathUtils.approximateLog10SumLog10(new double[]{ matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ] });
|
||||
|
||||
/*
|
||||
if (DEBUG) {
|
||||
|
|
@ -651,7 +648,7 @@ public class PairHMMIndelErrorModel {
|
|||
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
|
||||
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
|
||||
|
||||
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplied to just a single loop without the intermediate NxN matrix
|
||||
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix
|
||||
for (int i=0; i < numHaplotypes; i++) {
|
||||
for (int j=i; j < numHaplotypes; j++){
|
||||
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
|
||||
|
|
@ -665,7 +662,7 @@ public class PairHMMIndelErrorModel {
|
|||
final double li = readLikelihoods[readIdx][i];
|
||||
final double lj = readLikelihoods[readIdx][j];
|
||||
final int readCount = readCounts[readIdx];
|
||||
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF);
|
||||
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.approximateLog10SumLog10(li, lj) + LOG_ONE_HALF);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -678,7 +675,7 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
}
|
||||
|
||||
// renormalize so that max element is zero.
|
||||
// renormalize so that max element is zero.
|
||||
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -110,6 +110,13 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
@Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
|
||||
boolean lowerCaseSNPs = false;
|
||||
|
||||
/**
|
||||
* If onlyOutputValidAmplicons is true, the output fasta file will contain only valid sequences.
|
||||
* Useful for producing delivery-ready files.
|
||||
*/
|
||||
@Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false)
|
||||
boolean onlyOutputValidAmplicons = false;
|
||||
|
||||
/**
|
||||
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
|
||||
* This changes the size of the k-mer used for alignment.
|
||||
|
|
@ -486,14 +493,16 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
valid = "Valid";
|
||||
}
|
||||
|
||||
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
||||
|
||||
if (!sequenomOutput)
|
||||
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
||||
else {
|
||||
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
||||
probeName = probeName.replace("amplicon_","a");
|
||||
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
||||
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
|
||||
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
||||
if (!sequenomOutput)
|
||||
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
||||
else {
|
||||
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
||||
probeName = probeName.replace("amplicon_","a");
|
||||
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -140,15 +140,16 @@ public class GaussianMixtureModel {
|
|||
}
|
||||
|
||||
for( final VariantDatum datum : data ) {
|
||||
final ArrayList<Double> pVarInGaussianLog10 = new ArrayList<Double>( gaussians.size() );
|
||||
final double[] pVarInGaussianLog10 = new double[gaussians.size()];
|
||||
int gaussianIndex = 0;
|
||||
for( final MultivariateGaussian gaussian : gaussians ) {
|
||||
final double pVarLog10 = gaussian.evaluateDatumLog10( datum );
|
||||
pVarInGaussianLog10.add( pVarLog10 );
|
||||
pVarInGaussianLog10[gaussianIndex++] = pVarLog10;
|
||||
}
|
||||
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10 );
|
||||
int iii = 0;
|
||||
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10, false );
|
||||
gaussianIndex = 0;
|
||||
for( final MultivariateGaussian gaussian : gaussians ) {
|
||||
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[iii++] ); //BUGBUG: to clean up
|
||||
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[gaussianIndex++] );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -372,6 +372,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
stream.println("library(ggplot2)");
|
||||
// For compactPDF in R 2.13+
|
||||
stream.println("library(tools)");
|
||||
// For graphical functions R 2.14.2+
|
||||
stream.println("library(grid)");
|
||||
|
||||
createArrangeFunction( stream );
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
|
|
@ -113,24 +114,28 @@ public class Haplotype {
|
|||
return isReference;
|
||||
}
|
||||
|
||||
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final byte[] paddedRef, final int refStart,
|
||||
final Cigar haplotypeCigar, final int numBasesAddedToStartOfHaplotype, final int refHaplotypeLength ) {
|
||||
@Requires({"refInsertLocation >= 0", "hapStartInRefCoords >= 0"})
|
||||
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) {
|
||||
|
||||
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
|
||||
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation);
|
||||
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(hapStartInRefCoords, haplotypeCigar, refInsertLocation);
|
||||
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||
return getBases().clone();
|
||||
return bases.clone();
|
||||
}
|
||||
haplotypeInsertLocation += numBasesAddedToStartOfHaplotype;
|
||||
final byte[] newHaplotype = getBases().clone();
|
||||
byte[] newHaplotype;
|
||||
|
||||
try {
|
||||
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
|
||||
newHaplotype = bases.clone();
|
||||
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||
}
|
||||
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
||||
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
||||
final int altAlleleLength = altAllele.length();
|
||||
newHaplotype = new byte[bases.length + altAlleleLength];
|
||||
for( int iii = 0; iii < bases.length; iii++ ) {
|
||||
newHaplotype[iii] = bases[iii];
|
||||
}
|
||||
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
|
||||
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
|
||||
}
|
||||
|
|
@ -138,24 +143,17 @@ public class Haplotype {
|
|||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||
}
|
||||
} else { // deletion
|
||||
int refHaplotypeOffset = 0;
|
||||
for( final CigarElement ce : haplotypeCigar.getCigarElements()) {
|
||||
if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); }
|
||||
else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); }
|
||||
}
|
||||
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||
}
|
||||
final int shift = refAllele.length() - altAllele.length();
|
||||
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) {
|
||||
newHaplotype[iii] = newHaplotype[iii+shift];
|
||||
newHaplotype = new byte[bases.length - shift];
|
||||
for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) {
|
||||
newHaplotype[iii] = bases[iii];
|
||||
}
|
||||
for( int iii = 0; iii < shift; iii++ ) {
|
||||
newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii];
|
||||
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) {
|
||||
newHaplotype[iii] = bases[iii+shift];
|
||||
}
|
||||
}
|
||||
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
|
||||
return getBases().clone();
|
||||
return bases.clone();
|
||||
}
|
||||
|
||||
return newHaplotype;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
|
@ -40,14 +41,6 @@ import java.util.*;
|
|||
* @author Kiran Garimella
|
||||
*/
|
||||
public class MathUtils {
|
||||
/** Public constants - used for the Lanczos approximation to the factorial function
|
||||
* (for the calculation of the binomial/multinomial probability in logspace)
|
||||
* @param LANC_SEQ[] - an array holding the constants which correspond to the product
|
||||
* of Chebyshev Polynomial coefficients, and points on the Gamma function (for interpolation)
|
||||
* [see A Precision Approximation of the Gamma Function J. SIAM Numer. Anal. Ser. B, Vol. 1 1964. pp. 86-96]
|
||||
* @param LANC_G - a value for the Lanczos approximation to the gamma function that works to
|
||||
* high precision
|
||||
*/
|
||||
|
||||
/**
|
||||
* Private constructor. No instantiating this class!
|
||||
|
|
@ -55,6 +48,28 @@ public class MathUtils {
|
|||
private MathUtils() {
|
||||
}
|
||||
|
||||
public static final double[] log10Cache;
|
||||
private static final double[] jacobianLogTable;
|
||||
private static final double JACOBIAN_LOG_TABLE_STEP = 0.001;
|
||||
private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
|
||||
private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1;
|
||||
private static final int MAXN = 11000;
|
||||
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients
|
||||
|
||||
static {
|
||||
log10Cache = new double[LOG10_CACHE_SIZE];
|
||||
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
|
||||
|
||||
log10Cache[0] = Double.NEGATIVE_INFINITY;
|
||||
for (int k = 1; k < LOG10_CACHE_SIZE; k++)
|
||||
log10Cache[k] = Math.log10(k);
|
||||
|
||||
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// A fast implementation of the Math.round() method. This method does not perform
|
||||
// under/overflow checking, so this shouldn't be used in the general case (but is fine
|
||||
// if one is already make those checks before calling in to the rounding).
|
||||
|
|
@ -535,7 +550,7 @@ public class MathUtils {
|
|||
// all negative) the largest value; also, we need to convert to normal-space.
|
||||
double maxValue = Utils.findMaxEntry(array);
|
||||
|
||||
// we may decide to just normalize in log space with converting to linear space
|
||||
// we may decide to just normalize in log space without converting to linear space
|
||||
if (keepInLogSpace) {
|
||||
for (int i = 0; i < array.length; i++)
|
||||
array[i] -= maxValue;
|
||||
|
|
@ -562,29 +577,6 @@ public class MathUtils {
|
|||
return normalized;
|
||||
}
|
||||
|
||||
public static double[] normalizeFromLog10(List<Double> array, boolean takeLog10OfOutput) {
|
||||
double[] normalized = new double[array.size()];
|
||||
|
||||
// for precision purposes, we need to add (or really subtract, since they're
|
||||
// all negative) the largest value; also, we need to convert to normal-space.
|
||||
double maxValue = MathUtils.arrayMaxDouble(array);
|
||||
for (int i = 0; i < array.size(); i++)
|
||||
normalized[i] = Math.pow(10, array.get(i) - maxValue);
|
||||
|
||||
// normalize
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < array.size(); i++)
|
||||
sum += normalized[i];
|
||||
for (int i = 0; i < array.size(); i++) {
|
||||
double x = normalized[i] / sum;
|
||||
if (takeLog10OfOutput)
|
||||
x = Math.log10(x);
|
||||
normalized[i] = x;
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
* normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
|
||||
*
|
||||
|
|
@ -595,10 +587,6 @@ public class MathUtils {
|
|||
return normalizeFromLog10(array, false);
|
||||
}
|
||||
|
||||
public static double[] normalizeFromLog10(List<Double> array) {
|
||||
return normalizeFromLog10(array, false);
|
||||
}
|
||||
|
||||
public static int maxElementIndex(final double[] array) {
|
||||
return maxElementIndex(array, array.length);
|
||||
}
|
||||
|
|
@ -1206,78 +1194,11 @@ public class MathUtils {
|
|||
return ((double) num) / (Math.max(denom, 1));
|
||||
}
|
||||
|
||||
public static final double[] log10Cache;
|
||||
public static final double[] jacobianLogTable;
|
||||
public static final int JACOBIAN_LOG_TABLE_SIZE = 101;
|
||||
public static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
|
||||
public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP;
|
||||
public static final double MAX_JACOBIAN_TOLERANCE = 10.0;
|
||||
private static final int MAXN = 11000;
|
||||
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients
|
||||
|
||||
static {
|
||||
log10Cache = new double[LOG10_CACHE_SIZE];
|
||||
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
|
||||
|
||||
log10Cache[0] = Double.NEGATIVE_INFINITY;
|
||||
for (int k = 1; k < LOG10_CACHE_SIZE; k++)
|
||||
log10Cache[k] = Math.log10(k);
|
||||
|
||||
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static public double softMax(final double[] vec) {
|
||||
double acc = vec[0];
|
||||
for (int k = 1; k < vec.length; k++)
|
||||
acc = softMax(acc, vec[k]);
|
||||
|
||||
return acc;
|
||||
|
||||
}
|
||||
|
||||
static public double max(double x0, double x1, double x2) {
|
||||
double a = Math.max(x0, x1);
|
||||
return Math.max(a, x2);
|
||||
}
|
||||
|
||||
static public double softMax(final double x0, final double x1, final double x2) {
|
||||
// compute naively log10(10^x[0] + 10^x[1]+...)
|
||||
// return Math.log10(MathUtils.sumLog10(vec));
|
||||
|
||||
// better approximation: do Jacobian logarithm function on data pairs
|
||||
double a = softMax(x0, x1);
|
||||
return softMax(a, x2);
|
||||
}
|
||||
|
||||
static public double softMax(final double x, final double y) {
|
||||
// we need to compute log10(10^x + 10^y)
|
||||
// By Jacobian logarithm identity, this is equal to
|
||||
// max(x,y) + log10(1+10^-abs(x-y))
|
||||
// we compute the second term as a table lookup
|
||||
// with integer quantization
|
||||
|
||||
// slow exact version:
|
||||
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
|
||||
|
||||
double diff = x - y;
|
||||
|
||||
if (diff > MAX_JACOBIAN_TOLERANCE)
|
||||
return x;
|
||||
else if (diff < -MAX_JACOBIAN_TOLERANCE)
|
||||
return y;
|
||||
else if (diff >= 0) {
|
||||
int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
|
||||
return x + jacobianLogTable[ind];
|
||||
}
|
||||
else {
|
||||
int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
|
||||
return y + jacobianLogTable[ind];
|
||||
}
|
||||
}
|
||||
|
||||
public static double phredScaleToProbability(byte q) {
|
||||
return Math.pow(10, (-q) / 10.0);
|
||||
}
|
||||
|
|
@ -1619,30 +1540,118 @@ public class MathUtils {
|
|||
* @param bitSet the bitset
|
||||
* @return an integer with the bitset representation
|
||||
*/
|
||||
public static int intFrom(final BitSet bitSet) {
|
||||
int integer = 0;
|
||||
public static long intFrom(final BitSet bitSet) {
|
||||
long number = 0;
|
||||
for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1))
|
||||
integer |= 1 << bitIndex;
|
||||
number |= 1L << bitIndex;
|
||||
|
||||
return integer;
|
||||
return number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given integer
|
||||
*
|
||||
* @param integer the number to turn into a bitset
|
||||
* @param number the number to turn into a bitset
|
||||
* @return a bitset representation of the integer
|
||||
*/
|
||||
public static BitSet bitSetFrom(int integer) {
|
||||
BitSet bitSet = new BitSet((int) Math.ceil(Math.sqrt(integer)));
|
||||
public static BitSet bitSetFrom(long number) {
|
||||
BitSet bitSet = new BitSet();
|
||||
int bitIndex = 0;
|
||||
while (integer > 0) {
|
||||
if (integer%2 > 0)
|
||||
while (number > 0) {
|
||||
if (number%2 > 0)
|
||||
bitSet.set(bitIndex);
|
||||
bitIndex++;
|
||||
integer /= 2;
|
||||
number /= 2;
|
||||
}
|
||||
return bitSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a BitSet into the dna string representation.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
|
||||
* base_10 representation of the sequence. This is important for us to know how to bring the number
|
||||
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
|
||||
* as 0's and leading 0's are omitted).
|
||||
*
|
||||
* quasi-canonical because A is represented by a 0, therefore,
|
||||
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
|
||||
* we have : 0, 1, 2, 3, 00, 01, 02, ...
|
||||
*
|
||||
* but we can correctly decode it because we know the final length.
|
||||
*
|
||||
* @param bitSet the bitset representation of the dna sequence
|
||||
* @return the dna sequence represented by the bitset
|
||||
*/
|
||||
public static String dnaFrom(final BitSet bitSet) {
|
||||
long number = intFrom(bitSet); // the base_10 representation of the bit set
|
||||
long preContext = 0; // the number of combinations skipped to get to the quasi-canonical representation (we keep it to subtract later)
|
||||
long nextContext = 4; // the next context (we advance it so we know which one was preceding it).
|
||||
int i = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
while (nextContext <= number) { // find the length of the dna string (i)
|
||||
preContext = nextContext; // keep track of the number of combinations in the preceding context
|
||||
nextContext += Math.pow(4, ++i);// calculate the next context
|
||||
}
|
||||
number -= preContext; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
|
||||
String dna = "";
|
||||
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
byte base = (byte) (number % 4);
|
||||
switch (base) {
|
||||
case 0 : dna = "A" + dna; break;
|
||||
case 1 : dna = "C" + dna; break;
|
||||
case 2 : dna = "G" + dna; break;
|
||||
case 3 : dna = "T" + dna; break;
|
||||
}
|
||||
number /= 4;
|
||||
}
|
||||
for (int j = dna.length(); j < i; j++)
|
||||
dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
|
||||
return dna;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given dna string.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* The bit representation of a dna string is the simple:
|
||||
* 0 A 4 AA 8 CA
|
||||
* 1 C 5 AC ...
|
||||
* 2 G 6 AG 1343 TTGGT
|
||||
* 3 T 7 AT 1364 TTTTT
|
||||
*
|
||||
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||
* preceded the string (with smaller lengths).
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @return the bitset representing the dna sequence
|
||||
*/
|
||||
public static BitSet bitSetFrom(String dna) {
|
||||
if (dna.length() > 31)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than 31. dna: %s (%d)", dna, dna.length()));
|
||||
|
||||
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
|
||||
long preContext = 0; // the sum of all combinations that preceded the length of the dna string
|
||||
for (int i=0; i<dna.length(); i++) {
|
||||
baseTen *= 4;
|
||||
switch(dna.charAt(i)) {
|
||||
case 'A': baseTen += 0; break;
|
||||
case 'C': baseTen += 1; break;
|
||||
case 'G': baseTen += 2; break;
|
||||
case 'T': baseTen += 3; break;
|
||||
}
|
||||
if (i>0)
|
||||
preContext += Math.pow(4, i); // each length will have 4^i combinations (e.g 1 = 4, 2 = 16, 3 = 64, ...)
|
||||
}
|
||||
|
||||
return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Utility class for calculating median from a data set, potentially limiting the size of data to a
|
||||
* fixed amount
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class Median<T extends Comparable> {
|
||||
final List<T> values;
|
||||
final int maxValuesToKeep;
|
||||
boolean sorted = false;
|
||||
|
||||
public Median() {
|
||||
this(Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
public Median(final int maxValuesToKeep) {
|
||||
this.maxValuesToKeep = maxValuesToKeep;
|
||||
this.values = new ArrayList<T>();
|
||||
}
|
||||
|
||||
public boolean isFull() {
|
||||
return values.size() >= maxValuesToKeep;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return values.size();
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return values.isEmpty();
|
||||
}
|
||||
|
||||
public T getMedian() {
|
||||
if ( isEmpty() )
|
||||
throw new IllegalStateException("Cannot get median value from empty array");
|
||||
return getMedian(null); // note that value null will never be used
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the floor(n + 1 / 2) item from the list of values if the list
|
||||
* has values, or defaultValue is the list is empty.
|
||||
*/
|
||||
public T getMedian(final T defaultValue) {
|
||||
if ( isEmpty() )
|
||||
return defaultValue;
|
||||
|
||||
if ( ! sorted ) {
|
||||
sorted = true;
|
||||
Collections.sort(values);
|
||||
}
|
||||
|
||||
final int offset = (int)Math.floor((values.size() + 1) * 0.5) - 1;
|
||||
return values.get(offset);
|
||||
}
|
||||
|
||||
public boolean add(final T value) {
|
||||
if ( ! isFull() ) {
|
||||
sorted = false;
|
||||
return values.add(value);
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
|
|||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -314,6 +315,15 @@ public class ClippingOp {
|
|||
if (start == 0)
|
||||
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
|
||||
|
||||
if (read.hasBaseIndelQualities()) {
|
||||
byte[] newBaseInsertionQuals = new byte[newLength];
|
||||
byte[] newBaseDeletionQuals = new byte[newLength];
|
||||
System.arraycopy(read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength);
|
||||
System.arraycopy(read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength);
|
||||
hardClippedRead.setBaseQualities(newBaseInsertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
|
||||
hardClippedRead.setBaseQualities(newBaseDeletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
|
||||
}
|
||||
|
||||
return hardClippedRead;
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import java.util.Map;
|
|||
* Date: Dec 29, 2009
|
||||
*/
|
||||
|
||||
public class NestedHashMap{
|
||||
public class NestedHashMap {
|
||||
|
||||
public final Map data = new HashMap<Object, Object>();
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,390 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.crypt;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.io.IOUtils;
|
||||
|
||||
import javax.crypto.Cipher;
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.security.*;
|
||||
import java.security.spec.InvalidKeySpecException;
|
||||
import java.security.spec.KeySpec;
|
||||
import java.security.spec.PKCS8EncodedKeySpec;
|
||||
import java.security.spec.X509EncodedKeySpec;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* A set of cryptographic utility methods and constants.
|
||||
*
|
||||
* Contains methods to:
|
||||
*
|
||||
* -Create a public/private key pair
|
||||
* -Read and write public/private keys to/from files/streams
|
||||
* -Load the GATK master private/public keys
|
||||
* -Encrypt/decrypt data
|
||||
*
|
||||
* Also contains constants that control the cryptographic defaults
|
||||
* throughout the GATK.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class CryptUtils {
|
||||
|
||||
// ---------------------------------------------------------------------------------
|
||||
// Constants (these control the default cryptographic settings throughout the GATK):
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Default key length in bits of newly-created keys. 2048 bits provides a good balance between
|
||||
* security and speed.
|
||||
*/
|
||||
public static final int DEFAULT_KEY_LENGTH = 2048;
|
||||
|
||||
/**
|
||||
* Default encryption algorithm to use, when none is specified.
|
||||
*/
|
||||
public static final String DEFAULT_ENCRYPTION_ALGORITHM = "RSA";
|
||||
|
||||
/**
|
||||
* Default random-number generation algorithm to use, when none is specified.
|
||||
*/
|
||||
public static final String DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM = "SHA1PRNG";
|
||||
|
||||
/**
|
||||
* Name of the public key file distributed with the GATK. This file is packaged
|
||||
* into the GATK jar, and we use the system ClassLoader to find it.
|
||||
*/
|
||||
public static final String GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME = "GATK_public.key";
|
||||
|
||||
/**
|
||||
* Location of the master copy of the GATK private key.
|
||||
*/
|
||||
public static final String GATK_MASTER_PRIVATE_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_private.key";
|
||||
|
||||
/**
|
||||
* Location of the master copy of the GATK public key. This file should always be the same as
|
||||
* the public key file distributed with the GATK (and there are automated tests to ensure that it is).
|
||||
*/
|
||||
public static final String GATK_MASTER_PUBLIC_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_public.key";
|
||||
|
||||
/**
|
||||
* Directory where generated GATK user keys are stored. See the GATKKey class for more information.
|
||||
*/
|
||||
public static final String GATK_USER_KEY_DIRECTORY = "/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/";
|
||||
|
||||
|
||||
// -----------------------
|
||||
// Utility Methods:
|
||||
// -----------------------
|
||||
|
||||
/**
|
||||
* Generate a new public/private key pair using the default encryption settings defined above.
|
||||
*
|
||||
* @return A new public/private key pair created using the default settings
|
||||
*/
|
||||
public static KeyPair generateKeyPair() {
|
||||
return generateKeyPair(DEFAULT_KEY_LENGTH, DEFAULT_ENCRYPTION_ALGORITHM, DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a new public/private key pair using custom encryption settings.
|
||||
*
|
||||
* @param keyLength Length of the key in bits
|
||||
* @param encryptionAlgorithm Encryption algorithm to use
|
||||
* @param randNumberAlgorithm Random-number generation algorithm to use
|
||||
* @return A new public/private key pair, created according to the specified parameters
|
||||
*/
|
||||
public static KeyPair generateKeyPair( int keyLength, String encryptionAlgorithm, String randNumberAlgorithm ) {
|
||||
try {
|
||||
KeyPairGenerator keyGen = KeyPairGenerator.getInstance(encryptionAlgorithm);
|
||||
SecureRandom randomnessSource = createRandomnessSource(randNumberAlgorithm);
|
||||
|
||||
keyGen.initialize(keyLength, randomnessSource);
|
||||
return keyGen.generateKeyPair();
|
||||
}
|
||||
catch ( NoSuchAlgorithmException e ) {
|
||||
throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
|
||||
}
|
||||
catch ( Exception e ) {
|
||||
throw new ReviewedStingException("Error while generating key pair", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a source of randomness using the default random-number generation algorithm.
|
||||
*
|
||||
* @return A randomness source that uses the default algorithm
|
||||
*/
|
||||
public static SecureRandom createRandomnessSource() {
|
||||
return createRandomnessSource(DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a source of randomness using a custom random-number generation algorithm.
|
||||
*
|
||||
* @param randAlgorithm The random-number generation algorithm to use
|
||||
* @return A randomness sources that uses the specified algorithm
|
||||
*/
|
||||
public static SecureRandom createRandomnessSource ( String randAlgorithm ) {
|
||||
try {
|
||||
return SecureRandom.getInstance(randAlgorithm);
|
||||
}
|
||||
catch ( NoSuchAlgorithmException e ) {
|
||||
throw new ReviewedStingException(String.format("Could not find an implementation of the requested random-number generation algorithm %s", randAlgorithm), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a public/private key pair to disk
|
||||
*
|
||||
* @param keyPair The key pair we're writing to disk
|
||||
* @param privateKeyFile Location to write the private key
|
||||
* @param publicKeyFile Location to write the public key
|
||||
*/
|
||||
public static void writeKeyPair ( KeyPair keyPair, File privateKeyFile, File publicKeyFile ) {
|
||||
writeKey(keyPair.getPrivate(), privateKeyFile);
|
||||
writeKey(keyPair.getPublic(), publicKeyFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes an arbitrary key to disk
|
||||
*
|
||||
* @param key The key to write
|
||||
* @param destination Location to write the key to
|
||||
*/
|
||||
public static void writeKey ( Key key, File destination ) {
|
||||
IOUtils.writeByteArrayToFile(key.getEncoded(), destination);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads in a public key created using the default encryption algorithm from a file.
|
||||
*
|
||||
* @param source File containing the public key
|
||||
* @return The public key read
|
||||
*/
|
||||
public static PublicKey readPublicKey ( File source ) {
|
||||
return decodePublicKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads in a public key created using the default encryption algorithm from a stream.
|
||||
*
|
||||
* @param source Stream attached to the public key
|
||||
* @return The public key read
|
||||
*/
|
||||
public static PublicKey readPublicKey ( InputStream source ) {
|
||||
return decodePublicKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes the raw bytes of a public key into a usable object.
|
||||
*
|
||||
* @param rawKey The encoded bytes of a public key as read from, eg., a file. The
|
||||
* key must be in the standard X.509 format for a public key.
|
||||
* @param encryptionAlgorithm The encryption algorithm used to create the public key
|
||||
* @return The public key as a usable object
|
||||
*/
|
||||
public static PublicKey decodePublicKey ( byte[] rawKey, String encryptionAlgorithm ) {
|
||||
try {
|
||||
KeySpec keySpec = new X509EncodedKeySpec(rawKey);
|
||||
KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm);
|
||||
return keyFactory.generatePublic(keySpec);
|
||||
}
|
||||
catch ( NoSuchAlgorithmException e ) {
|
||||
throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
|
||||
}
|
||||
catch ( InvalidKeySpecException e ) {
|
||||
throw new ReviewedStingException("Unable to use X.509 key specification to decode the given key", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads in a private key created using the default encryption algorithm from a file.
|
||||
*
|
||||
* @param source File containing the private key
|
||||
* @return The private key read
|
||||
*/
|
||||
public static PrivateKey readPrivateKey ( File source ) {
|
||||
return decodePrivateKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads in a private key created using the default encryption algorithm from a stream.
|
||||
*
|
||||
* @param source Stream attached to the private key
|
||||
* @return The private key read
|
||||
*/
|
||||
public static PrivateKey readPrivateKey ( InputStream source ) {
|
||||
return decodePrivateKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes the raw bytes of a private key into a usable object.
|
||||
*
|
||||
* @param rawKey The encoded bytes of a private key as read from, eg., a file. The
|
||||
* key must be in the standard PKCS #8 format for a private key.
|
||||
* @param encryptionAlgorithm The encryption algorithm used to create the private key
|
||||
* @return The private key as a usable object
|
||||
*/
|
||||
public static PrivateKey decodePrivateKey ( byte[] rawKey, String encryptionAlgorithm ) {
|
||||
try {
|
||||
KeySpec keySpec = new PKCS8EncodedKeySpec(rawKey);
|
||||
KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm);
|
||||
return keyFactory.generatePrivate(keySpec);
|
||||
}
|
||||
catch ( NoSuchAlgorithmException e ) {
|
||||
throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
|
||||
}
|
||||
catch ( InvalidKeySpecException e ) {
|
||||
throw new ReviewedStingException("Unable to use the PKCS #8 key specification to decode the given key", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the copy of the GATK public key that is distributed with the GATK. Uses the system
|
||||
* ClassLoader to locate the public key file, which should be stored at the root of the GATK
|
||||
* jar file.
|
||||
*
|
||||
* @return The GATK public key as a usable object
|
||||
*/
|
||||
public static PublicKey loadGATKDistributedPublicKey() {
|
||||
InputStream publicKeyInputStream = ClassLoader.getSystemResourceAsStream(GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME);
|
||||
|
||||
if ( publicKeyInputStream == null ) {
|
||||
throw new ReviewedStingException(String.format("Could not locate the GATK public key %s in the classpath",
|
||||
GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME));
|
||||
}
|
||||
|
||||
return readPublicKey(publicKeyInputStream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the master copy of the GATK private key. You must have the appropriate UNIX permissions
|
||||
* to do this!
|
||||
*
|
||||
* @return The GATK master private key as a usable object
|
||||
*/
|
||||
public static PrivateKey loadGATKMasterPrivateKey() {
|
||||
return readPrivateKey(new File(GATK_MASTER_PRIVATE_KEY_FILE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the master copy of the GATK public key. This should always be the same as the
|
||||
* public key distributed with the GATK returned by loadGATKDistributedPublicKey().
|
||||
*
|
||||
* @return The GATK master public key as a usable object
|
||||
*/
|
||||
public static PublicKey loadGATKMasterPublicKey() {
|
||||
return readPublicKey(new File(GATK_MASTER_PUBLIC_KEY_FILE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encrypts the given data using the key provided.
|
||||
*
|
||||
* @param data The data to encrypt, as a byte array
|
||||
* @param encryptKey The key with which to encrypt the data
|
||||
* @return The encrypted version of the provided data
|
||||
*/
|
||||
public static byte[] encryptData ( byte[] data, Key encryptKey ) {
|
||||
return transformDataUsingCipher(data, encryptKey, Cipher.ENCRYPT_MODE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decrypts the given data using the key provided.
|
||||
*
|
||||
* @param encryptedData Data to decrypt, as a byte array
|
||||
* @param decryptKey The key with which to decrypt the data
|
||||
* @return The decrypted version of the provided data
|
||||
*/
|
||||
public static byte[] decryptData ( byte[] encryptedData, Key decryptKey ) {
|
||||
return transformDataUsingCipher(encryptedData, decryptKey, Cipher.DECRYPT_MODE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method for encryption/decryption that takes data and processes it using
|
||||
* the given key
|
||||
*
|
||||
* @param data Data to encrypt/decrypt
|
||||
* @param key Key to use to encrypt/decrypt the data
|
||||
* @param cipherMode Specifies whether we are encrypting or decrypting
|
||||
* @return The encrypted/decrypted data
|
||||
*/
|
||||
private static byte[] transformDataUsingCipher ( byte[] data, Key key, int cipherMode ) {
|
||||
try {
|
||||
Cipher cipher = Cipher.getInstance(key.getAlgorithm());
|
||||
cipher.init(cipherMode, key);
|
||||
return cipher.doFinal(data);
|
||||
}
|
||||
catch ( NoSuchAlgorithmException e ) {
|
||||
throw new ReviewedStingException(String.format("Could not find an implementation of the requested algorithm %s",
|
||||
key.getAlgorithm()), e);
|
||||
}
|
||||
catch ( InvalidKeyException e ) {
|
||||
throw new ReviewedStingException("Key is invalid", e);
|
||||
}
|
||||
catch ( GeneralSecurityException e ) {
|
||||
throw new ReviewedStingException("Error during encryption", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests whether the public/private keys provided can each decrypt data encrypted by
|
||||
* the other key -- ie., tests whether these two keys are part of the same public/private
|
||||
* key pair.
|
||||
*
|
||||
* @param privateKey The private key to test
|
||||
* @param publicKey The public key to test
|
||||
* @return True if the keys are part of the same key pair and can decrypt each other's
|
||||
* encrypted data, otherwise false.
|
||||
*/
|
||||
public static boolean keysDecryptEachOther ( PrivateKey privateKey, PublicKey publicKey ) {
|
||||
byte[] plainText = "Test PlainText".getBytes();
|
||||
|
||||
byte[] dataEncryptedUsingPrivateKey = CryptUtils.encryptData(plainText, privateKey);
|
||||
byte[] dataEncryptedUsingPublicKey = CryptUtils.encryptData(plainText, publicKey);
|
||||
|
||||
byte[] privateKeyDataDecryptedWithPublicKey = CryptUtils.decryptData(dataEncryptedUsingPrivateKey, publicKey);
|
||||
byte[] publicKeyDataDecryptedWithPrivateKey = CryptUtils.decryptData(dataEncryptedUsingPublicKey, privateKey);
|
||||
|
||||
// Make sure we actually transformed the data during encryption:
|
||||
if ( Arrays.equals(plainText, dataEncryptedUsingPrivateKey) ||
|
||||
Arrays.equals(plainText, dataEncryptedUsingPublicKey) ||
|
||||
Arrays.equals(dataEncryptedUsingPrivateKey, dataEncryptedUsingPublicKey) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Make sure that we were able to recreate the original plaintext using
|
||||
// both the public key on the private-key-encrypted data and the private
|
||||
// key on the public-key-encrypted data:
|
||||
if ( ! Arrays.equals(plainText, privateKeyDataDecryptedWithPublicKey) ||
|
||||
! Arrays.equals(plainText, publicKeyDataDecryptedWithPrivateKey) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,349 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.crypt;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.io.IOUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.security.*;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
/**
|
||||
* Class to represent a GATK user key.
|
||||
*
|
||||
* A GATK user key contains an email address and a cryptographic signature.
|
||||
* The signature is the SHA-1 hash of the email address encrypted using
|
||||
* the GATK master private key. The GATK master public key (distributed
|
||||
* with the GATK) is used to decrypt the signature and validate the key
|
||||
* at the start of each GATK run that requires a key.
|
||||
*
|
||||
* Keys are cryptographically secure in that valid keys definitely come
|
||||
* from us and cannot be fabricated, however nothing prevents keys from
|
||||
* being shared between users.
|
||||
*
|
||||
* GATK user keys have the following on-disk format:
|
||||
*
|
||||
* GZIP Container:
|
||||
* Email address
|
||||
* NUL byte (delimiter)
|
||||
* Cryptographic Signature (encrypted SHA-1 hash of email address)
|
||||
*
|
||||
* The key data is wrapped within a GZIP container to placate over-zealous
|
||||
* email filters (since keys must often be emailed) and also to provide an
|
||||
* additional integrity check via the built-in GZIP CRC.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class GATKKey {
|
||||
|
||||
/**
|
||||
* Private key used to sign the GATK key. Required only when creating a new
|
||||
* key from scratch, not when loading an existing key from disk.
|
||||
*/
|
||||
private PrivateKey privateKey;
|
||||
|
||||
/**
|
||||
* Public key used to validate the GATK key.
|
||||
*/
|
||||
private PublicKey publicKey;
|
||||
|
||||
/**
|
||||
* The user's email address, stored within the key and signed.
|
||||
*/
|
||||
private String emailAddress;
|
||||
|
||||
/**
|
||||
* The cryptographic signature of the email address. By default, this is
|
||||
* the SHA-1 hash of the email address encrypted using the RSA algorithm.
|
||||
*/
|
||||
private byte[] signature;
|
||||
|
||||
/**
|
||||
* The combination of hash/encryption algorithms to use to generate the signature.
|
||||
* By default this is "SHA1withRSA"
|
||||
*/
|
||||
private String signingAlgorithm;
|
||||
|
||||
/**
|
||||
* Default hash/encryption algorithms to use to sign the key.
|
||||
*/
|
||||
public static final String DEFAULT_SIGNING_ALGORITHM = "SHA1withRSA";
|
||||
|
||||
/**
|
||||
* Byte value used to separate the email address from its signature in the key file.
|
||||
*/
|
||||
public static final byte GATK_KEY_SECTIONAL_DELIMITER = 0;
|
||||
|
||||
|
||||
// -----------------------
|
||||
// Constructors:
|
||||
// -----------------------
|
||||
|
||||
/**
|
||||
* Constructor to create a new GATK key from scratch using an email address
|
||||
* and public/private key pair. The private key is used for signing, and the
|
||||
* public key is used to validate the newly-created key.
|
||||
*
|
||||
* @param privateKey Private key used to sign the new GATK key
|
||||
* @param publicKey Public key used to validate the new GATK key
|
||||
* @param emailAddress The user's email address, which we will store in the key and sign
|
||||
*/
|
||||
public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress ) {
|
||||
this(privateKey, publicKey, emailAddress, DEFAULT_SIGNING_ALGORITHM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor to create a new GATK key from scratch using an email address
|
||||
* and public/private key pair, and additionally specify the signing algorithm
|
||||
* to use. The private key is used for signing, and the public key is used to
|
||||
* validate the newly-created key.
|
||||
*
|
||||
* @param privateKey Private key used to sign the new GATK key
|
||||
* @param publicKey Public key used to validate the new GATK key
|
||||
* @param emailAddress The user's email address, which we will store in the key and sign
|
||||
* @param signingAlgorithm The combination of hash and encryption algorithms to use to sign the key
|
||||
*/
|
||||
public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress, String signingAlgorithm ) {
|
||||
if ( privateKey == null || publicKey == null || emailAddress == null || emailAddress.length() == 0 || signingAlgorithm == null ) {
|
||||
throw new ReviewedStingException("Cannot construct GATKKey using null/empty arguments");
|
||||
}
|
||||
|
||||
this.privateKey = privateKey;
|
||||
this.publicKey = publicKey;
|
||||
this.emailAddress = emailAddress;
|
||||
this.signingAlgorithm = signingAlgorithm;
|
||||
|
||||
validateEmailAddress();
|
||||
generateSignature();
|
||||
|
||||
if ( ! isValid() ) {
|
||||
throw new ReviewedStingException("Newly-generated GATK key fails validation -- this should never happen!");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor to load an existing GATK key from a file.
|
||||
*
|
||||
* During loading, the key file is checked for integrity, but not cryptographic
|
||||
* validity (which must be done through a subsequent call to isValid()).
|
||||
*
|
||||
* @param publicKey Public key that will be used to validate the loaded GATK key
|
||||
* in subsequent calls to isValid()
|
||||
* @param keyFile File containing the GATK key to load
|
||||
*/
|
||||
public GATKKey ( PublicKey publicKey, File keyFile ) {
|
||||
this(publicKey, keyFile, DEFAULT_SIGNING_ALGORITHM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor to load an existing GATK key from a file, and additionally specify
|
||||
* the signing algorithm used to sign the key being loaded.
|
||||
*
|
||||
* During loading, the key file is checked for integrity, but not cryptographic
|
||||
* validity (which must be done through a subsequent call to isValid()).
|
||||
*
|
||||
* @param publicKey Public key that will be used to validate the loaded GATK key
|
||||
* in subsequent calls to isValid()
|
||||
* @param keyFile File containing the GATK key to load
|
||||
* @param signingAlgorithm The combination of hash and encryption algorithms used to sign the key
|
||||
*/
|
||||
public GATKKey ( PublicKey publicKey, File keyFile, String signingAlgorithm ) {
|
||||
if ( publicKey == null || keyFile == null || signingAlgorithm == null ) {
|
||||
throw new ReviewedStingException("Cannot construct GATKKey using null arguments");
|
||||
}
|
||||
|
||||
this.publicKey = publicKey;
|
||||
this.signingAlgorithm = signingAlgorithm;
|
||||
|
||||
readKey(keyFile);
|
||||
}
|
||||
|
||||
// -----------------------
|
||||
// Public API Methods:
|
||||
// -----------------------
|
||||
|
||||
/**
|
||||
* Writes out this key to a file in the format described at the top of this class,
|
||||
* encapsulating the key within a GZIP container.
|
||||
*
|
||||
* @param destination File to write the key to
|
||||
*/
|
||||
public void writeKey ( File destination ) {
|
||||
try {
|
||||
byte[] keyBytes = marshalKeyData();
|
||||
IOUtils.writeByteArrayToStream(keyBytes, new GZIPOutputStream(new FileOutputStream(destination)));
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(destination, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether the signature of this key is cryptographically valid (ie., can be
|
||||
* decrypted by the public key to produce a valid SHA-1 hash of the email address
|
||||
* in the key).
|
||||
*
|
||||
* @return True if the key's signature passes validation, otherwise false
|
||||
*/
|
||||
public boolean isValid() {
|
||||
try {
|
||||
Signature sig = Signature.getInstance(signingAlgorithm);
|
||||
sig.initVerify(publicKey);
|
||||
sig.update(emailAddress.getBytes());
|
||||
return sig.verify(signature);
|
||||
}
|
||||
catch ( NoSuchAlgorithmException e ) {
|
||||
throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e);
|
||||
}
|
||||
catch ( InvalidKeyException e ) {
|
||||
// If the GATK public key is invalid, it's likely our problem, not the user's:
|
||||
throw new ReviewedStingException(String.format("Public key %s is invalid", publicKey), e);
|
||||
}
|
||||
catch ( SignatureException e ) {
|
||||
throw new UserException.UnreadableKeyException("Signature is invalid or signing algorithm was unable to process the input data", e);
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------
|
||||
// Private Helper Methods:
|
||||
// -----------------------
|
||||
|
||||
/**
|
||||
* Helper method that creates a signature for this key using the combination of
|
||||
* hash/encryption algorithms specified at construction time.
|
||||
*/
|
||||
private void generateSignature() {
|
||||
try {
|
||||
Signature sig = Signature.getInstance(signingAlgorithm);
|
||||
sig.initSign(privateKey, CryptUtils.createRandomnessSource());
|
||||
sig.update(emailAddress.getBytes());
|
||||
signature = sig.sign();
|
||||
}
|
||||
catch ( NoSuchAlgorithmException e ) {
|
||||
throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e);
|
||||
}
|
||||
catch ( InvalidKeyException e ) {
|
||||
throw new ReviewedStingException(String.format("Private key %s is invalid", privateKey), e);
|
||||
}
|
||||
catch ( SignatureException e ) {
|
||||
throw new ReviewedStingException(String.format("Error creating signature for email address %s", emailAddress), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method that reads in a GATK key from a file. Should not be called directly --
|
||||
* use the appropriate constructor above.
|
||||
*
|
||||
* @param source File to read the key from
|
||||
*/
|
||||
private void readKey ( File source ) {
|
||||
try {
|
||||
byte[] keyBytes = IOUtils.readStreamIntoByteArray(new GZIPInputStream(new FileInputStream(source)));
|
||||
|
||||
// As a sanity check, compare the number of bytes read to the uncompressed file size
|
||||
// stored in the GZIP ISIZE field. If they don't match, the key must be corrupt:
|
||||
if ( keyBytes.length != IOUtils.getGZIPFileUncompressedSize(source) ) {
|
||||
throw new UserException.UnreadableKeyException("Number of bytes read does not match the uncompressed size specified in the GZIP ISIZE field");
|
||||
}
|
||||
|
||||
unmarshalKeyData(keyBytes);
|
||||
}
|
||||
catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(source, e);
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new UserException.UnreadableKeyException(source, e);
|
||||
}
|
||||
catch ( UserException.CouldNotReadInputFile e ) {
|
||||
throw new UserException.UnreadableKeyException(source, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method that assembles the email address and signature into a format
|
||||
* suitable for writing to disk.
|
||||
*
|
||||
* @return The aggregated key data, ready to be written to disk
|
||||
*/
|
||||
private byte[] marshalKeyData() {
|
||||
byte[] emailAddressBytes = emailAddress.getBytes();
|
||||
byte[] assembledKey = new byte[emailAddressBytes.length + 1 + signature.length];
|
||||
|
||||
System.arraycopy(emailAddressBytes, 0, assembledKey, 0, emailAddressBytes.length);
|
||||
assembledKey[emailAddressBytes.length] = GATK_KEY_SECTIONAL_DELIMITER;
|
||||
System.arraycopy(signature, 0, assembledKey, emailAddressBytes.length + 1, signature.length);
|
||||
|
||||
return assembledKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method that parses the raw key data from disk into its component
|
||||
* email address and signature. Performs some basic validation in the process.
|
||||
*
|
||||
* @param keyBytes The raw, uncompressed key data read from disk
|
||||
*/
|
||||
private void unmarshalKeyData ( byte[] keyBytes ) {
|
||||
int delimiterPosition = -1;
|
||||
|
||||
for ( int i = 0; i < keyBytes.length; i++ ) {
|
||||
if ( keyBytes[i] == GATK_KEY_SECTIONAL_DELIMITER ) {
|
||||
delimiterPosition = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( delimiterPosition == -1 ) {
|
||||
throw new UserException.UnreadableKeyException("Malformed GATK key contains no sectional delimiter");
|
||||
}
|
||||
else if ( delimiterPosition == 0 ) {
|
||||
throw new UserException.UnreadableKeyException("Malformed GATK key contains no email address");
|
||||
}
|
||||
else if ( delimiterPosition == keyBytes.length - 1 ) {
|
||||
throw new UserException.UnreadableKeyException("Malformed GATK key contains no signature");
|
||||
}
|
||||
|
||||
byte[] emailAddressBytes = new byte[delimiterPosition];
|
||||
System.arraycopy(keyBytes, 0, emailAddressBytes, 0, delimiterPosition);
|
||||
emailAddress = new String(emailAddressBytes);
|
||||
|
||||
signature = new byte[keyBytes.length - delimiterPosition - 1];
|
||||
System.arraycopy(keyBytes, delimiterPosition + 1, signature, 0, keyBytes.length - delimiterPosition - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method that ensures that the user's email address does not contain the NUL byte, which we
|
||||
* reserve as a delimiter within each key file.
|
||||
*/
|
||||
private void validateEmailAddress() {
|
||||
for ( byte b : emailAddress.getBytes() ) {
|
||||
if ( b == GATK_KEY_SECTIONAL_DELIMITER ) {
|
||||
throw new UserException(String.format("Email address must not contain a byte with value %d", GATK_KEY_SECTIONAL_DELIMITER));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -132,6 +132,10 @@ public class UserException extends ReviewedStingException {
|
|||
public CouldNotReadInputFile(File file, Exception e) {
|
||||
this(file, e.getMessage());
|
||||
}
|
||||
|
||||
public CouldNotReadInputFile(String message) {
|
||||
super(message);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -151,6 +155,10 @@ public class UserException extends ReviewedStingException {
|
|||
public CouldNotCreateOutputFile(File file, Exception e) {
|
||||
super(String.format("Couldn't write file %s because exception %s", file.getAbsolutePath(), e.getMessage()));
|
||||
}
|
||||
|
||||
public CouldNotCreateOutputFile(String message, Exception e) {
|
||||
super(message, e);
|
||||
}
|
||||
}
|
||||
|
||||
public static class MissortedBAM extends UserException {
|
||||
|
|
@ -319,4 +327,32 @@ public class UserException extends ReviewedStingException {
|
|||
"and try again.", null);
|
||||
}
|
||||
}
|
||||
|
||||
public static class UnreadableKeyException extends UserException {
|
||||
public UnreadableKeyException ( File f, Exception e ) {
|
||||
super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " +
|
||||
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
|
||||
f.getAbsolutePath(), e.getMessage()));
|
||||
}
|
||||
|
||||
public UnreadableKeyException ( String message, Exception e ) {
|
||||
this(String.format("%s. Error was: %s", message, e.getMessage()));
|
||||
}
|
||||
|
||||
public UnreadableKeyException ( String message ) {
|
||||
super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " +
|
||||
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
|
||||
message));
|
||||
}
|
||||
}
|
||||
|
||||
public static class KeySignatureVerificationException extends UserException {
|
||||
public KeySignatureVerificationException ( File f ) {
|
||||
super(String.format("The signature in key file %s failed cryptographic verification. " +
|
||||
"If this key was valid in the past, it's likely been revoked. " +
|
||||
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
|
||||
"for help.",
|
||||
f.getAbsolutePath()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -151,23 +151,16 @@ public class FragmentUtils {
|
|||
final int numBases = firstReadStop + secondRead.getReadLength();
|
||||
final byte[] bases = new byte[numBases];
|
||||
final byte[] quals = new byte[numBases];
|
||||
// BUGBUG: too verbose, clean this up.
|
||||
final byte[] insertionQuals = new byte[numBases];
|
||||
final byte[] deletionQuals = new byte[numBases];
|
||||
final byte[] firstReadBases = firstRead.getReadBases();
|
||||
final byte[] firstReadQuals = firstRead.getBaseQualities();
|
||||
final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
|
||||
final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
|
||||
final byte[] secondReadBases = secondRead.getReadBases();
|
||||
final byte[] secondReadQuals = secondRead.getBaseQualities();
|
||||
final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
|
||||
final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
|
||||
|
||||
for(int iii = 0; iii < firstReadStop; iii++) {
|
||||
bases[iii] = firstReadBases[iii];
|
||||
quals[iii] = firstReadQuals[iii];
|
||||
insertionQuals[iii] = firstReadInsertionQuals[iii];
|
||||
deletionQuals[iii] = firstReadDeletionQuals[iii];
|
||||
}
|
||||
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
||||
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
|
||||
|
|
@ -175,22 +168,16 @@ public class FragmentUtils {
|
|||
}
|
||||
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
|
||||
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
|
||||
insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
|
||||
deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
|
||||
}
|
||||
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
|
||||
bases[iii] = secondReadBases[iii-firstReadStop];
|
||||
quals[iii] = secondReadQuals[iii-firstReadStop];
|
||||
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
|
||||
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
|
||||
}
|
||||
|
||||
final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
|
||||
returnRead.setAlignmentStart(firstRead.getUnclippedStart());
|
||||
returnRead.setReadBases( bases );
|
||||
returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION );
|
||||
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
|
||||
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
|
||||
returnRead.setBaseQualities( quals );
|
||||
returnRead.setReadGroup( firstRead.getReadGroup() );
|
||||
returnRead.setReferenceName( firstRead.getReferenceName() );
|
||||
final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
|
||||
|
|
@ -199,6 +186,27 @@ public class FragmentUtils {
|
|||
returnRead.setCigar( new Cigar( cList ));
|
||||
returnRead.setMappingQuality( firstRead.getMappingQuality() );
|
||||
|
||||
if( firstRead.hasBaseIndelQualities() || secondRead.hasBaseIndelQualities() ) {
|
||||
final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
|
||||
final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
|
||||
final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
|
||||
final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
|
||||
for(int iii = 0; iii < firstReadStop; iii++) {
|
||||
insertionQuals[iii] = firstReadInsertionQuals[iii];
|
||||
deletionQuals[iii] = firstReadDeletionQuals[iii];
|
||||
}
|
||||
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
||||
insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score
|
||||
deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score
|
||||
}
|
||||
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
|
||||
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
|
||||
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
|
||||
}
|
||||
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
|
||||
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
|
||||
}
|
||||
|
||||
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>();
|
||||
returnList.add(returnRead);
|
||||
return returnList;
|
||||
|
|
|
|||
|
|
@ -29,10 +29,13 @@ import org.apache.commons.io.FilenameUtils;
|
|||
import org.apache.commons.io.LineIterator;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.*;
|
||||
|
||||
public class IOUtils {
|
||||
|
|
@ -400,4 +403,173 @@ public class IOUtils {
|
|||
public static boolean isSpecialFile(File file) {
|
||||
return file != null && (file.getAbsolutePath().startsWith("/dev/") || file.equals(DEV_DIR));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the entirety of the given file into a byte array. Uses a read buffer size of 4096 bytes.
|
||||
*
|
||||
* @param source File to read
|
||||
* @return The contents of the file as a byte array
|
||||
*/
|
||||
public static byte[] readFileIntoByteArray ( File source ) {
|
||||
return readFileIntoByteArray(source, 4096);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the entirety of the given file into a byte array using the requested read buffer size.
|
||||
*
|
||||
* @param source File to read
|
||||
* @param readBufferSize Number of bytes to read in at one time
|
||||
* @return The contents of the file as a byte array
|
||||
*/
|
||||
public static byte[] readFileIntoByteArray ( File source, int readBufferSize ) {
|
||||
if ( source == null ) {
|
||||
throw new ReviewedStingException("Source file was null");
|
||||
}
|
||||
|
||||
byte[] fileContents;
|
||||
|
||||
try {
|
||||
fileContents = readStreamIntoByteArray(new FileInputStream(source), readBufferSize);
|
||||
}
|
||||
catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(source, e);
|
||||
}
|
||||
|
||||
if ( fileContents.length != source.length() ) {
|
||||
throw new UserException.CouldNotReadInputFile(String.format("Unable to completely read file %s: read only %d/%d bytes",
|
||||
source.getAbsolutePath(), fileContents.length, source.length()));
|
||||
}
|
||||
|
||||
return fileContents;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all data from the given stream into a byte array. Uses a read buffer size of 4096 bytes.
|
||||
*
|
||||
* @param in Stream to read data from
|
||||
* @return The contents of the stream as a byte array
|
||||
*/
|
||||
public static byte[] readStreamIntoByteArray ( InputStream in ) {
|
||||
return readStreamIntoByteArray(in, 4096);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all data from the given stream into a byte array using the requested read buffer size.
|
||||
*
|
||||
* @param in Stream to read data from
|
||||
* @param readBufferSize Number of bytes to read in at one time
|
||||
* @return The contents of the stream as a byte array
|
||||
*/
|
||||
public static byte[] readStreamIntoByteArray ( InputStream in, int readBufferSize ) {
|
||||
if ( in == null ) {
|
||||
throw new ReviewedStingException("Input stream was null");
|
||||
}
|
||||
else if ( readBufferSize <= 0 ) {
|
||||
throw new ReviewedStingException("Read buffer size must be > 0");
|
||||
}
|
||||
|
||||
// Use a fixed-size buffer for each read, but a dynamically-growing buffer
|
||||
// to hold the accumulated contents of the file/stream:
|
||||
byte[] readBuffer = new byte[readBufferSize];
|
||||
ByteArrayOutputStream fileBuffer = new ByteArrayOutputStream(readBufferSize * 4);
|
||||
|
||||
try {
|
||||
try {
|
||||
int currentBytesRead;
|
||||
|
||||
while ( (currentBytesRead = in.read(readBuffer, 0, readBuffer.length)) >= 0 ) {
|
||||
fileBuffer.write(readBuffer, 0, currentBytesRead);
|
||||
}
|
||||
}
|
||||
finally {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile("I/O error reading from input stream", e);
|
||||
}
|
||||
|
||||
return fileBuffer.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the given array of bytes to a file
|
||||
*
|
||||
* @param bytes Data to write
|
||||
* @param destination File to write the data to
|
||||
*/
|
||||
public static void writeByteArrayToFile ( byte[] bytes, File destination ) {
|
||||
if ( destination == null ) {
|
||||
throw new ReviewedStingException("Destination file was null");
|
||||
}
|
||||
|
||||
try {
|
||||
writeByteArrayToStream(bytes, new FileOutputStream(destination));
|
||||
}
|
||||
catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(destination, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the given array of bytes to a stream
|
||||
*
|
||||
* @param bytes Data to write
|
||||
* @param out Stream to write the data to
|
||||
*/
|
||||
public static void writeByteArrayToStream ( byte[] bytes, OutputStream out ) {
|
||||
if ( bytes == null || out == null ) {
|
||||
throw new ReviewedStingException("Data to write or output stream was null");
|
||||
}
|
||||
|
||||
try {
|
||||
try {
|
||||
out.write(bytes);
|
||||
}
|
||||
finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile("I/O error writing to output stream", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the uncompressed size of a GZIP file. Uses the GZIP ISIZE field in the last
|
||||
* 4 bytes of the file to get this information.
|
||||
*
|
||||
* @param gzipFile GZIP-format file whose uncompressed size to determine
|
||||
* @return The uncompressed size (in bytes) of the GZIP file
|
||||
*/
|
||||
public static int getGZIPFileUncompressedSize ( File gzipFile ) {
|
||||
if ( gzipFile == null ) {
|
||||
throw new ReviewedStingException("GZIP file to examine was null");
|
||||
}
|
||||
|
||||
try {
|
||||
// The GZIP ISIZE field holds the uncompressed size of the compressed data.
|
||||
// It occupies the last 4 bytes of any GZIP file:
|
||||
RandomAccessFile in = new RandomAccessFile(gzipFile, "r");
|
||||
in.seek(gzipFile.length() - 4);
|
||||
byte[] sizeBytes = new byte[4];
|
||||
in.read(sizeBytes, 0, 4);
|
||||
|
||||
ByteBuffer byteBuf = ByteBuffer.wrap(sizeBytes);
|
||||
byteBuf.order(ByteOrder.LITTLE_ENDIAN); // The GZIP spec mandates little-endian byte order
|
||||
int uncompressedSize = byteBuf.getInt();
|
||||
|
||||
// If the size read in is negative, we've overflowed our signed integer:
|
||||
if ( uncompressedSize < 0 ) {
|
||||
throw new UserException.CouldNotReadInputFile(String.format("Cannot accurately determine the uncompressed size of file %s " +
|
||||
"because it's either larger than %d bytes or the GZIP ISIZE field is corrupt",
|
||||
gzipFile.getAbsolutePath(), Integer.MAX_VALUE));
|
||||
}
|
||||
|
||||
return uncompressedSize;
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(gzipFile, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -194,6 +194,10 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
}
|
||||
}
|
||||
|
||||
public boolean hasBaseIndelQualities() {
|
||||
return getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null || getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null;
|
||||
}
|
||||
|
||||
public byte[] getBaseInsertionQualities() {
|
||||
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) );
|
||||
if( quals == null ) {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.apache.log4j.Logger;
|
|||
import org.apache.log4j.PatternLayout;
|
||||
import org.apache.log4j.spi.LoggingEvent;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.utils.crypt.CryptUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.io.IOUtils;
|
||||
|
||||
|
|
@ -87,6 +88,9 @@ public abstract class BaseTest {
|
|||
public static final File testDirFile = new File("public/testdata/");
|
||||
public static final String testDir = testDirFile.getAbsolutePath() + "/";
|
||||
|
||||
public static final String keysDataLocation = validationDataLocation + "keys/";
|
||||
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
|
||||
|
||||
/** before the class starts up */
|
||||
static {
|
||||
// setup a basic log configuration
|
||||
|
|
@ -141,7 +145,7 @@ public abstract class BaseTest {
|
|||
*/
|
||||
public static class TestDataProvider {
|
||||
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
|
||||
private String name;
|
||||
protected String name;
|
||||
|
||||
/**
|
||||
* Create a new TestDataProvider instance bound to the class variable C
|
||||
|
|
|
|||
|
|
@ -0,0 +1,116 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.Median;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class MedianUnitTest extends BaseTest {
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Provider
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class MedianTestProvider extends TestDataProvider {
|
||||
final List<Integer> values = new ArrayList<Integer>();
|
||||
final int cap;
|
||||
final Integer expected;
|
||||
|
||||
public MedianTestProvider(int expected, int cap, Integer ... values) {
|
||||
super(MedianTestProvider.class);
|
||||
this.expected = expected;
|
||||
this.cap = cap;
|
||||
this.values.addAll(Arrays.asList(values));
|
||||
this.name = String.format("values=%s expected=%d cap=%d", this.values, expected, cap);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "MedianTestProvider")
|
||||
public Object[][] makeMedianTestProvider() {
|
||||
new MedianTestProvider(1, 1000, 0, 1, 2);
|
||||
new MedianTestProvider(1, 1000, 1, 0, 1, 2);
|
||||
new MedianTestProvider(1, 1000, 0, 1, 2, 3);
|
||||
new MedianTestProvider(2, 1000, 0, 1, 2, 3, 4);
|
||||
new MedianTestProvider(2, 1000, 4, 1, 2, 3, 0);
|
||||
new MedianTestProvider(1, 1000, 1);
|
||||
new MedianTestProvider(2, 1000, 2);
|
||||
new MedianTestProvider(1, 1000, 1, 2);
|
||||
|
||||
new MedianTestProvider(1, 3, 1);
|
||||
new MedianTestProvider(1, 3, 1, 2);
|
||||
new MedianTestProvider(2, 3, 1, 2, 3);
|
||||
new MedianTestProvider(2, 3, 1, 2, 3, 4);
|
||||
new MedianTestProvider(2, 3, 1, 2, 3, 4, 5);
|
||||
|
||||
new MedianTestProvider(1, 3, 1);
|
||||
new MedianTestProvider(1, 3, 1, 2);
|
||||
new MedianTestProvider(2, 3, 3, 2, 1);
|
||||
new MedianTestProvider(3, 3, 4, 3, 2, 1);
|
||||
new MedianTestProvider(4, 3, 5, 4, 3, 2, 1);
|
||||
|
||||
return MedianTestProvider.getTests(MedianTestProvider.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "MedianTestProvider")
|
||||
public void testBasicLikelihoods(MedianTestProvider cfg) {
|
||||
final Median<Integer> median = new Median<Integer>(cfg.cap);
|
||||
|
||||
int nAdded = 0;
|
||||
for ( final int value : cfg.values )
|
||||
if ( median.add(value) )
|
||||
nAdded++;
|
||||
|
||||
Assert.assertEquals(nAdded, median.size());
|
||||
|
||||
Assert.assertEquals(cfg.values.isEmpty(), median.isEmpty());
|
||||
Assert.assertEquals(cfg.values.size() >= cfg.cap, median.isFull());
|
||||
Assert.assertEquals(median.getMedian(), cfg.expected, cfg.toString());
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalStateException.class)
|
||||
public void testEmptyMedian() {
|
||||
final Median<Integer> median = new Median<Integer>();
|
||||
Assert.assertTrue(median.isEmpty());
|
||||
final Integer d = 100;
|
||||
Assert.assertEquals(median.getMedian(d), d);
|
||||
median.getMedian();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -30,6 +30,7 @@ import org.broad.tribble.FeatureCodec;
|
|||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
|
|
@ -45,7 +46,7 @@ import java.io.File;
|
|||
import java.util.*;
|
||||
|
||||
public class WalkerTest extends BaseTest {
|
||||
private static final boolean ENABLE_REPORTING = false;
|
||||
private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false;
|
||||
|
||||
@BeforeMethod
|
||||
public void initializeRandomGenerator() {
|
||||
|
|
@ -121,11 +122,19 @@ public class WalkerTest extends BaseTest {
|
|||
}
|
||||
|
||||
public class WalkerTestSpec {
|
||||
|
||||
// Arguments implicitly included in all Walker command lines, unless explicitly
|
||||
// disabled using the disableImplicitArgs() method below.
|
||||
final String IMPLICIT_ARGS = ENABLE_PHONE_HOME_FOR_TESTS ?
|
||||
String.format("-et %s", GATKRunReport.PhoneHomeOption.STANDARD) :
|
||||
String.format("-et %s -K %s", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile);
|
||||
|
||||
String args = "";
|
||||
int nOutputFiles = -1;
|
||||
List<String> md5s = null;
|
||||
List<String> exts = null;
|
||||
Class expectedException = null;
|
||||
boolean includeImplicitArgs = true;
|
||||
|
||||
// the default output path for the integration test
|
||||
private File outputFileLocation = null;
|
||||
|
|
@ -159,6 +168,10 @@ public class WalkerTest extends BaseTest {
|
|||
this.expectedException = expectedException;
|
||||
}
|
||||
|
||||
public String getArgsWithImplicitArgs() {
|
||||
return args + (includeImplicitArgs ? " " + IMPLICIT_ARGS : "");
|
||||
}
|
||||
|
||||
public void setOutputFileLocation(File outputFileLocation) {
|
||||
this.outputFileLocation = outputFileLocation;
|
||||
}
|
||||
|
|
@ -180,6 +193,9 @@ public class WalkerTest extends BaseTest {
|
|||
auxillaryFiles.put(expectededMD5sum, outputfile);
|
||||
}
|
||||
|
||||
public void disableImplicitArgs() {
|
||||
includeImplicitArgs = false;
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean parameterize() {
|
||||
|
|
@ -213,7 +229,7 @@ public class WalkerTest extends BaseTest {
|
|||
tmpFiles.add(fl);
|
||||
}
|
||||
|
||||
final String args = String.format(spec.args, tmpFiles.toArray());
|
||||
final String args = String.format(spec.getArgsWithImplicitArgs(), tmpFiles.toArray());
|
||||
System.out.println(Utils.dupString('-', 80));
|
||||
|
||||
if ( spec.expectsException() ) {
|
||||
|
|
@ -277,13 +293,10 @@ public class WalkerTest extends BaseTest {
|
|||
* @param args the argument list
|
||||
* @param expectedException the expected exception or null
|
||||
*/
|
||||
public static void executeTest(String name, String args, Class expectedException) {
|
||||
private void executeTest(String name, String args, Class expectedException) {
|
||||
CommandLineGATK instance = new CommandLineGATK();
|
||||
String[] command = Utils.escapeExpressions(args);
|
||||
|
||||
// add the logging level to each of the integration test commands
|
||||
command = Utils.appendArray(command, "-et", ENABLE_REPORTING ? "STANDARD" : "NO_ET");
|
||||
|
||||
// run the executable
|
||||
boolean gotAnException = false;
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,103 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* Short one line description of the walker.
|
||||
*
|
||||
* <p>
|
||||
* [Long description of the walker]
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Description of the Input]
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Description of the Output]
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T [walker name]
|
||||
* </pre>
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/1/12
|
||||
*/
|
||||
public class ContextCovariateUnitTest {
|
||||
ContextCovariate covariate;
|
||||
RecalibrationArgumentCollection RAC;
|
||||
Random random;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
RAC = new RecalibrationArgumentCollection();
|
||||
covariate = new ContextCovariate();
|
||||
random = GenomeAnalysisEngine.getRandomGenerator();
|
||||
covariate.initialize(RAC);
|
||||
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testSimpleContexts() {
|
||||
byte [] quals = createRandomReadQuals(101);
|
||||
byte [] bbases = createRandomReadBases(101);
|
||||
String bases = stringFrom(bbases);
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
|
||||
CovariateValues values = covariate.getValues(read);
|
||||
verifyCovariateArray((BitSet []) values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
|
||||
verifyCovariateArray((BitSet []) values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
|
||||
verifyCovariateArray((BitSet []) values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
|
||||
}
|
||||
|
||||
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
|
||||
for (int i=0; i<values.length; i++) {
|
||||
if (i >= contextSize)
|
||||
Assert.assertEquals(MathUtils.dnaFrom(values[i]), bases.substring(i-contextSize, i));
|
||||
else
|
||||
Assert.assertNull(values[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private String stringFrom(byte [] array) {
|
||||
String s = "";
|
||||
for (byte value : array)
|
||||
s += (char) value;
|
||||
return s;
|
||||
}
|
||||
|
||||
private byte [] createRandomReadQuals(int length) {
|
||||
byte [] quals = new byte[length];
|
||||
for (int i=0; i<length; i++)
|
||||
quals[i] = (byte) random.nextInt(50);
|
||||
return quals;
|
||||
}
|
||||
|
||||
private byte [] createRandomReadBases(int length) {
|
||||
byte [] bases = new byte[length];
|
||||
for (int i=0; i<length; i++) {
|
||||
switch(random.nextInt(4)) {
|
||||
case 0: bases[i] = 'A'; break;
|
||||
case 1: bases[i] = 'C'; break;
|
||||
case 2: bases[i] = 'G'; break;
|
||||
case 3: bases[i] = 'T'; break;
|
||||
}
|
||||
}
|
||||
return bases;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Tests ReadGroupProperties
|
||||
*/
|
||||
public class ReadGroupPropertiesIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void basicTest() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s",
|
||||
1,
|
||||
Arrays.asList("6b8cce223af28cbadcfe87a3b841fc56"));
|
||||
executeTest("ReadGroupProperties:", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("202b337ebbea3def1be8495eb363dfa8"));
|
||||
Arrays.asList("8f81a14fffc1a59b4b066f8595dc1232"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("ae29b9c9aacce8046dc780430540cd62"));
|
||||
Arrays.asList("c5b53231f4f6d9524bc4ec8115f44f5c"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("10027d13befaa07b7900a7af0ae0791c"));
|
||||
Arrays.asList("5af005255240a2186f04cb50851b8b6f"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "a08df9aea2b3df09cf90ff8e6e3be3ea";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -91,7 +91,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "32a34362dff51d8b73a3335048516d82";
|
||||
String md5 = "6358934c1c26345013a38261b8c45aa4";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -179,8 +179,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "2cb2544739e01f6c08fd820112914317" );
|
||||
e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" );
|
||||
e.put( 0.01, "926b58038dd4989bf7eda697a847eea9" );
|
||||
e.put( 1.0 / 1850, "93f44105b43b65730a3b821e27b0fa16" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -204,7 +204,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
|
||||
Arrays.asList("a1b75a7e12b160b0be823228c958573f"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -223,7 +223,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
|
||||
Arrays.asList("3bda1279cd6dcb47885f3e19466f11b9"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -242,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("bd9d3d50a1f49605d7cd592a0f446899"));
|
||||
Arrays.asList("d9fc3ba94a0d46029778c7b457e7292a"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -257,7 +257,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6"));
|
||||
Arrays.asList("b2e30ae3e5ffa6108f9f6178b1d2e679"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("91cd6d2e3972b0b8e4064bb35a33241f"));
|
||||
Arrays.asList("2cd182a84613fa91a6020466d2d327e2"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd"));
|
||||
Arrays.asList("9cd08dc412a007933381e9c76c073899"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -290,7 +290,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("320f61c87253aba77d6dc782242cba8b"));
|
||||
Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||
Arrays.asList("c9897b80615c53a4ea10a4b193d56d9c"));
|
||||
Arrays.asList("2609675a356f2dfc86f8a1d911210978"));
|
||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||
}
|
||||
|
||||
|
|
@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("5282fdb1711a532d726c13507bf80a21"));
|
||||
Arrays.asList("4fdd8da77167881b71b3547da5c13f94"));
|
||||
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,163 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Basic unit test for Haplotype Class
|
||||
*/
|
||||
public class HaplotypeUnitTest extends BaseTest {
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleInsertionAllele() {
|
||||
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
|
||||
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
|
||||
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleDeletionAllele() {
|
||||
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
|
||||
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
|
||||
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleSNPAllele() {
|
||||
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
|
||||
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "AGTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("C", "G", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCTACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("A", "T", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCAACTGGTCAAATGGTCAACTGGTCA";
|
||||
basicInsertTest("C", "A", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexInsertionAllele() {
|
||||
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
|
||||
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||
h1CigarList.add(new CigarElement(10, CigarOperator.I));
|
||||
h1CigarList.add(new CigarElement(8, CigarOperator.M));
|
||||
h1CigarList.add(new CigarElement(3, CigarOperator.D));
|
||||
h1CigarList.add(new CigarElement(7, CigarOperator.M));
|
||||
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
|
||||
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexDeletionAllele() {
|
||||
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
|
||||
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||
h1CigarList.add(new CigarElement(10, CigarOperator.I));
|
||||
h1CigarList.add(new CigarElement(8, CigarOperator.M));
|
||||
h1CigarList.add(new CigarElement(3, CigarOperator.D));
|
||||
h1CigarList.add(new CigarElement(7, CigarOperator.M));
|
||||
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
|
||||
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexSNPAllele() {
|
||||
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
|
||||
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||
h1CigarList.add(new CigarElement(10, CigarOperator.I));
|
||||
h1CigarList.add(new CigarElement(8, CigarOperator.M));
|
||||
h1CigarList.add(new CigarElement(3, CigarOperator.D));
|
||||
h1CigarList.add(new CigarElement(7, CigarOperator.M));
|
||||
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "AGCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("T", "G", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCTATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("G", "T", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGCGGGA" + "AGGC";
|
||||
basicInsertTest("G", "C", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
|
||||
final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1);
|
||||
final Haplotype h = new Haplotype(hap.getBytes());
|
||||
final Allele h1refAllele = Allele.create(ref, true);
|
||||
final Allele h1altAllele = Allele.create(alt, false);
|
||||
final Haplotype h1 = new Haplotype( h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE, 0, cigar) );
|
||||
final Haplotype h1expected = new Haplotype(newHap.getBytes());
|
||||
Assert.assertEquals(h1, h1expected);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -205,16 +205,9 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testIntAndBitSetConversion() {
|
||||
Assert.assertEquals(428, MathUtils.intFrom(MathUtils.bitSetFrom(428)));
|
||||
Assert.assertEquals(239847, MathUtils.intFrom(MathUtils.bitSetFrom(239847)));
|
||||
Assert.assertEquals(12726, MathUtils.intFrom(MathUtils.bitSetFrom(12726)));
|
||||
Assert.assertEquals(0, MathUtils.intFrom(MathUtils.bitSetFrom(0)));
|
||||
Assert.assertEquals(1, MathUtils.intFrom(MathUtils.bitSetFrom(1)));
|
||||
Assert.assertEquals(65536, MathUtils.intFrom(MathUtils.bitSetFrom(65536)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Private functions used by testArrayShuffle()
|
||||
*/
|
||||
private boolean hasUniqueElements(Object[] x) {
|
||||
for (int i = 0; i < x.length; i++)
|
||||
for (int j = i + 1; j < x.length; j++)
|
||||
|
|
@ -230,10 +223,100 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
return set.isEmpty();
|
||||
}
|
||||
|
||||
private void p (Object []x) {
|
||||
for (Object v: x)
|
||||
System.out.print((Integer) v + " ");
|
||||
System.out.println();
|
||||
@Test(enabled = true)
|
||||
public void testIntAndBitSetConversion() {
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(239847)), 239847);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(12726)), 12726);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(0)), 0);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(1)), 1);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(65536)), 65536);
|
||||
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(Long.MAX_VALUE)), Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testDNAAndBitSetConversion() {
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("ACGT")), "ACGT");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AGGTGTTGT")), "AGGTGTTGT");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("A")), "A");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("C")), "C");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("G")), "G");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("T")), "T");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CC")), "CC");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AA")), "AA");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AAAA")), "AAAA");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CCCCCCCCCCCCCC")), "CCCCCCCCCCCCCC");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GGGGGGGGGGGGGG")), "GGGGGGGGGGGGGG");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("TTTTTTTTTTTTTT")), "TTTTTTTTTTTTTT");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GTAGACCGATCTCAGCTAGT")), "GTAGACCGATCTCAGCTAGT");
|
||||
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31)
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testApproximateLog10SumLog10() {
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
|
||||
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
|
||||
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNormalizeFromLog10() {
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9}));
|
||||
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
|
||||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
|
||||
}
|
||||
|
||||
/**
|
||||
* Private function used by testNormalizeFromLog10()
|
||||
*/
|
||||
private boolean compareDoubleArrays(double[] b1, double[] b2) {
|
||||
if( b1.length != b2.length ) {
|
||||
return false; // sanity check
|
||||
}
|
||||
|
||||
for( int i=0; i < b1.length; i++ ){
|
||||
if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,177 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.crypt;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.security.Key;
|
||||
import java.security.KeyPair;
|
||||
import java.security.PrivateKey;
|
||||
import java.security.PublicKey;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class CryptUtilsUnitTest extends BaseTest {
|
||||
|
||||
@Test
|
||||
public void testGenerateValidKeyPairWithDefaultSettings() {
|
||||
KeyPair keyPair = CryptUtils.generateKeyPair();
|
||||
Assert.assertTrue(CryptUtils.keysDecryptEachOther(keyPair.getPrivate(), keyPair.getPublic()));
|
||||
}
|
||||
|
||||
@DataProvider( name = "InvalidKeyPairSettings" )
|
||||
public Object[][] invalidKeyPairSettingsDataProvider() {
|
||||
return new Object[][] {
|
||||
{ -1, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM},
|
||||
{ CryptUtils.DEFAULT_KEY_LENGTH, "Made-up algorithm", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM},
|
||||
{ CryptUtils.DEFAULT_KEY_LENGTH, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, "Made-up algorithm"}
|
||||
};
|
||||
}
|
||||
|
||||
@Test( dataProvider = "InvalidKeyPairSettings", expectedExceptions = ReviewedStingException.class )
|
||||
public void testGenerateKeyPairWithInvalidSettings( int keyLength, String encryptionAlgorithm, String randomNumberGenerationAlgorithm ) {
|
||||
KeyPair keyPair = CryptUtils.generateKeyPair(keyLength, encryptionAlgorithm, randomNumberGenerationAlgorithm);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGATKMasterKeyPairMutualDecryption() {
|
||||
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() {
|
||||
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKeyPairWriteThenRead() {
|
||||
KeyPair keyPair = CryptUtils.generateKeyPair();
|
||||
File privateKeyFile = createTempFile("testKeyPairWriteThenRead_private", "key");
|
||||
File publicKeyFile = createTempFile("testKeyPairWriteThenRead_public", "key");
|
||||
|
||||
CryptUtils.writeKeyPair(keyPair, privateKeyFile, publicKeyFile);
|
||||
|
||||
assertKeysAreEqual(keyPair.getPrivate(), CryptUtils.readPrivateKey(privateKeyFile));
|
||||
assertKeysAreEqual(keyPair.getPublic(), CryptUtils.readPublicKey(publicKeyFile));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPublicKeyWriteThenReadFromFile() {
|
||||
File keyFile = createTempFile("testPublicKeyWriteThenReadFromFile", "key");
|
||||
PublicKey publicKey = CryptUtils.generateKeyPair().getPublic();
|
||||
|
||||
CryptUtils.writeKey(publicKey, keyFile);
|
||||
|
||||
assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(keyFile));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPublicKeyWriteThenReadFromStream() throws IOException {
|
||||
File keyFile = createTempFile("testPublicKeyWriteThenReadFromStream", "key");
|
||||
PublicKey publicKey = CryptUtils.generateKeyPair().getPublic();
|
||||
|
||||
CryptUtils.writeKey(publicKey, keyFile);
|
||||
|
||||
assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(new FileInputStream(keyFile)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPrivateKeyWriteThenReadFromFile() {
|
||||
File keyFile = createTempFile("testPrivateKeyWriteThenReadFromFile", "key");
|
||||
PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate();
|
||||
|
||||
CryptUtils.writeKey(privateKey, keyFile);
|
||||
|
||||
assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(keyFile));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPrivateKeyWriteThenReadFromStream() throws IOException {
|
||||
File keyFile = createTempFile("testPrivateKeyWriteThenReadFromStream", "key");
|
||||
PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate();
|
||||
|
||||
CryptUtils.writeKey(privateKey, keyFile);
|
||||
|
||||
assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(new FileInputStream(keyFile)));
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
|
||||
public void testReadNonExistentPublicKey() {
|
||||
File nonExistentFile = new File("jdshgkdfhg.key");
|
||||
Assert.assertFalse(nonExistentFile.exists());
|
||||
|
||||
CryptUtils.readPublicKey(nonExistentFile);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
|
||||
public void testReadNonExistentPrivateKey() {
|
||||
File nonExistentFile = new File("jdshgkdfhg.key");
|
||||
Assert.assertFalse(nonExistentFile.exists());
|
||||
|
||||
CryptUtils.readPrivateKey(nonExistentFile);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDecodePublicKey() {
|
||||
PublicKey originalKey = CryptUtils.generateKeyPair().getPublic();
|
||||
PublicKey decodedKey = CryptUtils.decodePublicKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM);
|
||||
assertKeysAreEqual(originalKey, decodedKey);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDecodePrivateKey() {
|
||||
PrivateKey originalKey = CryptUtils.generateKeyPair().getPrivate();
|
||||
PrivateKey decodedKey = CryptUtils.decodePrivateKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM);
|
||||
assertKeysAreEqual(originalKey, decodedKey);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoadGATKMasterPrivateKey() {
|
||||
PrivateKey gatkMasterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoadGATKMasterPublicKey() {
|
||||
PublicKey gatkMasterPublicKey = CryptUtils.loadGATKMasterPublicKey();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoadGATKDistributedPublicKey() {
|
||||
PublicKey gatkDistributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
||||
}
|
||||
|
||||
private void assertKeysAreEqual( Key originalKey, Key keyFromDisk ) {
|
||||
Assert.assertTrue(Arrays.equals(originalKey.getEncoded(), keyFromDisk.getEncoded()));
|
||||
Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm());
|
||||
Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.crypt;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class GATKKeyIntegrationTest extends WalkerTest {
|
||||
|
||||
public static final String BASE_COMMAND = String.format("-T PrintReads -R %s -I %s -o %%s",
|
||||
testDir + "exampleFASTA.fasta",
|
||||
testDir + "exampleBAM.bam");
|
||||
public static final String MD5_UPON_SUCCESSFUL_RUN = "b9dc5bf6753ca2819e70b056eaf61258";
|
||||
|
||||
|
||||
private void runGATKKeyTest ( String testName, String etArg, String keyArg, Class expectedException, String md5 ) {
|
||||
String command = BASE_COMMAND + String.format(" %s %s", etArg, keyArg);
|
||||
|
||||
WalkerTestSpec spec = expectedException != null ?
|
||||
new WalkerTestSpec(command, 1, expectedException) :
|
||||
new WalkerTestSpec(command, 1, Arrays.asList(md5));
|
||||
|
||||
spec.disableImplicitArgs(); // Turn off automatic inclusion of -et/-K args by WalkerTest
|
||||
executeTest(testName, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidKeyNoET() {
|
||||
runGATKKeyTest("testValidKeyNoET",
|
||||
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
|
||||
"-K " + keysDataLocation + "valid.key",
|
||||
null,
|
||||
MD5_UPON_SUCCESSFUL_RUN);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidKeyETStdout() {
|
||||
runGATKKeyTest("testValidKeyETStdout",
|
||||
"-et " + GATKRunReport.PhoneHomeOption.STDOUT,
|
||||
"-K " + keysDataLocation + "valid.key",
|
||||
null,
|
||||
MD5_UPON_SUCCESSFUL_RUN);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidKeyETStandard() {
|
||||
runGATKKeyTest("testValidKeyETStandard",
|
||||
"",
|
||||
"-K " + keysDataLocation + "valid.key",
|
||||
null,
|
||||
MD5_UPON_SUCCESSFUL_RUN);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoKeyNoET() {
|
||||
runGATKKeyTest("testNoKeyNoET",
|
||||
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
|
||||
"",
|
||||
UserException.class,
|
||||
null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoKeyETStdout() {
|
||||
runGATKKeyTest("testNoKeyETStdout",
|
||||
"-et " + GATKRunReport.PhoneHomeOption.STDOUT,
|
||||
"",
|
||||
UserException.class,
|
||||
null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoKeyETStandard() {
|
||||
runGATKKeyTest("testNoKeyETStandard",
|
||||
"",
|
||||
"",
|
||||
null,
|
||||
MD5_UPON_SUCCESSFUL_RUN);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRevokedKey() {
|
||||
runGATKKeyTest("testRevokedKey",
|
||||
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
|
||||
"-K " + keysDataLocation + "revoked.key",
|
||||
UserException.KeySignatureVerificationException.class,
|
||||
null);
|
||||
}
|
||||
|
||||
@DataProvider(name = "CorruptKeyTestData")
|
||||
public Object[][] corruptKeyDataProvider() {
|
||||
return new Object[][] {
|
||||
{ "corrupt_empty.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_single_byte_file.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_random_contents.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_single_byte_deletion.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_single_byte_insertion.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_single_byte_change.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_multi_byte_deletion.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_multi_byte_insertion.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_multi_byte_change.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_bad_isize_field.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_bad_crc.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_no_email_address.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_no_sectional_delimiter.key", UserException.KeySignatureVerificationException.class },
|
||||
{ "corrupt_no_signature.key", UserException.UnreadableKeyException.class },
|
||||
{ "corrupt_bad_signature.key", UserException.KeySignatureVerificationException.class },
|
||||
{ "corrupt_non_gzipped_valid_key.key", UserException.UnreadableKeyException.class }
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "CorruptKeyTestData")
|
||||
public void testCorruptKey ( String corruptKeyName, Class expectedException ) {
|
||||
runGATKKeyTest(String.format("testCorruptKey (%s)", corruptKeyName),
|
||||
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
|
||||
"-K " + keysDataLocation + corruptKeyName,
|
||||
expectedException,
|
||||
null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCorruptButNonRequiredKey() {
|
||||
runGATKKeyTest("testCorruptButNonRequiredKey",
|
||||
"",
|
||||
"-K " + keysDataLocation + "corrupt_random_contents.key",
|
||||
null,
|
||||
MD5_UPON_SUCCESSFUL_RUN);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.crypt;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.io.File;
|
||||
import java.security.KeyPair;
|
||||
import java.security.PrivateKey;
|
||||
import java.security.PublicKey;
|
||||
|
||||
public class GATKKeyUnitTest extends BaseTest {
|
||||
|
||||
@Test
|
||||
public void testCreateGATKKeyUsingMasterKeyPair() {
|
||||
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||
PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey();
|
||||
|
||||
// We should be able to create a valid GATKKey using our master key pair:
|
||||
GATKKey key = new GATKKey(masterPrivateKey, masterPublicKey, "foo@bar.com");
|
||||
Assert.assertTrue(key.isValid());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() {
|
||||
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||
PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
||||
|
||||
// We should also be able to create a valid GATKKey using our master private
|
||||
// key and the public key we distribute with the GATK:
|
||||
GATKKey key = new GATKKey(masterPrivateKey, distributedPublicKey, "foo@bar.com");
|
||||
Assert.assertTrue(key.isValid());
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = ReviewedStingException.class )
|
||||
public void testKeyPairMismatch() {
|
||||
KeyPair firstKeyPair = CryptUtils.generateKeyPair();
|
||||
KeyPair secondKeyPair = CryptUtils.generateKeyPair();
|
||||
|
||||
// Attempting to create a GATK Key with private and public keys that aren't part of the
|
||||
// same key pair should immediately trigger a validation failure:
|
||||
GATKKey key = new GATKKey(firstKeyPair.getPrivate(), secondKeyPair.getPublic(), "foo@bar.com");
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = ReviewedStingException.class )
|
||||
public void testEncryptionAlgorithmMismatch() {
|
||||
KeyPair keyPair = CryptUtils.generateKeyPair(CryptUtils.DEFAULT_KEY_LENGTH, "DSA", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
|
||||
|
||||
// Attempting to use a DSA private key to create an RSA signature should throw an error:
|
||||
GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), "foo@bar.com", "SHA1withRSA");
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.class )
|
||||
public void testInvalidEmailAddress() {
|
||||
String emailAddressWithNulByte = new String(new byte[] { 0 });
|
||||
KeyPair keyPair = CryptUtils.generateKeyPair();
|
||||
|
||||
// Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file:
|
||||
GATKKey key = new GATKKey(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey(),
|
||||
emailAddressWithNulByte);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateGATKKeyFromValidKeyFile() {
|
||||
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "valid.key"));
|
||||
Assert.assertTrue(key.isValid());
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.UnreadableKeyException.class )
|
||||
public void testCreateGATKKeyFromCorruptKeyFile() {
|
||||
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "corrupt_random_contents.key"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateGATKKeyFromRevokedKeyFile() {
|
||||
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "revoked.key"));
|
||||
Assert.assertFalse(key.isValid());
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
|
||||
public void testCreateGATKKeyFromNonExistentFile() {
|
||||
File nonExistentFile = new File("ghfdkgsdhg.key");
|
||||
Assert.assertFalse(nonExistentFile.exists());
|
||||
|
||||
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile);
|
||||
}
|
||||
}
|
||||
|
|
@ -27,12 +27,18 @@ package org.broadinstitute.sting.utils.io;
|
|||
import org.apache.commons.io.FileUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class IOUtilsUnitTest extends BaseTest {
|
||||
|
|
@ -230,4 +236,90 @@ public class IOUtilsUnitTest extends BaseTest {
|
|||
Assert.assertFalse(IOUtils.isSpecialFile(new File("/home/user/my.file")));
|
||||
Assert.assertFalse(IOUtils.isSpecialFile(new File("/devfake/null")));
|
||||
}
|
||||
|
||||
@DataProvider( name = "ByteArrayIOTestData")
|
||||
public Object[][] byteArrayIOTestDataProvider() {
|
||||
return new Object[][] {
|
||||
// file size, read buffer size
|
||||
{ 0, 4096 },
|
||||
{ 1, 4096 },
|
||||
{ 2000, 4096 },
|
||||
{ 4095, 4096 },
|
||||
{ 4096, 4096 },
|
||||
{ 4097, 4096 },
|
||||
{ 6000, 4096 },
|
||||
{ 8191, 4096 },
|
||||
{ 8192, 4096 },
|
||||
{ 8193, 4096 },
|
||||
{ 10000, 4096 }
|
||||
};
|
||||
}
|
||||
|
||||
@Test( dataProvider = "ByteArrayIOTestData" )
|
||||
public void testWriteThenReadFileIntoByteArray ( int fileSize, int readBufferSize ) throws Exception {
|
||||
File tempFile = createTempFile(String.format("testWriteThenReadFileIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp");
|
||||
|
||||
byte[] dataWritten = getDeterministicRandomData(fileSize);
|
||||
IOUtils.writeByteArrayToFile(dataWritten, tempFile);
|
||||
byte[] dataRead = IOUtils.readFileIntoByteArray(tempFile, readBufferSize);
|
||||
|
||||
Assert.assertEquals(dataRead.length, dataWritten.length);
|
||||
Assert.assertTrue(Arrays.equals(dataRead, dataWritten));
|
||||
}
|
||||
|
||||
@Test( dataProvider = "ByteArrayIOTestData" )
|
||||
public void testWriteThenReadStreamIntoByteArray ( int fileSize, int readBufferSize ) throws Exception {
|
||||
File tempFile = createTempFile(String.format("testWriteThenReadStreamIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp");
|
||||
|
||||
byte[] dataWritten = getDeterministicRandomData(fileSize);
|
||||
IOUtils.writeByteArrayToStream(dataWritten, new FileOutputStream(tempFile));
|
||||
byte[] dataRead = IOUtils.readStreamIntoByteArray(new FileInputStream(tempFile), readBufferSize);
|
||||
|
||||
Assert.assertEquals(dataRead.length, dataWritten.length);
|
||||
Assert.assertTrue(Arrays.equals(dataRead, dataWritten));
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
|
||||
public void testReadNonExistentFileIntoByteArray() {
|
||||
File nonExistentFile = new File("djfhsdkjghdfk");
|
||||
Assert.assertFalse(nonExistentFile.exists());
|
||||
|
||||
IOUtils.readFileIntoByteArray(nonExistentFile);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = ReviewedStingException.class )
|
||||
public void testReadNullStreamIntoByteArray() {
|
||||
IOUtils.readStreamIntoByteArray(null);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = ReviewedStingException.class )
|
||||
public void testReadStreamIntoByteArrayInvalidBufferSize() throws Exception {
|
||||
IOUtils.readStreamIntoByteArray(new FileInputStream(createTempFile("testReadStreamIntoByteArrayInvalidBufferSize", "tmp")),
|
||||
-1);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.CouldNotCreateOutputFile.class )
|
||||
public void testWriteByteArrayToUncreatableFile() {
|
||||
IOUtils.writeByteArrayToFile(new byte[]{0}, new File("/dev/foo/bar"));
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = ReviewedStingException.class )
|
||||
public void testWriteNullByteArrayToFile() {
|
||||
IOUtils.writeByteArrayToFile(null, createTempFile("testWriteNullByteArrayToFile", "tmp"));
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = ReviewedStingException.class )
|
||||
public void testWriteByteArrayToNullStream() {
|
||||
IOUtils.writeByteArrayToStream(new byte[]{0}, null);
|
||||
}
|
||||
|
||||
private byte[] getDeterministicRandomData ( int size ) {
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
Random rand = GenomeAnalysisEngine.getRandomGenerator();
|
||||
|
||||
byte[] randomData = new byte[size];
|
||||
rand.nextBytes(randomData);
|
||||
|
||||
return randomData;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -36,6 +36,8 @@
|
|||
<dir name="org/broadinstitute/sting/utils/R" includes="**/*.tar.gz" />
|
||||
<!-- All R scripts in org.broadinstitute.sting -->
|
||||
<dir name="org/broadinstitute/sting" includes="**/*.R" />
|
||||
<!-- The GATK public key -->
|
||||
<file path="GATK_public.key" />
|
||||
</dependencies>
|
||||
</executable>
|
||||
<resources>
|
||||
|
|
|
|||
Loading…
Reference in New Issue