Merge branch 'master' of ssh://ni.broadinstitute.org/humgen/gsa-scr1/chartl/dev/unstable
This commit is contained in:
commit
67def6acc8
|
|
@ -47,6 +47,7 @@
|
||||||
<property name="R.package.path" value="org/broadinstitute/sting/utils/R" />
|
<property name="R.package.path" value="org/broadinstitute/sting/utils/R" />
|
||||||
<property name="resource.file" value="StingText.properties" />
|
<property name="resource.file" value="StingText.properties" />
|
||||||
<property name="resource.path" value="${java.classes}/StingText.properties" />
|
<property name="resource.path" value="${java.classes}/StingText.properties" />
|
||||||
|
<property name="key.dir" value="${public.dir}/keys" />
|
||||||
|
|
||||||
<property name="scala.public.source.dir" value="${public.dir}/scala/src" />
|
<property name="scala.public.source.dir" value="${public.dir}/scala/src" />
|
||||||
<property name="scala.private.source.dir" value="${private.dir}/scala/src" />
|
<property name="scala.private.source.dir" value="${private.dir}/scala/src" />
|
||||||
|
|
@ -567,6 +568,7 @@
|
||||||
</fileset>
|
</fileset>
|
||||||
<fileset dir="${java.classes}" includes="**/commandline/**/*.class"/>
|
<fileset dir="${java.classes}" includes="**/commandline/**/*.class"/>
|
||||||
<fileset dir="${java.classes}" includes="**/sting/pipeline/**/*.class"/>
|
<fileset dir="${java.classes}" includes="**/sting/pipeline/**/*.class"/>
|
||||||
|
<fileset dir="${java.classes}" includes="**/sting/tools/**/*.class"/>
|
||||||
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
|
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
|
||||||
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
|
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
|
||||||
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
|
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
|
||||||
|
|
@ -615,6 +617,9 @@
|
||||||
<include name="**/gatk/**/*.R"/>
|
<include name="**/gatk/**/*.R"/>
|
||||||
<include name="**/alignment/**/*.R"/>
|
<include name="**/alignment/**/*.R"/>
|
||||||
</fileset>
|
</fileset>
|
||||||
|
<fileset dir="${key.dir}">
|
||||||
|
<include name="**/*.key"/>
|
||||||
|
</fileset>
|
||||||
<manifest>
|
<manifest>
|
||||||
<attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
<attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
||||||
</manifest>
|
</manifest>
|
||||||
|
|
@ -879,6 +884,7 @@
|
||||||
<pathelement location="${R.tar.dir}" />
|
<pathelement location="${R.tar.dir}" />
|
||||||
<pathelement location="${R.public.scripts.dir}" />
|
<pathelement location="${R.public.scripts.dir}" />
|
||||||
<pathelement location="${R.private.scripts.dir}" />
|
<pathelement location="${R.private.scripts.dir}" />
|
||||||
|
<pathelement location="${key.dir}" />
|
||||||
<path refid="external.dependencies" />
|
<path refid="external.dependencies" />
|
||||||
</path>
|
</path>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,11 +4,9 @@
|
||||||
colnames(d) = tableHeader;
|
colnames(d) = tableHeader;
|
||||||
|
|
||||||
for (i in 1:ncol(d)) {
|
for (i in 1:ncol(d)) {
|
||||||
v = suppressWarnings(as.numeric(d[,i]));
|
# use the general type.convert infrastructure of read.table to convert column data to R types
|
||||||
|
v = type.convert(d[,i])
|
||||||
if (length(na.omit(as.numeric(v))) == length(d[,i])) {
|
d[,i] = v;
|
||||||
d[,i] = v;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
usedNames = ls(envir=tableEnv, pattern=tableName);
|
usedNames = ls(envir=tableEnv, pattern=tableName);
|
||||||
|
|
|
||||||
|
|
@ -35,9 +35,12 @@ import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
import org.broadinstitute.sting.utils.crypt.CryptUtils;
|
||||||
|
import org.broadinstitute.sting.utils.crypt.GATKKey;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||||
|
|
||||||
|
import java.security.PublicKey;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -78,6 +81,9 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
Walker<?,?> walker = engine.getWalkerByName(getAnalysisName());
|
Walker<?,?> walker = engine.getWalkerByName(getAnalysisName());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// Make sure a valid GATK user key is present, if required.
|
||||||
|
authorizeGATKRun();
|
||||||
|
|
||||||
engine.setArguments(getArgumentCollection());
|
engine.setArguments(getArgumentCollection());
|
||||||
|
|
||||||
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
|
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
|
||||||
|
|
@ -130,6 +136,28 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Authorizes this run of the GATK by checking for a valid GATK user key, if required.
|
||||||
|
* Currently, a key is required only if running with the -et NO_ET or -et STDOUT options.
|
||||||
|
*/
|
||||||
|
private void authorizeGATKRun() {
|
||||||
|
if ( getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.NO_ET ||
|
||||||
|
getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) {
|
||||||
|
if ( getArgumentCollection().gatkKeyFile == null ) {
|
||||||
|
throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
|
||||||
|
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
|
||||||
|
"for more information and instructions on how to obtain a key.");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
||||||
|
GATKKey gatkUserKey = new GATKKey(gatkPublicKey, getArgumentCollection().gatkKeyFile);
|
||||||
|
|
||||||
|
if ( ! gatkUserKey.isValid() ) {
|
||||||
|
throw new UserException.KeySignatureVerificationException(getArgumentCollection().gatkKeyFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
|
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
|
||||||
|
|
|
||||||
|
|
@ -65,9 +65,12 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
|
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
|
||||||
public Integer readBufferSize = null;
|
public Integer readBufferSize = null;
|
||||||
|
|
||||||
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
|
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
|
||||||
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
|
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
|
||||||
|
|
||||||
|
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
|
||||||
|
public File gatkKeyFile = null;
|
||||||
|
|
||||||
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
|
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
|
||||||
public List<String> readFilters = new ArrayList<String>();
|
public List<String> readFilters = new ArrayList<String>();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -154,9 +154,7 @@ public class GATKRunReport {
|
||||||
/** Standard option. Writes to local repository if it can be found, or S3 otherwise */
|
/** Standard option. Writes to local repository if it can be found, or S3 otherwise */
|
||||||
STANDARD,
|
STANDARD,
|
||||||
/** Force output to STDOUT. For debugging only */
|
/** Force output to STDOUT. For debugging only */
|
||||||
STDOUT,
|
STDOUT
|
||||||
/** Force output to S3. For debugging only */
|
|
||||||
AWS_S3 // todo -- remove me -- really just for testing purposes
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss");
|
private static final DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss");
|
||||||
|
|
@ -239,11 +237,8 @@ public class GATKRunReport {
|
||||||
case STDOUT:
|
case STDOUT:
|
||||||
postReportToStream(System.out);
|
postReportToStream(System.out);
|
||||||
break;
|
break;
|
||||||
case AWS_S3:
|
|
||||||
postReportToAWSS3();
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
exceptDuringRunReport("BUG: unexcepted PhoneHomeOption ");
|
exceptDuringRunReport("BUG: unexpected PhoneHomeOption ");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,15 +43,15 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
||||||
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
||||||
|
|
||||||
if ( vc.isSNP() ) {
|
if ( vc.isSNP() ) {
|
||||||
|
final List<Byte> altAlleles = new ArrayList<Byte>();
|
||||||
|
for ( final Allele a : vc.getAlternateAlleles() )
|
||||||
|
altAlleles.add(a.getBases()[0]);
|
||||||
|
|
||||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||||
if ( context == null )
|
if ( context == null )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
final List<Byte> altAlleles = new ArrayList<Byte>();
|
|
||||||
for ( final Allele a : vc.getAlternateAlleles() )
|
|
||||||
altAlleles.add(a.getBases()[0]);
|
|
||||||
|
|
||||||
fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals);
|
fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals);
|
||||||
}
|
}
|
||||||
} else if ( vc.isIndel() || vc.isMixed() ) {
|
} else if ( vc.isIndel() || vc.isMixed() ) {
|
||||||
|
|
|
||||||
|
|
@ -26,10 +26,12 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.BitSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -43,10 +45,6 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
private int insertionsContextSize;
|
private int insertionsContextSize;
|
||||||
private int deletionsContextSize;
|
private int deletionsContextSize;
|
||||||
|
|
||||||
private String mismatchesNoContext = "";
|
|
||||||
private String insertionsNoContext = "";
|
|
||||||
private String deletionsNoContext = "";
|
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||||
|
|
@ -57,29 +55,26 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
|
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
|
||||||
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
|
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
|
||||||
|
|
||||||
// initialize no context strings given the size of the context for each covariate type
|
|
||||||
mismatchesNoContext = makeAllNStringWithLength(mismatchesContextSize);
|
|
||||||
insertionsNoContext = makeAllNStringWithLength(insertionsContextSize);
|
|
||||||
deletionsNoContext = makeAllNStringWithLength( deletionsContextSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||||
int l = read.getReadLength();
|
int l = read.getReadLength();
|
||||||
String[] mismatches = new String [l];
|
BitSet[] mismatches = new BitSet[l];
|
||||||
String[] insertions = new String [l];
|
BitSet[] insertions = new BitSet[l];
|
||||||
String[] deletions = new String [l];
|
BitSet[] deletions = new BitSet[l];
|
||||||
|
|
||||||
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||||
byte[] bases = read.getReadBases();
|
byte[] bases = read.getReadBases();
|
||||||
if (negativeStrand) {
|
if (negativeStrand)
|
||||||
bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
|
bases = BaseUtils.simpleReverseComplement(bases);
|
||||||
}
|
|
||||||
for (int i = 0; i < read.getReadLength(); i++) {
|
for (int i = 0; i < read.getReadLength(); i++) {
|
||||||
mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext);
|
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
|
||||||
insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext);
|
insertions[i] = contextWith(bases, i, insertionsContextSize);
|
||||||
deletions[i] = contextWith(bases, i, deletionsContextSize, deletionsNoContext);
|
deletions[i] = contextWith(bases, i, deletionsContextSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (negativeStrand) {
|
if (negativeStrand) {
|
||||||
reverse(mismatches);
|
reverse(mismatches);
|
||||||
reverse(insertions);
|
reverse(insertions);
|
||||||
|
|
@ -90,7 +85,7 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
@Override
|
@Override
|
||||||
public final Comparable getValue(final String str) {
|
public final Object getValue(final String str) {
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -100,29 +95,28 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
* @param bases the bases in the read to build the context from
|
* @param bases the bases in the read to build the context from
|
||||||
* @param offset the position in the read to calculate the context for
|
* @param offset the position in the read to calculate the context for
|
||||||
* @param contextSize context size to use building the context
|
* @param contextSize context size to use building the context
|
||||||
* @param noContextString string to return if the position is not far enough in the read to have a full context before.
|
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private String contextWith(byte [] bases, int offset, int contextSize, String noContextString) {
|
private BitSet contextWith(byte [] bases, int offset, int contextSize) {
|
||||||
return (offset < contextSize) ? noContextString : new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
|
if (offset < contextSize)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
|
||||||
|
if (context.contains("N"))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return MathUtils.bitSetFrom(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String makeAllNStringWithLength(int length) {
|
|
||||||
String s = "";
|
|
||||||
for (int i=0; i<length; i++)
|
|
||||||
s += "N";
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reverses the given array in place.
|
* Reverses the given array in place.
|
||||||
*
|
*
|
||||||
* @param array any array
|
* @param array any array
|
||||||
*/
|
*/
|
||||||
private static void reverse(final Comparable[] array) {
|
private static void reverse(final Object[] array) {
|
||||||
final int arrayLength = array.length;
|
final int arrayLength = array.length;
|
||||||
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
||||||
final Comparable temp = array[l];
|
final Object temp = array[l];
|
||||||
array[l] = array[r];
|
array[l] = array[r];
|
||||||
array[r] = temp;
|
array[r] = temp;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ public interface Covariate {
|
||||||
*/
|
*/
|
||||||
public CovariateValues getValues(GATKSAMRecord read);
|
public CovariateValues getValues(GATKSAMRecord read);
|
||||||
|
|
||||||
public Comparable getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
|
public Object getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RequiredCovariate extends Covariate {}
|
interface RequiredCovariate extends Covariate {}
|
||||||
|
|
|
||||||
|
|
@ -12,25 +12,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
* @since 2/8/12
|
* @since 2/8/12
|
||||||
*/
|
*/
|
||||||
public class CovariateValues {
|
public class CovariateValues {
|
||||||
private Comparable[] mismatches;
|
private Object[] mismatches;
|
||||||
private Comparable[] insertions;
|
private Object[] insertions;
|
||||||
private Comparable[] deletions;
|
private Object[] deletions;
|
||||||
|
|
||||||
public CovariateValues(Comparable[] mismatch, Comparable[] insertion, Comparable[] deletion) {
|
public CovariateValues(Object[] mismatch, Object[] insertion, Object[] deletion) {
|
||||||
this.mismatches = mismatch;
|
this.mismatches = mismatch;
|
||||||
this.insertions = insertion;
|
this.insertions = insertion;
|
||||||
this.deletions = deletion;
|
this.deletions = deletion;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable[] getMismatches() {
|
public Object[] getMismatches() {
|
||||||
return mismatches;
|
return mismatches;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable[] getInsertions() {
|
public Object[] getInsertions() {
|
||||||
return insertions;
|
return insertions;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable[] getDeletions() {
|
public Object[] getDeletions() {
|
||||||
return deletions;
|
return deletions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -198,7 +198,7 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
@Override
|
@Override
|
||||||
public final Comparable getValue(final String str) {
|
public final Object getValue(final String str) {
|
||||||
return Integer.parseInt(str);
|
return Integer.parseInt(str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
|
|
@ -67,7 +65,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
@Override
|
@Override
|
||||||
public final Comparable getValue(final String str) {
|
public final Object getValue(final String str) {
|
||||||
return Integer.parseInt(str);
|
return Integer.parseInt(str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -38,6 +39,10 @@ import java.util.Arrays;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ReadGroupCovariate implements RequiredCovariate {
|
public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
|
|
||||||
|
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
|
||||||
|
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
|
||||||
|
private short nextId = 0;
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -48,16 +53,29 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||||
final int l = read.getReadLength();
|
final int l = read.getReadLength();
|
||||||
final String readGroupId = read.getReadGroup().getReadGroupId();
|
final String readGroupId = read.getReadGroup().getReadGroupId();
|
||||||
String [] readGroups = new String[l];
|
short shortId;
|
||||||
Arrays.fill(readGroups, readGroupId);
|
if (readGroupLookupTable.containsKey(readGroupId))
|
||||||
|
shortId = readGroupLookupTable.get(readGroupId);
|
||||||
|
else {
|
||||||
|
shortId = nextId;
|
||||||
|
readGroupLookupTable.put(readGroupId, nextId);
|
||||||
|
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||||
|
nextId++;
|
||||||
|
}
|
||||||
|
Short [] readGroups = new Short[l];
|
||||||
|
Arrays.fill(readGroups, shortId);
|
||||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
return new CovariateValues(readGroups, readGroups, readGroups);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
@Override
|
@Override
|
||||||
public final Comparable getValue(final String str) {
|
public final Object getValue(final String str) {
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public final String decodeReadGroup(final short id) {
|
||||||
|
return readGroupReverseLookupTable.get(id);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,14 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -39,6 +45,63 @@ import org.broadinstitute.sting.commandline.Hidden;
|
||||||
|
|
||||||
public class RecalibrationArgumentCollection {
|
public class RecalibrationArgumentCollection {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference,
|
||||||
|
* so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites. This tool accepts any number of RodBindings (VCF, Bed, etc.)
|
||||||
|
* for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
|
||||||
|
* Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument.
|
||||||
|
*/
|
||||||
|
@Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
|
||||||
|
protected List<RodBinding<Feature>> knownSites = Collections.emptyList();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
|
||||||
|
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
|
||||||
|
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||||
|
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||||
|
*/
|
||||||
|
@Gather(CountCovariatesGatherer.class)
|
||||||
|
@Output
|
||||||
|
protected PrintStream RECAL_FILE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List all implemented covariates.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
|
||||||
|
protected boolean LIST_ONLY = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false)
|
||||||
|
protected String[] COVARIATES = null;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use the standard set of covariates in addition to the ones listed using the -cov argument
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false)
|
||||||
|
protected boolean USE_STANDARD_COVARIATES = true;
|
||||||
|
|
||||||
|
/////////////////////////////
|
||||||
|
// Debugging-only Arguments
|
||||||
|
/////////////////////////////
|
||||||
|
/**
|
||||||
|
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
|
||||||
|
*/
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
|
||||||
|
protected boolean RUN_WITHOUT_DBSNP = false;
|
||||||
|
|
||||||
|
/////////////////////////////
|
||||||
|
// protected Member Variables
|
||||||
|
/////////////////////////////
|
||||||
|
protected final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables)
|
||||||
|
protected final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();// A list to hold the covariate objects that were requested
|
||||||
|
|
||||||
|
protected final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
|
||||||
|
protected final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
||||||
* reads which have had the reference inserted because of color space inconsistencies.
|
* reads which have had the reference inserted because of color space inconsistencies.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,226 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.utils.Median;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.text.DateFormat;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emits a GATKReport containing read group, sample, library, platform, center, sequencing data,
|
||||||
|
* paired end status, simple read type name (e.g. 2x76) median insert size and median read length
|
||||||
|
* for each read group in every provided BAM file
|
||||||
|
*
|
||||||
|
* Note that this walker stops when all read groups have been observed at least a few thousand times so that
|
||||||
|
* the median statistics are well determined. It is safe to run it WG and it'll finish in an appropriate
|
||||||
|
* timeframe.
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* Any number of BAM files
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* GATKReport containing read group, sample, library, platform, center, median insert size and median read length.
|
||||||
|
*
|
||||||
|
* For example, running this tool on the NA12878 data sets:
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* ##:GATKReport.v0.2 ReadGroupProperties : Table of read group properties
|
||||||
|
* readgroup sample library platform center date has.any.reads is.paired.end n.reads.analyzed simple.read.type median.read.length median.insert.size
|
||||||
|
* 20FUK.1 NA12878 Solexa-18483 illumina BI 2/2/10 true true 498 2x101 101 386
|
||||||
|
* 20FUK.2 NA12878 Solexa-18484 illumina BI 2/2/10 true true 476 2x101 101 417
|
||||||
|
* 20FUK.3 NA12878 Solexa-18483 illumina BI 2/2/10 true true 407 2x101 101 387
|
||||||
|
* 20FUK.4 NA12878 Solexa-18484 illumina BI 2/2/10 true true 389 2x101 101 415
|
||||||
|
* 20FUK.5 NA12878 Solexa-18483 illumina BI 2/2/10 true true 433 2x101 101 386
|
||||||
|
* 20FUK.6 NA12878 Solexa-18484 illumina BI 2/2/10 true true 480 2x101 101 418
|
||||||
|
* 20FUK.7 NA12878 Solexa-18483 illumina BI 2/2/10 true true 450 2x101 101 386
|
||||||
|
* 20FUK.8 NA12878 Solexa-18484 illumina BI 2/2/10 true true 438 2x101 101 418
|
||||||
|
* 20GAV.1 NA12878 Solexa-18483 illumina BI 1/26/10 true true 490 2x101 101 391
|
||||||
|
* 20GAV.2 NA12878 Solexa-18484 illumina BI 1/26/10 true true 485 2x101 101 417
|
||||||
|
* 20GAV.3 NA12878 Solexa-18483 illumina BI 1/26/10 true true 460 2x101 101 392
|
||||||
|
* 20GAV.4 NA12878 Solexa-18484 illumina BI 1/26/10 true true 434 2x101 101 415
|
||||||
|
* 20GAV.5 NA12878 Solexa-18483 illumina BI 1/26/10 true true 479 2x101 101 389
|
||||||
|
* 20GAV.6 NA12878 Solexa-18484 illumina BI 1/26/10 true true 461 2x101 101 416
|
||||||
|
* 20GAV.7 NA12878 Solexa-18483 illumina BI 1/26/10 true true 509 2x101 101 386
|
||||||
|
* 20GAV.8 NA12878 Solexa-18484 illumina BI 1/26/10 true true 476 2x101 101 410 101 414
|
||||||
|
* </pre>
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java
|
||||||
|
* -jar GenomeAnalysisTK.jar
|
||||||
|
* -T ReadGroupProperties
|
||||||
|
* -I example1.bam -I example2.bam etc
|
||||||
|
* -R reference.fasta
|
||||||
|
* -o example.gatkreport.txt
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author Mark DePristo
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
||||||
|
@Output
|
||||||
|
public PrintStream out;
|
||||||
|
|
||||||
|
@Argument(shortName="maxElementsForMedian", doc="Calculate median from the first maxElementsForMedian values observed", required=false)
|
||||||
|
public int MAX_VALUES_FOR_MEDIAN = 10000;
|
||||||
|
|
||||||
|
private final static String TABLE_NAME = "ReadGroupProperties";
|
||||||
|
private final Map<String, PerReadGroupInfo> readGroupInfo = new HashMap<String, PerReadGroupInfo>();
|
||||||
|
|
||||||
|
private class PerReadGroupInfo {
|
||||||
|
public final Median<Integer> readLength = new Median<Integer>(MAX_VALUES_FOR_MEDIAN);
|
||||||
|
public final Median<Integer> insertSize = new Median<Integer>(MAX_VALUES_FOR_MEDIAN);
|
||||||
|
public int nReadsSeen = 0, nReadsPaired = 0;
|
||||||
|
|
||||||
|
public boolean needsMoreData() {
|
||||||
|
return ! readLength.isFull() || ! insertSize.isFull();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
|
||||||
|
readGroupInfo.put(rg.getId(), new PerReadGroupInfo());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||||
|
return ! (read.getReadFailsVendorQualityCheckFlag() || read.getReadUnmappedFlag());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isDone() {
|
||||||
|
for ( PerReadGroupInfo info : readGroupInfo.values() ) {
|
||||||
|
if ( info.needsMoreData() )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer map(ReferenceContext referenceContext, GATKSAMRecord read, ReadMetaDataTracker readMetaDataTracker) {
|
||||||
|
final String rgID = read.getReadGroup().getId();
|
||||||
|
final PerReadGroupInfo info = readGroupInfo.get(rgID);
|
||||||
|
|
||||||
|
if ( info.needsMoreData() ) {
|
||||||
|
info.readLength.add(read.getReadLength());
|
||||||
|
info.nReadsSeen++;
|
||||||
|
if ( read.getReadPairedFlag() ) {
|
||||||
|
info.nReadsPaired++;
|
||||||
|
if ( read.getInferredInsertSize() != 0) {
|
||||||
|
info.insertSize.add(Math.abs(read.getInferredInsertSize()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer reduce(Integer integer, Integer integer1) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Integer sum) {
|
||||||
|
final GATKReport report = new GATKReport();
|
||||||
|
report.addTable(TABLE_NAME, "Table of read group properties");
|
||||||
|
GATKReportTable table = report.getTable(TABLE_NAME);
|
||||||
|
DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT);
|
||||||
|
|
||||||
|
table.addPrimaryKey("readgroup");
|
||||||
|
//* Emits a GATKReport containing read group, sample, library, platform, center, median insert size and
|
||||||
|
//* median read length for each read group in every BAM file.
|
||||||
|
table.addColumn("sample", "NA");
|
||||||
|
table.addColumn("library", "NA");
|
||||||
|
table.addColumn("platform", "NA");
|
||||||
|
table.addColumn("center", "NA");
|
||||||
|
table.addColumn("date", "NA");
|
||||||
|
table.addColumn("has.any.reads", "false");
|
||||||
|
table.addColumn("is.paired.end", "false");
|
||||||
|
table.addColumn("n.reads.analyzed", "NA");
|
||||||
|
table.addColumn("simple.read.type", "NA");
|
||||||
|
table.addColumn("median.read.length", Integer.valueOf(0));
|
||||||
|
table.addColumn("median.insert.size", Integer.valueOf(0));
|
||||||
|
|
||||||
|
for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
|
||||||
|
final String rgID = rg.getId();
|
||||||
|
PerReadGroupInfo info = readGroupInfo.get(rgID);
|
||||||
|
|
||||||
|
// we are paired if > 25% of reads are paired
|
||||||
|
final boolean isPaired = info.nReadsPaired / (1.0 * (info.nReadsSeen+1)) > 0.25;
|
||||||
|
final boolean hasAnyReads = info.nReadsSeen > 0;
|
||||||
|
final int readLength = info.readLength.getMedian(0);
|
||||||
|
|
||||||
|
setTableValue(table, rgID, "sample", rg.getSample());
|
||||||
|
setTableValue(table, rgID, "library", rg.getLibrary());
|
||||||
|
setTableValue(table, rgID, "platform", rg.getPlatform());
|
||||||
|
setTableValue(table, rgID, "center", rg.getSequencingCenter());
|
||||||
|
try {
|
||||||
|
setTableValue(table, rgID, "date", rg.getRunDate() != null ? dateFormatter.format(rg.getRunDate()) : "NA");
|
||||||
|
} catch ( NullPointerException e ) {
|
||||||
|
// TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present
|
||||||
|
setTableValue(table, rgID, "date", "NA");
|
||||||
|
}
|
||||||
|
setTableValue(table, rgID, "has.any.reads", hasAnyReads);
|
||||||
|
setTableValue(table, rgID, "is.paired.end", isPaired);
|
||||||
|
setTableValue(table, rgID, "n.reads.analyzed", info.nReadsSeen);
|
||||||
|
setTableValue(table, rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA");
|
||||||
|
setTableValue(table, rgID, "median.read.length", hasAnyReads ? readLength : "NA" );
|
||||||
|
setTableValue(table, rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA" );
|
||||||
|
}
|
||||||
|
|
||||||
|
report.print(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void setTableValue(GATKReportTable table, final String rgID, final String key, final Object value) {
|
||||||
|
table.set(rgID, key, value == null ? "NA" : value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -454,8 +454,7 @@ public class HaplotypeIndelErrorModel {
|
||||||
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2)
|
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2)
|
||||||
// First term is approximated by Jacobian log with table lookup.
|
// First term is approximated by Jacobian log with table lookup.
|
||||||
// Second term is a constant added to both likelihoods so will be ignored
|
// Second term is a constant added to both likelihoods so will be ignored
|
||||||
haplotypeLikehoodMatrix[i][j] += MathUtils.softMax(readLikelihood[0],
|
haplotypeLikehoodMatrix[i][j] += MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]);
|
||||||
readLikelihood[1]);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -166,18 +166,17 @@ public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
|
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
|
||||||
|
|
||||||
matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
|
matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]});
|
||||||
YMetricArray[im1][jm1] + pBaseRead);
|
|
||||||
|
|
||||||
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
||||||
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
||||||
|
|
||||||
XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
|
XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
|
||||||
|
|
||||||
// update Y array
|
// update Y array
|
||||||
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
||||||
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
||||||
YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
|
YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -316,9 +315,7 @@ public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
|
|
||||||
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
|
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
|
||||||
final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
|
final double bestMetric = MathUtils.approximateLog10SumLog10(new double[]{ matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ] });
|
||||||
XMetricArray[bestI][bestJ],
|
|
||||||
YMetricArray[bestI][bestJ]);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
|
|
@ -651,7 +648,7 @@ public class PairHMMIndelErrorModel {
|
||||||
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
|
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
|
||||||
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
|
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
|
||||||
|
|
||||||
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplied to just a single loop without the intermediate NxN matrix
|
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix
|
||||||
for (int i=0; i < numHaplotypes; i++) {
|
for (int i=0; i < numHaplotypes; i++) {
|
||||||
for (int j=i; j < numHaplotypes; j++){
|
for (int j=i; j < numHaplotypes; j++){
|
||||||
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
|
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
|
||||||
|
|
@ -665,7 +662,7 @@ public class PairHMMIndelErrorModel {
|
||||||
final double li = readLikelihoods[readIdx][i];
|
final double li = readLikelihoods[readIdx][i];
|
||||||
final double lj = readLikelihoods[readIdx][j];
|
final double lj = readLikelihoods[readIdx][j];
|
||||||
final int readCount = readCounts[readIdx];
|
final int readCount = readCounts[readIdx];
|
||||||
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF);
|
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.approximateLog10SumLog10(li, lj) + LOG_ONE_HALF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -678,7 +675,7 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// renormalize so that max element is zero.
|
// renormalize so that max element is zero.
|
||||||
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
|
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,13 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
@Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
|
@Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
|
||||||
boolean lowerCaseSNPs = false;
|
boolean lowerCaseSNPs = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If onlyOutputValidAmplicons is true, the output fasta file will contain only valid sequences.
|
||||||
|
* Useful for producing delivery-ready files.
|
||||||
|
*/
|
||||||
|
@Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false)
|
||||||
|
boolean onlyOutputValidAmplicons = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
|
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
|
||||||
* This changes the size of the k-mer used for alignment.
|
* This changes the size of the k-mer used for alignment.
|
||||||
|
|
@ -486,14 +493,16 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
valid = "Valid";
|
valid = "Valid";
|
||||||
}
|
}
|
||||||
|
|
||||||
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
|
||||||
|
|
||||||
if (!sequenomOutput)
|
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
|
||||||
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
||||||
else {
|
if (!sequenomOutput)
|
||||||
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
||||||
probeName = probeName.replace("amplicon_","a");
|
else {
|
||||||
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
||||||
|
probeName = probeName.replace("amplicon_","a");
|
||||||
|
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -140,15 +140,16 @@ public class GaussianMixtureModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
for( final VariantDatum datum : data ) {
|
for( final VariantDatum datum : data ) {
|
||||||
final ArrayList<Double> pVarInGaussianLog10 = new ArrayList<Double>( gaussians.size() );
|
final double[] pVarInGaussianLog10 = new double[gaussians.size()];
|
||||||
|
int gaussianIndex = 0;
|
||||||
for( final MultivariateGaussian gaussian : gaussians ) {
|
for( final MultivariateGaussian gaussian : gaussians ) {
|
||||||
final double pVarLog10 = gaussian.evaluateDatumLog10( datum );
|
final double pVarLog10 = gaussian.evaluateDatumLog10( datum );
|
||||||
pVarInGaussianLog10.add( pVarLog10 );
|
pVarInGaussianLog10[gaussianIndex++] = pVarLog10;
|
||||||
}
|
}
|
||||||
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10 );
|
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10, false );
|
||||||
int iii = 0;
|
gaussianIndex = 0;
|
||||||
for( final MultivariateGaussian gaussian : gaussians ) {
|
for( final MultivariateGaussian gaussian : gaussians ) {
|
||||||
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[iii++] ); //BUGBUG: to clean up
|
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[gaussianIndex++] );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -372,6 +372,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
stream.println("library(ggplot2)");
|
stream.println("library(ggplot2)");
|
||||||
// For compactPDF in R 2.13+
|
// For compactPDF in R 2.13+
|
||||||
stream.println("library(tools)");
|
stream.println("library(tools)");
|
||||||
|
// For graphical functions R 2.14.2+
|
||||||
|
stream.println("library(grid)");
|
||||||
|
|
||||||
createArrangeFunction( stream );
|
createArrangeFunction( stream );
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
|
|
@ -113,24 +114,28 @@ public class Haplotype {
|
||||||
return isReference;
|
return isReference;
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final byte[] paddedRef, final int refStart,
|
@Requires({"refInsertLocation >= 0", "hapStartInRefCoords >= 0"})
|
||||||
final Cigar haplotypeCigar, final int numBasesAddedToStartOfHaplotype, final int refHaplotypeLength ) {
|
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) {
|
||||||
|
|
||||||
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
|
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
|
||||||
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation);
|
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(hapStartInRefCoords, haplotypeCigar, refInsertLocation);
|
||||||
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||||
return getBases().clone();
|
return bases.clone();
|
||||||
}
|
}
|
||||||
haplotypeInsertLocation += numBasesAddedToStartOfHaplotype;
|
byte[] newHaplotype;
|
||||||
final byte[] newHaplotype = getBases().clone();
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
|
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
|
||||||
|
newHaplotype = bases.clone();
|
||||||
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
||||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||||
}
|
}
|
||||||
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
||||||
final int altAlleleLength = altAllele.length();
|
final int altAlleleLength = altAllele.length();
|
||||||
|
newHaplotype = new byte[bases.length + altAlleleLength];
|
||||||
|
for( int iii = 0; iii < bases.length; iii++ ) {
|
||||||
|
newHaplotype[iii] = bases[iii];
|
||||||
|
}
|
||||||
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
|
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
|
||||||
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
|
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
|
||||||
}
|
}
|
||||||
|
|
@ -138,24 +143,17 @@ public class Haplotype {
|
||||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||||
}
|
}
|
||||||
} else { // deletion
|
} else { // deletion
|
||||||
int refHaplotypeOffset = 0;
|
|
||||||
for( final CigarElement ce : haplotypeCigar.getCigarElements()) {
|
|
||||||
if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); }
|
|
||||||
else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); }
|
|
||||||
}
|
|
||||||
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
|
||||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
|
||||||
}
|
|
||||||
final int shift = refAllele.length() - altAllele.length();
|
final int shift = refAllele.length() - altAllele.length();
|
||||||
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) {
|
newHaplotype = new byte[bases.length - shift];
|
||||||
newHaplotype[iii] = newHaplotype[iii+shift];
|
for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) {
|
||||||
|
newHaplotype[iii] = bases[iii];
|
||||||
}
|
}
|
||||||
for( int iii = 0; iii < shift; iii++ ) {
|
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) {
|
||||||
newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii];
|
newHaplotype[iii] = bases[iii+shift];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
|
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
|
||||||
return getBases().clone();
|
return bases.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
return newHaplotype;
|
return newHaplotype;
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
import java.math.BigDecimal;
|
||||||
|
|
@ -40,14 +41,6 @@ import java.util.*;
|
||||||
* @author Kiran Garimella
|
* @author Kiran Garimella
|
||||||
*/
|
*/
|
||||||
public class MathUtils {
|
public class MathUtils {
|
||||||
/** Public constants - used for the Lanczos approximation to the factorial function
|
|
||||||
* (for the calculation of the binomial/multinomial probability in logspace)
|
|
||||||
* @param LANC_SEQ[] - an array holding the constants which correspond to the product
|
|
||||||
* of Chebyshev Polynomial coefficients, and points on the Gamma function (for interpolation)
|
|
||||||
* [see A Precision Approximation of the Gamma Function J. SIAM Numer. Anal. Ser. B, Vol. 1 1964. pp. 86-96]
|
|
||||||
* @param LANC_G - a value for the Lanczos approximation to the gamma function that works to
|
|
||||||
* high precision
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Private constructor. No instantiating this class!
|
* Private constructor. No instantiating this class!
|
||||||
|
|
@ -55,6 +48,28 @@ public class MathUtils {
|
||||||
private MathUtils() {
|
private MathUtils() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final double[] log10Cache;
|
||||||
|
private static final double[] jacobianLogTable;
|
||||||
|
private static final double JACOBIAN_LOG_TABLE_STEP = 0.001;
|
||||||
|
private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
|
||||||
|
private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1;
|
||||||
|
private static final int MAXN = 11000;
|
||||||
|
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients
|
||||||
|
|
||||||
|
static {
|
||||||
|
log10Cache = new double[LOG10_CACHE_SIZE];
|
||||||
|
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
|
||||||
|
|
||||||
|
log10Cache[0] = Double.NEGATIVE_INFINITY;
|
||||||
|
for (int k = 1; k < LOG10_CACHE_SIZE; k++)
|
||||||
|
log10Cache[k] = Math.log10(k);
|
||||||
|
|
||||||
|
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||||
|
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// A fast implementation of the Math.round() method. This method does not perform
|
// A fast implementation of the Math.round() method. This method does not perform
|
||||||
// under/overflow checking, so this shouldn't be used in the general case (but is fine
|
// under/overflow checking, so this shouldn't be used in the general case (but is fine
|
||||||
// if one is already make those checks before calling in to the rounding).
|
// if one is already make those checks before calling in to the rounding).
|
||||||
|
|
@ -535,7 +550,7 @@ public class MathUtils {
|
||||||
// all negative) the largest value; also, we need to convert to normal-space.
|
// all negative) the largest value; also, we need to convert to normal-space.
|
||||||
double maxValue = Utils.findMaxEntry(array);
|
double maxValue = Utils.findMaxEntry(array);
|
||||||
|
|
||||||
// we may decide to just normalize in log space with converting to linear space
|
// we may decide to just normalize in log space without converting to linear space
|
||||||
if (keepInLogSpace) {
|
if (keepInLogSpace) {
|
||||||
for (int i = 0; i < array.length; i++)
|
for (int i = 0; i < array.length; i++)
|
||||||
array[i] -= maxValue;
|
array[i] -= maxValue;
|
||||||
|
|
@ -562,29 +577,6 @@ public class MathUtils {
|
||||||
return normalized;
|
return normalized;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double[] normalizeFromLog10(List<Double> array, boolean takeLog10OfOutput) {
|
|
||||||
double[] normalized = new double[array.size()];
|
|
||||||
|
|
||||||
// for precision purposes, we need to add (or really subtract, since they're
|
|
||||||
// all negative) the largest value; also, we need to convert to normal-space.
|
|
||||||
double maxValue = MathUtils.arrayMaxDouble(array);
|
|
||||||
for (int i = 0; i < array.size(); i++)
|
|
||||||
normalized[i] = Math.pow(10, array.get(i) - maxValue);
|
|
||||||
|
|
||||||
// normalize
|
|
||||||
double sum = 0.0;
|
|
||||||
for (int i = 0; i < array.size(); i++)
|
|
||||||
sum += normalized[i];
|
|
||||||
for (int i = 0; i < array.size(); i++) {
|
|
||||||
double x = normalized[i] / sum;
|
|
||||||
if (takeLog10OfOutput)
|
|
||||||
x = Math.log10(x);
|
|
||||||
normalized[i] = x;
|
|
||||||
}
|
|
||||||
|
|
||||||
return normalized;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
|
* normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
|
||||||
*
|
*
|
||||||
|
|
@ -595,10 +587,6 @@ public class MathUtils {
|
||||||
return normalizeFromLog10(array, false);
|
return normalizeFromLog10(array, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double[] normalizeFromLog10(List<Double> array) {
|
|
||||||
return normalizeFromLog10(array, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int maxElementIndex(final double[] array) {
|
public static int maxElementIndex(final double[] array) {
|
||||||
return maxElementIndex(array, array.length);
|
return maxElementIndex(array, array.length);
|
||||||
}
|
}
|
||||||
|
|
@ -1206,78 +1194,11 @@ public class MathUtils {
|
||||||
return ((double) num) / (Math.max(denom, 1));
|
return ((double) num) / (Math.max(denom, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final double[] log10Cache;
|
|
||||||
public static final double[] jacobianLogTable;
|
|
||||||
public static final int JACOBIAN_LOG_TABLE_SIZE = 101;
|
|
||||||
public static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
|
|
||||||
public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP;
|
|
||||||
public static final double MAX_JACOBIAN_TOLERANCE = 10.0;
|
|
||||||
private static final int MAXN = 11000;
|
|
||||||
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients
|
|
||||||
|
|
||||||
static {
|
|
||||||
log10Cache = new double[LOG10_CACHE_SIZE];
|
|
||||||
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
|
|
||||||
|
|
||||||
log10Cache[0] = Double.NEGATIVE_INFINITY;
|
|
||||||
for (int k = 1; k < LOG10_CACHE_SIZE; k++)
|
|
||||||
log10Cache[k] = Math.log10(k);
|
|
||||||
|
|
||||||
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
|
||||||
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static public double softMax(final double[] vec) {
|
|
||||||
double acc = vec[0];
|
|
||||||
for (int k = 1; k < vec.length; k++)
|
|
||||||
acc = softMax(acc, vec[k]);
|
|
||||||
|
|
||||||
return acc;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
static public double max(double x0, double x1, double x2) {
|
static public double max(double x0, double x1, double x2) {
|
||||||
double a = Math.max(x0, x1);
|
double a = Math.max(x0, x1);
|
||||||
return Math.max(a, x2);
|
return Math.max(a, x2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static public double softMax(final double x0, final double x1, final double x2) {
|
|
||||||
// compute naively log10(10^x[0] + 10^x[1]+...)
|
|
||||||
// return Math.log10(MathUtils.sumLog10(vec));
|
|
||||||
|
|
||||||
// better approximation: do Jacobian logarithm function on data pairs
|
|
||||||
double a = softMax(x0, x1);
|
|
||||||
return softMax(a, x2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static public double softMax(final double x, final double y) {
|
|
||||||
// we need to compute log10(10^x + 10^y)
|
|
||||||
// By Jacobian logarithm identity, this is equal to
|
|
||||||
// max(x,y) + log10(1+10^-abs(x-y))
|
|
||||||
// we compute the second term as a table lookup
|
|
||||||
// with integer quantization
|
|
||||||
|
|
||||||
// slow exact version:
|
|
||||||
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
|
|
||||||
|
|
||||||
double diff = x - y;
|
|
||||||
|
|
||||||
if (diff > MAX_JACOBIAN_TOLERANCE)
|
|
||||||
return x;
|
|
||||||
else if (diff < -MAX_JACOBIAN_TOLERANCE)
|
|
||||||
return y;
|
|
||||||
else if (diff >= 0) {
|
|
||||||
int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
|
|
||||||
return x + jacobianLogTable[ind];
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
|
|
||||||
return y + jacobianLogTable[ind];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static double phredScaleToProbability(byte q) {
|
public static double phredScaleToProbability(byte q) {
|
||||||
return Math.pow(10, (-q) / 10.0);
|
return Math.pow(10, (-q) / 10.0);
|
||||||
}
|
}
|
||||||
|
|
@ -1619,30 +1540,118 @@ public class MathUtils {
|
||||||
* @param bitSet the bitset
|
* @param bitSet the bitset
|
||||||
* @return an integer with the bitset representation
|
* @return an integer with the bitset representation
|
||||||
*/
|
*/
|
||||||
public static int intFrom(final BitSet bitSet) {
|
public static long intFrom(final BitSet bitSet) {
|
||||||
int integer = 0;
|
long number = 0;
|
||||||
for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1))
|
for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1))
|
||||||
integer |= 1 << bitIndex;
|
number |= 1L << bitIndex;
|
||||||
|
|
||||||
return integer;
|
return number;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a BitSet representation of a given integer
|
* Creates a BitSet representation of a given integer
|
||||||
*
|
*
|
||||||
* @param integer the number to turn into a bitset
|
* @param number the number to turn into a bitset
|
||||||
* @return a bitset representation of the integer
|
* @return a bitset representation of the integer
|
||||||
*/
|
*/
|
||||||
public static BitSet bitSetFrom(int integer) {
|
public static BitSet bitSetFrom(long number) {
|
||||||
BitSet bitSet = new BitSet((int) Math.ceil(Math.sqrt(integer)));
|
BitSet bitSet = new BitSet();
|
||||||
int bitIndex = 0;
|
int bitIndex = 0;
|
||||||
while (integer > 0) {
|
while (number > 0) {
|
||||||
if (integer%2 > 0)
|
if (number%2 > 0)
|
||||||
bitSet.set(bitIndex);
|
bitSet.set(bitIndex);
|
||||||
bitIndex++;
|
bitIndex++;
|
||||||
integer /= 2;
|
number /= 2;
|
||||||
}
|
}
|
||||||
return bitSet;
|
return bitSet;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a BitSet into the dna string representation.
|
||||||
|
*
|
||||||
|
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||||
|
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||||
|
* a bitSetFrom(BigNumber) method.
|
||||||
|
*
|
||||||
|
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
|
||||||
|
* base_10 representation of the sequence. This is important for us to know how to bring the number
|
||||||
|
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
|
||||||
|
* as 0's and leading 0's are omitted).
|
||||||
|
*
|
||||||
|
* quasi-canonical because A is represented by a 0, therefore,
|
||||||
|
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
|
||||||
|
* we have : 0, 1, 2, 3, 00, 01, 02, ...
|
||||||
|
*
|
||||||
|
* but we can correctly decode it because we know the final length.
|
||||||
|
*
|
||||||
|
* @param bitSet the bitset representation of the dna sequence
|
||||||
|
* @return the dna sequence represented by the bitset
|
||||||
|
*/
|
||||||
|
public static String dnaFrom(final BitSet bitSet) {
|
||||||
|
long number = intFrom(bitSet); // the base_10 representation of the bit set
|
||||||
|
long preContext = 0; // the number of combinations skipped to get to the quasi-canonical representation (we keep it to subtract later)
|
||||||
|
long nextContext = 4; // the next context (we advance it so we know which one was preceding it).
|
||||||
|
int i = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||||
|
while (nextContext <= number) { // find the length of the dna string (i)
|
||||||
|
preContext = nextContext; // keep track of the number of combinations in the preceding context
|
||||||
|
nextContext += Math.pow(4, ++i);// calculate the next context
|
||||||
|
}
|
||||||
|
number -= preContext; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||||
|
|
||||||
|
String dna = "";
|
||||||
|
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||||
|
byte base = (byte) (number % 4);
|
||||||
|
switch (base) {
|
||||||
|
case 0 : dna = "A" + dna; break;
|
||||||
|
case 1 : dna = "C" + dna; break;
|
||||||
|
case 2 : dna = "G" + dna; break;
|
||||||
|
case 3 : dna = "T" + dna; break;
|
||||||
|
}
|
||||||
|
number /= 4;
|
||||||
|
}
|
||||||
|
for (int j = dna.length(); j < i; j++)
|
||||||
|
dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||||
|
|
||||||
|
return dna;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a BitSet representation of a given dna string.
|
||||||
|
*
|
||||||
|
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||||
|
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||||
|
* a bitSetFrom(BigNumber) method.
|
||||||
|
*
|
||||||
|
* The bit representation of a dna string is the simple:
|
||||||
|
* 0 A 4 AA 8 CA
|
||||||
|
* 1 C 5 AC ...
|
||||||
|
* 2 G 6 AG 1343 TTGGT
|
||||||
|
* 3 T 7 AT 1364 TTTTT
|
||||||
|
*
|
||||||
|
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||||
|
* preceded the string (with smaller lengths).
|
||||||
|
*
|
||||||
|
* @param dna the dna sequence
|
||||||
|
* @return the bitset representing the dna sequence
|
||||||
|
*/
|
||||||
|
public static BitSet bitSetFrom(String dna) {
|
||||||
|
if (dna.length() > 31)
|
||||||
|
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than 31. dna: %s (%d)", dna, dna.length()));
|
||||||
|
|
||||||
|
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
|
||||||
|
long preContext = 0; // the sum of all combinations that preceded the length of the dna string
|
||||||
|
for (int i=0; i<dna.length(); i++) {
|
||||||
|
baseTen *= 4;
|
||||||
|
switch(dna.charAt(i)) {
|
||||||
|
case 'A': baseTen += 0; break;
|
||||||
|
case 'C': baseTen += 1; break;
|
||||||
|
case 'G': baseTen += 2; break;
|
||||||
|
case 'T': baseTen += 3; break;
|
||||||
|
}
|
||||||
|
if (i>0)
|
||||||
|
preContext += Math.pow(4, i); // each length will have 4^i combinations (e.g 1 = 4, 2 = 16, 3 = 64, ...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,93 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility class for calculating median from a data set, potentially limiting the size of data to a
|
||||||
|
* fixed amount
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class Median<T extends Comparable> {
|
||||||
|
final List<T> values;
|
||||||
|
final int maxValuesToKeep;
|
||||||
|
boolean sorted = false;
|
||||||
|
|
||||||
|
public Median() {
|
||||||
|
this(Integer.MAX_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Median(final int maxValuesToKeep) {
|
||||||
|
this.maxValuesToKeep = maxValuesToKeep;
|
||||||
|
this.values = new ArrayList<T>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isFull() {
|
||||||
|
return values.size() >= maxValuesToKeep;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return values.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return values.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public T getMedian() {
|
||||||
|
if ( isEmpty() )
|
||||||
|
throw new IllegalStateException("Cannot get median value from empty array");
|
||||||
|
return getMedian(null); // note that value null will never be used
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the floor(n + 1 / 2) item from the list of values if the list
|
||||||
|
* has values, or defaultValue is the list is empty.
|
||||||
|
*/
|
||||||
|
public T getMedian(final T defaultValue) {
|
||||||
|
if ( isEmpty() )
|
||||||
|
return defaultValue;
|
||||||
|
|
||||||
|
if ( ! sorted ) {
|
||||||
|
sorted = true;
|
||||||
|
Collections.sort(values);
|
||||||
|
}
|
||||||
|
|
||||||
|
final int offset = (int)Math.floor((values.size() + 1) * 0.5) - 1;
|
||||||
|
return values.get(offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean add(final T value) {
|
||||||
|
if ( ! isFull() ) {
|
||||||
|
sorted = false;
|
||||||
|
return values.add(value);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -314,6 +315,15 @@ public class ClippingOp {
|
||||||
if (start == 0)
|
if (start == 0)
|
||||||
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
|
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
|
||||||
|
|
||||||
|
if (read.hasBaseIndelQualities()) {
|
||||||
|
byte[] newBaseInsertionQuals = new byte[newLength];
|
||||||
|
byte[] newBaseDeletionQuals = new byte[newLength];
|
||||||
|
System.arraycopy(read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength);
|
||||||
|
System.arraycopy(read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength);
|
||||||
|
hardClippedRead.setBaseQualities(newBaseInsertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
|
||||||
|
hardClippedRead.setBaseQualities(newBaseDeletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
|
||||||
|
}
|
||||||
|
|
||||||
return hardClippedRead;
|
return hardClippedRead;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ import java.util.Map;
|
||||||
* Date: Dec 29, 2009
|
* Date: Dec 29, 2009
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class NestedHashMap{
|
public class NestedHashMap {
|
||||||
|
|
||||||
public final Map data = new HashMap<Object, Object>();
|
public final Map data = new HashMap<Object, Object>();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,390 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.crypt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.io.IOUtils;
|
||||||
|
|
||||||
|
import javax.crypto.Cipher;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.security.*;
|
||||||
|
import java.security.spec.InvalidKeySpecException;
|
||||||
|
import java.security.spec.KeySpec;
|
||||||
|
import java.security.spec.PKCS8EncodedKeySpec;
|
||||||
|
import java.security.spec.X509EncodedKeySpec;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A set of cryptographic utility methods and constants.
|
||||||
|
*
|
||||||
|
* Contains methods to:
|
||||||
|
*
|
||||||
|
* -Create a public/private key pair
|
||||||
|
* -Read and write public/private keys to/from files/streams
|
||||||
|
* -Load the GATK master private/public keys
|
||||||
|
* -Encrypt/decrypt data
|
||||||
|
*
|
||||||
|
* Also contains constants that control the cryptographic defaults
|
||||||
|
* throughout the GATK.
|
||||||
|
*
|
||||||
|
* @author David Roazen
|
||||||
|
*/
|
||||||
|
public class CryptUtils {
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
// Constants (these control the default cryptographic settings throughout the GATK):
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default key length in bits of newly-created keys. 2048 bits provides a good balance between
|
||||||
|
* security and speed.
|
||||||
|
*/
|
||||||
|
public static final int DEFAULT_KEY_LENGTH = 2048;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default encryption algorithm to use, when none is specified.
|
||||||
|
*/
|
||||||
|
public static final String DEFAULT_ENCRYPTION_ALGORITHM = "RSA";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default random-number generation algorithm to use, when none is specified.
|
||||||
|
*/
|
||||||
|
public static final String DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM = "SHA1PRNG";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Name of the public key file distributed with the GATK. This file is packaged
|
||||||
|
* into the GATK jar, and we use the system ClassLoader to find it.
|
||||||
|
*/
|
||||||
|
public static final String GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME = "GATK_public.key";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Location of the master copy of the GATK private key.
|
||||||
|
*/
|
||||||
|
public static final String GATK_MASTER_PRIVATE_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_private.key";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Location of the master copy of the GATK public key. This file should always be the same as
|
||||||
|
* the public key file distributed with the GATK (and there are automated tests to ensure that it is).
|
||||||
|
*/
|
||||||
|
public static final String GATK_MASTER_PUBLIC_KEY_FILE = "/humgen/gsa-hpprojects/GATK/data/gatk_master_keys/GATK_public.key";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Directory where generated GATK user keys are stored. See the GATKKey class for more information.
|
||||||
|
*/
|
||||||
|
public static final String GATK_USER_KEY_DIRECTORY = "/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/";
|
||||||
|
|
||||||
|
|
||||||
|
// -----------------------
|
||||||
|
// Utility Methods:
|
||||||
|
// -----------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a new public/private key pair using the default encryption settings defined above.
|
||||||
|
*
|
||||||
|
* @return A new public/private key pair created using the default settings
|
||||||
|
*/
|
||||||
|
public static KeyPair generateKeyPair() {
|
||||||
|
return generateKeyPair(DEFAULT_KEY_LENGTH, DEFAULT_ENCRYPTION_ALGORITHM, DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a new public/private key pair using custom encryption settings.
|
||||||
|
*
|
||||||
|
* @param keyLength Length of the key in bits
|
||||||
|
* @param encryptionAlgorithm Encryption algorithm to use
|
||||||
|
* @param randNumberAlgorithm Random-number generation algorithm to use
|
||||||
|
* @return A new public/private key pair, created according to the specified parameters
|
||||||
|
*/
|
||||||
|
public static KeyPair generateKeyPair( int keyLength, String encryptionAlgorithm, String randNumberAlgorithm ) {
|
||||||
|
try {
|
||||||
|
KeyPairGenerator keyGen = KeyPairGenerator.getInstance(encryptionAlgorithm);
|
||||||
|
SecureRandom randomnessSource = createRandomnessSource(randNumberAlgorithm);
|
||||||
|
|
||||||
|
keyGen.initialize(keyLength, randomnessSource);
|
||||||
|
return keyGen.generateKeyPair();
|
||||||
|
}
|
||||||
|
catch ( NoSuchAlgorithmException e ) {
|
||||||
|
throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
|
||||||
|
}
|
||||||
|
catch ( Exception e ) {
|
||||||
|
throw new ReviewedStingException("Error while generating key pair", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a source of randomness using the default random-number generation algorithm.
|
||||||
|
*
|
||||||
|
* @return A randomness source that uses the default algorithm
|
||||||
|
*/
|
||||||
|
public static SecureRandom createRandomnessSource() {
|
||||||
|
return createRandomnessSource(DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a source of randomness using a custom random-number generation algorithm.
|
||||||
|
*
|
||||||
|
* @param randAlgorithm The random-number generation algorithm to use
|
||||||
|
* @return A randomness sources that uses the specified algorithm
|
||||||
|
*/
|
||||||
|
public static SecureRandom createRandomnessSource ( String randAlgorithm ) {
|
||||||
|
try {
|
||||||
|
return SecureRandom.getInstance(randAlgorithm);
|
||||||
|
}
|
||||||
|
catch ( NoSuchAlgorithmException e ) {
|
||||||
|
throw new ReviewedStingException(String.format("Could not find an implementation of the requested random-number generation algorithm %s", randAlgorithm), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a public/private key pair to disk
|
||||||
|
*
|
||||||
|
* @param keyPair The key pair we're writing to disk
|
||||||
|
* @param privateKeyFile Location to write the private key
|
||||||
|
* @param publicKeyFile Location to write the public key
|
||||||
|
*/
|
||||||
|
public static void writeKeyPair ( KeyPair keyPair, File privateKeyFile, File publicKeyFile ) {
|
||||||
|
writeKey(keyPair.getPrivate(), privateKeyFile);
|
||||||
|
writeKey(keyPair.getPublic(), publicKeyFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes an arbitrary key to disk
|
||||||
|
*
|
||||||
|
* @param key The key to write
|
||||||
|
* @param destination Location to write the key to
|
||||||
|
*/
|
||||||
|
public static void writeKey ( Key key, File destination ) {
|
||||||
|
IOUtils.writeByteArrayToFile(key.getEncoded(), destination);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads in a public key created using the default encryption algorithm from a file.
|
||||||
|
*
|
||||||
|
* @param source File containing the public key
|
||||||
|
* @return The public key read
|
||||||
|
*/
|
||||||
|
public static PublicKey readPublicKey ( File source ) {
|
||||||
|
return decodePublicKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads in a public key created using the default encryption algorithm from a stream.
|
||||||
|
*
|
||||||
|
* @param source Stream attached to the public key
|
||||||
|
* @return The public key read
|
||||||
|
*/
|
||||||
|
public static PublicKey readPublicKey ( InputStream source ) {
|
||||||
|
return decodePublicKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decodes the raw bytes of a public key into a usable object.
|
||||||
|
*
|
||||||
|
* @param rawKey The encoded bytes of a public key as read from, eg., a file. The
|
||||||
|
* key must be in the standard X.509 format for a public key.
|
||||||
|
* @param encryptionAlgorithm The encryption algorithm used to create the public key
|
||||||
|
* @return The public key as a usable object
|
||||||
|
*/
|
||||||
|
public static PublicKey decodePublicKey ( byte[] rawKey, String encryptionAlgorithm ) {
|
||||||
|
try {
|
||||||
|
KeySpec keySpec = new X509EncodedKeySpec(rawKey);
|
||||||
|
KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm);
|
||||||
|
return keyFactory.generatePublic(keySpec);
|
||||||
|
}
|
||||||
|
catch ( NoSuchAlgorithmException e ) {
|
||||||
|
throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
|
||||||
|
}
|
||||||
|
catch ( InvalidKeySpecException e ) {
|
||||||
|
throw new ReviewedStingException("Unable to use X.509 key specification to decode the given key", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads in a private key created using the default encryption algorithm from a file.
|
||||||
|
*
|
||||||
|
* @param source File containing the private key
|
||||||
|
* @return The private key read
|
||||||
|
*/
|
||||||
|
public static PrivateKey readPrivateKey ( File source ) {
|
||||||
|
return decodePrivateKey(IOUtils.readFileIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads in a private key created using the default encryption algorithm from a stream.
|
||||||
|
*
|
||||||
|
* @param source Stream attached to the private key
|
||||||
|
* @return The private key read
|
||||||
|
*/
|
||||||
|
public static PrivateKey readPrivateKey ( InputStream source ) {
|
||||||
|
return decodePrivateKey(IOUtils.readStreamIntoByteArray(source), DEFAULT_ENCRYPTION_ALGORITHM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decodes the raw bytes of a private key into a usable object.
|
||||||
|
*
|
||||||
|
* @param rawKey The encoded bytes of a private key as read from, eg., a file. The
|
||||||
|
* key must be in the standard PKCS #8 format for a private key.
|
||||||
|
* @param encryptionAlgorithm The encryption algorithm used to create the private key
|
||||||
|
* @return The private key as a usable object
|
||||||
|
*/
|
||||||
|
public static PrivateKey decodePrivateKey ( byte[] rawKey, String encryptionAlgorithm ) {
|
||||||
|
try {
|
||||||
|
KeySpec keySpec = new PKCS8EncodedKeySpec(rawKey);
|
||||||
|
KeyFactory keyFactory = KeyFactory.getInstance(encryptionAlgorithm);
|
||||||
|
return keyFactory.generatePrivate(keySpec);
|
||||||
|
}
|
||||||
|
catch ( NoSuchAlgorithmException e ) {
|
||||||
|
throw new ReviewedStingException(String.format("Could not find an implementation of the requested encryption algorithm %s", encryptionAlgorithm), e);
|
||||||
|
}
|
||||||
|
catch ( InvalidKeySpecException e ) {
|
||||||
|
throw new ReviewedStingException("Unable to use the PKCS #8 key specification to decode the given key", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads the copy of the GATK public key that is distributed with the GATK. Uses the system
|
||||||
|
* ClassLoader to locate the public key file, which should be stored at the root of the GATK
|
||||||
|
* jar file.
|
||||||
|
*
|
||||||
|
* @return The GATK public key as a usable object
|
||||||
|
*/
|
||||||
|
public static PublicKey loadGATKDistributedPublicKey() {
|
||||||
|
InputStream publicKeyInputStream = ClassLoader.getSystemResourceAsStream(GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME);
|
||||||
|
|
||||||
|
if ( publicKeyInputStream == null ) {
|
||||||
|
throw new ReviewedStingException(String.format("Could not locate the GATK public key %s in the classpath",
|
||||||
|
GATK_DISTRIBUTED_PUBLIC_KEY_FILE_NAME));
|
||||||
|
}
|
||||||
|
|
||||||
|
return readPublicKey(publicKeyInputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads the master copy of the GATK private key. You must have the appropriate UNIX permissions
|
||||||
|
* to do this!
|
||||||
|
*
|
||||||
|
* @return The GATK master private key as a usable object
|
||||||
|
*/
|
||||||
|
public static PrivateKey loadGATKMasterPrivateKey() {
|
||||||
|
return readPrivateKey(new File(GATK_MASTER_PRIVATE_KEY_FILE));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads the master copy of the GATK public key. This should always be the same as the
|
||||||
|
* public key distributed with the GATK returned by loadGATKDistributedPublicKey().
|
||||||
|
*
|
||||||
|
* @return The GATK master public key as a usable object
|
||||||
|
*/
|
||||||
|
public static PublicKey loadGATKMasterPublicKey() {
|
||||||
|
return readPublicKey(new File(GATK_MASTER_PUBLIC_KEY_FILE));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encrypts the given data using the key provided.
|
||||||
|
*
|
||||||
|
* @param data The data to encrypt, as a byte array
|
||||||
|
* @param encryptKey The key with which to encrypt the data
|
||||||
|
* @return The encrypted version of the provided data
|
||||||
|
*/
|
||||||
|
public static byte[] encryptData ( byte[] data, Key encryptKey ) {
|
||||||
|
return transformDataUsingCipher(data, encryptKey, Cipher.ENCRYPT_MODE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decrypts the given data using the key provided.
|
||||||
|
*
|
||||||
|
* @param encryptedData Data to decrypt, as a byte array
|
||||||
|
* @param decryptKey The key with which to decrypt the data
|
||||||
|
* @return The decrypted version of the provided data
|
||||||
|
*/
|
||||||
|
public static byte[] decryptData ( byte[] encryptedData, Key decryptKey ) {
|
||||||
|
return transformDataUsingCipher(encryptedData, decryptKey, Cipher.DECRYPT_MODE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method for encryption/decryption that takes data and processes it using
|
||||||
|
* the given key
|
||||||
|
*
|
||||||
|
* @param data Data to encrypt/decrypt
|
||||||
|
* @param key Key to use to encrypt/decrypt the data
|
||||||
|
* @param cipherMode Specifies whether we are encrypting or decrypting
|
||||||
|
* @return The encrypted/decrypted data
|
||||||
|
*/
|
||||||
|
private static byte[] transformDataUsingCipher ( byte[] data, Key key, int cipherMode ) {
|
||||||
|
try {
|
||||||
|
Cipher cipher = Cipher.getInstance(key.getAlgorithm());
|
||||||
|
cipher.init(cipherMode, key);
|
||||||
|
return cipher.doFinal(data);
|
||||||
|
}
|
||||||
|
catch ( NoSuchAlgorithmException e ) {
|
||||||
|
throw new ReviewedStingException(String.format("Could not find an implementation of the requested algorithm %s",
|
||||||
|
key.getAlgorithm()), e);
|
||||||
|
}
|
||||||
|
catch ( InvalidKeyException e ) {
|
||||||
|
throw new ReviewedStingException("Key is invalid", e);
|
||||||
|
}
|
||||||
|
catch ( GeneralSecurityException e ) {
|
||||||
|
throw new ReviewedStingException("Error during encryption", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests whether the public/private keys provided can each decrypt data encrypted by
|
||||||
|
* the other key -- ie., tests whether these two keys are part of the same public/private
|
||||||
|
* key pair.
|
||||||
|
*
|
||||||
|
* @param privateKey The private key to test
|
||||||
|
* @param publicKey The public key to test
|
||||||
|
* @return True if the keys are part of the same key pair and can decrypt each other's
|
||||||
|
* encrypted data, otherwise false.
|
||||||
|
*/
|
||||||
|
public static boolean keysDecryptEachOther ( PrivateKey privateKey, PublicKey publicKey ) {
|
||||||
|
byte[] plainText = "Test PlainText".getBytes();
|
||||||
|
|
||||||
|
byte[] dataEncryptedUsingPrivateKey = CryptUtils.encryptData(plainText, privateKey);
|
||||||
|
byte[] dataEncryptedUsingPublicKey = CryptUtils.encryptData(plainText, publicKey);
|
||||||
|
|
||||||
|
byte[] privateKeyDataDecryptedWithPublicKey = CryptUtils.decryptData(dataEncryptedUsingPrivateKey, publicKey);
|
||||||
|
byte[] publicKeyDataDecryptedWithPrivateKey = CryptUtils.decryptData(dataEncryptedUsingPublicKey, privateKey);
|
||||||
|
|
||||||
|
// Make sure we actually transformed the data during encryption:
|
||||||
|
if ( Arrays.equals(plainText, dataEncryptedUsingPrivateKey) ||
|
||||||
|
Arrays.equals(plainText, dataEncryptedUsingPublicKey) ||
|
||||||
|
Arrays.equals(dataEncryptedUsingPrivateKey, dataEncryptedUsingPublicKey) ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure that we were able to recreate the original plaintext using
|
||||||
|
// both the public key on the private-key-encrypted data and the private
|
||||||
|
// key on the public-key-encrypted data:
|
||||||
|
if ( ! Arrays.equals(plainText, privateKeyDataDecryptedWithPublicKey) ||
|
||||||
|
! Arrays.equals(plainText, publicKeyDataDecryptedWithPrivateKey) ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,349 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.crypt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.io.IOUtils;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.security.*;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class to represent a GATK user key.
|
||||||
|
*
|
||||||
|
* A GATK user key contains an email address and a cryptographic signature.
|
||||||
|
* The signature is the SHA-1 hash of the email address encrypted using
|
||||||
|
* the GATK master private key. The GATK master public key (distributed
|
||||||
|
* with the GATK) is used to decrypt the signature and validate the key
|
||||||
|
* at the start of each GATK run that requires a key.
|
||||||
|
*
|
||||||
|
* Keys are cryptographically secure in that valid keys definitely come
|
||||||
|
* from us and cannot be fabricated, however nothing prevents keys from
|
||||||
|
* being shared between users.
|
||||||
|
*
|
||||||
|
* GATK user keys have the following on-disk format:
|
||||||
|
*
|
||||||
|
* GZIP Container:
|
||||||
|
* Email address
|
||||||
|
* NUL byte (delimiter)
|
||||||
|
* Cryptographic Signature (encrypted SHA-1 hash of email address)
|
||||||
|
*
|
||||||
|
* The key data is wrapped within a GZIP container to placate over-zealous
|
||||||
|
* email filters (since keys must often be emailed) and also to provide an
|
||||||
|
* additional integrity check via the built-in GZIP CRC.
|
||||||
|
*
|
||||||
|
* @author David Roazen
|
||||||
|
*/
|
||||||
|
public class GATKKey {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Private key used to sign the GATK key. Required only when creating a new
|
||||||
|
* key from scratch, not when loading an existing key from disk.
|
||||||
|
*/
|
||||||
|
private PrivateKey privateKey;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Public key used to validate the GATK key.
|
||||||
|
*/
|
||||||
|
private PublicKey publicKey;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The user's email address, stored within the key and signed.
|
||||||
|
*/
|
||||||
|
private String emailAddress;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The cryptographic signature of the email address. By default, this is
|
||||||
|
* the SHA-1 hash of the email address encrypted using the RSA algorithm.
|
||||||
|
*/
|
||||||
|
private byte[] signature;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The combination of hash/encryption algorithms to use to generate the signature.
|
||||||
|
* By default this is "SHA1withRSA"
|
||||||
|
*/
|
||||||
|
private String signingAlgorithm;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default hash/encryption algorithms to use to sign the key.
|
||||||
|
*/
|
||||||
|
public static final String DEFAULT_SIGNING_ALGORITHM = "SHA1withRSA";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Byte value used to separate the email address from its signature in the key file.
|
||||||
|
*/
|
||||||
|
public static final byte GATK_KEY_SECTIONAL_DELIMITER = 0;
|
||||||
|
|
||||||
|
|
||||||
|
// -----------------------
|
||||||
|
// Constructors:
|
||||||
|
// -----------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor to create a new GATK key from scratch using an email address
|
||||||
|
* and public/private key pair. The private key is used for signing, and the
|
||||||
|
* public key is used to validate the newly-created key.
|
||||||
|
*
|
||||||
|
* @param privateKey Private key used to sign the new GATK key
|
||||||
|
* @param publicKey Public key used to validate the new GATK key
|
||||||
|
* @param emailAddress The user's email address, which we will store in the key and sign
|
||||||
|
*/
|
||||||
|
public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress ) {
|
||||||
|
this(privateKey, publicKey, emailAddress, DEFAULT_SIGNING_ALGORITHM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor to create a new GATK key from scratch using an email address
|
||||||
|
* and public/private key pair, and additionally specify the signing algorithm
|
||||||
|
* to use. The private key is used for signing, and the public key is used to
|
||||||
|
* validate the newly-created key.
|
||||||
|
*
|
||||||
|
* @param privateKey Private key used to sign the new GATK key
|
||||||
|
* @param publicKey Public key used to validate the new GATK key
|
||||||
|
* @param emailAddress The user's email address, which we will store in the key and sign
|
||||||
|
* @param signingAlgorithm The combination of hash and encryption algorithms to use to sign the key
|
||||||
|
*/
|
||||||
|
public GATKKey ( PrivateKey privateKey, PublicKey publicKey, String emailAddress, String signingAlgorithm ) {
|
||||||
|
if ( privateKey == null || publicKey == null || emailAddress == null || emailAddress.length() == 0 || signingAlgorithm == null ) {
|
||||||
|
throw new ReviewedStingException("Cannot construct GATKKey using null/empty arguments");
|
||||||
|
}
|
||||||
|
|
||||||
|
this.privateKey = privateKey;
|
||||||
|
this.publicKey = publicKey;
|
||||||
|
this.emailAddress = emailAddress;
|
||||||
|
this.signingAlgorithm = signingAlgorithm;
|
||||||
|
|
||||||
|
validateEmailAddress();
|
||||||
|
generateSignature();
|
||||||
|
|
||||||
|
if ( ! isValid() ) {
|
||||||
|
throw new ReviewedStingException("Newly-generated GATK key fails validation -- this should never happen!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor to load an existing GATK key from a file.
|
||||||
|
*
|
||||||
|
* During loading, the key file is checked for integrity, but not cryptographic
|
||||||
|
* validity (which must be done through a subsequent call to isValid()).
|
||||||
|
*
|
||||||
|
* @param publicKey Public key that will be used to validate the loaded GATK key
|
||||||
|
* in subsequent calls to isValid()
|
||||||
|
* @param keyFile File containing the GATK key to load
|
||||||
|
*/
|
||||||
|
public GATKKey ( PublicKey publicKey, File keyFile ) {
|
||||||
|
this(publicKey, keyFile, DEFAULT_SIGNING_ALGORITHM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor to load an existing GATK key from a file, and additionally specify
|
||||||
|
* the signing algorithm used to sign the key being loaded.
|
||||||
|
*
|
||||||
|
* During loading, the key file is checked for integrity, but not cryptographic
|
||||||
|
* validity (which must be done through a subsequent call to isValid()).
|
||||||
|
*
|
||||||
|
* @param publicKey Public key that will be used to validate the loaded GATK key
|
||||||
|
* in subsequent calls to isValid()
|
||||||
|
* @param keyFile File containing the GATK key to load
|
||||||
|
* @param signingAlgorithm The combination of hash and encryption algorithms used to sign the key
|
||||||
|
*/
|
||||||
|
public GATKKey ( PublicKey publicKey, File keyFile, String signingAlgorithm ) {
|
||||||
|
if ( publicKey == null || keyFile == null || signingAlgorithm == null ) {
|
||||||
|
throw new ReviewedStingException("Cannot construct GATKKey using null arguments");
|
||||||
|
}
|
||||||
|
|
||||||
|
this.publicKey = publicKey;
|
||||||
|
this.signingAlgorithm = signingAlgorithm;
|
||||||
|
|
||||||
|
readKey(keyFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------
|
||||||
|
// Public API Methods:
|
||||||
|
// -----------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes out this key to a file in the format described at the top of this class,
|
||||||
|
* encapsulating the key within a GZIP container.
|
||||||
|
*
|
||||||
|
* @param destination File to write the key to
|
||||||
|
*/
|
||||||
|
public void writeKey ( File destination ) {
|
||||||
|
try {
|
||||||
|
byte[] keyBytes = marshalKeyData();
|
||||||
|
IOUtils.writeByteArrayToStream(keyBytes, new GZIPOutputStream(new FileOutputStream(destination)));
|
||||||
|
}
|
||||||
|
catch ( IOException e ) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(destination, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether the signature of this key is cryptographically valid (ie., can be
|
||||||
|
* decrypted by the public key to produce a valid SHA-1 hash of the email address
|
||||||
|
* in the key).
|
||||||
|
*
|
||||||
|
* @return True if the key's signature passes validation, otherwise false
|
||||||
|
*/
|
||||||
|
public boolean isValid() {
|
||||||
|
try {
|
||||||
|
Signature sig = Signature.getInstance(signingAlgorithm);
|
||||||
|
sig.initVerify(publicKey);
|
||||||
|
sig.update(emailAddress.getBytes());
|
||||||
|
return sig.verify(signature);
|
||||||
|
}
|
||||||
|
catch ( NoSuchAlgorithmException e ) {
|
||||||
|
throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e);
|
||||||
|
}
|
||||||
|
catch ( InvalidKeyException e ) {
|
||||||
|
// If the GATK public key is invalid, it's likely our problem, not the user's:
|
||||||
|
throw new ReviewedStingException(String.format("Public key %s is invalid", publicKey), e);
|
||||||
|
}
|
||||||
|
catch ( SignatureException e ) {
|
||||||
|
throw new UserException.UnreadableKeyException("Signature is invalid or signing algorithm was unable to process the input data", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------
|
||||||
|
// Private Helper Methods:
|
||||||
|
// -----------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method that creates a signature for this key using the combination of
|
||||||
|
* hash/encryption algorithms specified at construction time.
|
||||||
|
*/
|
||||||
|
private void generateSignature() {
|
||||||
|
try {
|
||||||
|
Signature sig = Signature.getInstance(signingAlgorithm);
|
||||||
|
sig.initSign(privateKey, CryptUtils.createRandomnessSource());
|
||||||
|
sig.update(emailAddress.getBytes());
|
||||||
|
signature = sig.sign();
|
||||||
|
}
|
||||||
|
catch ( NoSuchAlgorithmException e ) {
|
||||||
|
throw new ReviewedStingException(String.format("Signing algorithm %s not found", signingAlgorithm), e);
|
||||||
|
}
|
||||||
|
catch ( InvalidKeyException e ) {
|
||||||
|
throw new ReviewedStingException(String.format("Private key %s is invalid", privateKey), e);
|
||||||
|
}
|
||||||
|
catch ( SignatureException e ) {
|
||||||
|
throw new ReviewedStingException(String.format("Error creating signature for email address %s", emailAddress), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method that reads in a GATK key from a file. Should not be called directly --
|
||||||
|
* use the appropriate constructor above.
|
||||||
|
*
|
||||||
|
* @param source File to read the key from
|
||||||
|
*/
|
||||||
|
private void readKey ( File source ) {
|
||||||
|
try {
|
||||||
|
byte[] keyBytes = IOUtils.readStreamIntoByteArray(new GZIPInputStream(new FileInputStream(source)));
|
||||||
|
|
||||||
|
// As a sanity check, compare the number of bytes read to the uncompressed file size
|
||||||
|
// stored in the GZIP ISIZE field. If they don't match, the key must be corrupt:
|
||||||
|
if ( keyBytes.length != IOUtils.getGZIPFileUncompressedSize(source) ) {
|
||||||
|
throw new UserException.UnreadableKeyException("Number of bytes read does not match the uncompressed size specified in the GZIP ISIZE field");
|
||||||
|
}
|
||||||
|
|
||||||
|
unmarshalKeyData(keyBytes);
|
||||||
|
}
|
||||||
|
catch ( FileNotFoundException e ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(source, e);
|
||||||
|
}
|
||||||
|
catch ( IOException e ) {
|
||||||
|
throw new UserException.UnreadableKeyException(source, e);
|
||||||
|
}
|
||||||
|
catch ( UserException.CouldNotReadInputFile e ) {
|
||||||
|
throw new UserException.UnreadableKeyException(source, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method that assembles the email address and signature into a format
|
||||||
|
* suitable for writing to disk.
|
||||||
|
*
|
||||||
|
* @return The aggregated key data, ready to be written to disk
|
||||||
|
*/
|
||||||
|
private byte[] marshalKeyData() {
|
||||||
|
byte[] emailAddressBytes = emailAddress.getBytes();
|
||||||
|
byte[] assembledKey = new byte[emailAddressBytes.length + 1 + signature.length];
|
||||||
|
|
||||||
|
System.arraycopy(emailAddressBytes, 0, assembledKey, 0, emailAddressBytes.length);
|
||||||
|
assembledKey[emailAddressBytes.length] = GATK_KEY_SECTIONAL_DELIMITER;
|
||||||
|
System.arraycopy(signature, 0, assembledKey, emailAddressBytes.length + 1, signature.length);
|
||||||
|
|
||||||
|
return assembledKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method that parses the raw key data from disk into its component
|
||||||
|
* email address and signature. Performs some basic validation in the process.
|
||||||
|
*
|
||||||
|
* @param keyBytes The raw, uncompressed key data read from disk
|
||||||
|
*/
|
||||||
|
private void unmarshalKeyData ( byte[] keyBytes ) {
|
||||||
|
int delimiterPosition = -1;
|
||||||
|
|
||||||
|
for ( int i = 0; i < keyBytes.length; i++ ) {
|
||||||
|
if ( keyBytes[i] == GATK_KEY_SECTIONAL_DELIMITER ) {
|
||||||
|
delimiterPosition = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( delimiterPosition == -1 ) {
|
||||||
|
throw new UserException.UnreadableKeyException("Malformed GATK key contains no sectional delimiter");
|
||||||
|
}
|
||||||
|
else if ( delimiterPosition == 0 ) {
|
||||||
|
throw new UserException.UnreadableKeyException("Malformed GATK key contains no email address");
|
||||||
|
}
|
||||||
|
else if ( delimiterPosition == keyBytes.length - 1 ) {
|
||||||
|
throw new UserException.UnreadableKeyException("Malformed GATK key contains no signature");
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] emailAddressBytes = new byte[delimiterPosition];
|
||||||
|
System.arraycopy(keyBytes, 0, emailAddressBytes, 0, delimiterPosition);
|
||||||
|
emailAddress = new String(emailAddressBytes);
|
||||||
|
|
||||||
|
signature = new byte[keyBytes.length - delimiterPosition - 1];
|
||||||
|
System.arraycopy(keyBytes, delimiterPosition + 1, signature, 0, keyBytes.length - delimiterPosition - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method that ensures that the user's email address does not contain the NUL byte, which we
|
||||||
|
* reserve as a delimiter within each key file.
|
||||||
|
*/
|
||||||
|
private void validateEmailAddress() {
|
||||||
|
for ( byte b : emailAddress.getBytes() ) {
|
||||||
|
if ( b == GATK_KEY_SECTIONAL_DELIMITER ) {
|
||||||
|
throw new UserException(String.format("Email address must not contain a byte with value %d", GATK_KEY_SECTIONAL_DELIMITER));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -132,6 +132,10 @@ public class UserException extends ReviewedStingException {
|
||||||
public CouldNotReadInputFile(File file, Exception e) {
|
public CouldNotReadInputFile(File file, Exception e) {
|
||||||
this(file, e.getMessage());
|
this(file, e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public CouldNotReadInputFile(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -151,6 +155,10 @@ public class UserException extends ReviewedStingException {
|
||||||
public CouldNotCreateOutputFile(File file, Exception e) {
|
public CouldNotCreateOutputFile(File file, Exception e) {
|
||||||
super(String.format("Couldn't write file %s because exception %s", file.getAbsolutePath(), e.getMessage()));
|
super(String.format("Couldn't write file %s because exception %s", file.getAbsolutePath(), e.getMessage()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public CouldNotCreateOutputFile(String message, Exception e) {
|
||||||
|
super(message, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class MissortedBAM extends UserException {
|
public static class MissortedBAM extends UserException {
|
||||||
|
|
@ -319,4 +327,32 @@ public class UserException extends ReviewedStingException {
|
||||||
"and try again.", null);
|
"and try again.", null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class UnreadableKeyException extends UserException {
|
||||||
|
public UnreadableKeyException ( File f, Exception e ) {
|
||||||
|
super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " +
|
||||||
|
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
|
||||||
|
f.getAbsolutePath(), e.getMessage()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public UnreadableKeyException ( String message, Exception e ) {
|
||||||
|
this(String.format("%s. Error was: %s", message, e.getMessage()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public UnreadableKeyException ( String message ) {
|
||||||
|
super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " +
|
||||||
|
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
|
||||||
|
message));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class KeySignatureVerificationException extends UserException {
|
||||||
|
public KeySignatureVerificationException ( File f ) {
|
||||||
|
super(String.format("The signature in key file %s failed cryptographic verification. " +
|
||||||
|
"If this key was valid in the past, it's likely been revoked. " +
|
||||||
|
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
|
||||||
|
"for help.",
|
||||||
|
f.getAbsolutePath()));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -151,23 +151,16 @@ public class FragmentUtils {
|
||||||
final int numBases = firstReadStop + secondRead.getReadLength();
|
final int numBases = firstReadStop + secondRead.getReadLength();
|
||||||
final byte[] bases = new byte[numBases];
|
final byte[] bases = new byte[numBases];
|
||||||
final byte[] quals = new byte[numBases];
|
final byte[] quals = new byte[numBases];
|
||||||
// BUGBUG: too verbose, clean this up.
|
|
||||||
final byte[] insertionQuals = new byte[numBases];
|
final byte[] insertionQuals = new byte[numBases];
|
||||||
final byte[] deletionQuals = new byte[numBases];
|
final byte[] deletionQuals = new byte[numBases];
|
||||||
final byte[] firstReadBases = firstRead.getReadBases();
|
final byte[] firstReadBases = firstRead.getReadBases();
|
||||||
final byte[] firstReadQuals = firstRead.getBaseQualities();
|
final byte[] firstReadQuals = firstRead.getBaseQualities();
|
||||||
final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
|
|
||||||
final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
|
|
||||||
final byte[] secondReadBases = secondRead.getReadBases();
|
final byte[] secondReadBases = secondRead.getReadBases();
|
||||||
final byte[] secondReadQuals = secondRead.getBaseQualities();
|
final byte[] secondReadQuals = secondRead.getBaseQualities();
|
||||||
final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
|
|
||||||
final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
|
|
||||||
|
|
||||||
for(int iii = 0; iii < firstReadStop; iii++) {
|
for(int iii = 0; iii < firstReadStop; iii++) {
|
||||||
bases[iii] = firstReadBases[iii];
|
bases[iii] = firstReadBases[iii];
|
||||||
quals[iii] = firstReadQuals[iii];
|
quals[iii] = firstReadQuals[iii];
|
||||||
insertionQuals[iii] = firstReadInsertionQuals[iii];
|
|
||||||
deletionQuals[iii] = firstReadDeletionQuals[iii];
|
|
||||||
}
|
}
|
||||||
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
||||||
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
|
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
|
||||||
|
|
@ -175,22 +168,16 @@ public class FragmentUtils {
|
||||||
}
|
}
|
||||||
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
|
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
|
||||||
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
|
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
|
||||||
insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
|
|
||||||
deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
|
|
||||||
}
|
}
|
||||||
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
|
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
|
||||||
bases[iii] = secondReadBases[iii-firstReadStop];
|
bases[iii] = secondReadBases[iii-firstReadStop];
|
||||||
quals[iii] = secondReadQuals[iii-firstReadStop];
|
quals[iii] = secondReadQuals[iii-firstReadStop];
|
||||||
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
|
|
||||||
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
|
final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
|
||||||
returnRead.setAlignmentStart(firstRead.getUnclippedStart());
|
returnRead.setAlignmentStart(firstRead.getUnclippedStart());
|
||||||
returnRead.setReadBases( bases );
|
returnRead.setReadBases( bases );
|
||||||
returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION );
|
returnRead.setBaseQualities( quals );
|
||||||
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
|
|
||||||
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
|
|
||||||
returnRead.setReadGroup( firstRead.getReadGroup() );
|
returnRead.setReadGroup( firstRead.getReadGroup() );
|
||||||
returnRead.setReferenceName( firstRead.getReferenceName() );
|
returnRead.setReferenceName( firstRead.getReferenceName() );
|
||||||
final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
|
final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
|
||||||
|
|
@ -199,6 +186,27 @@ public class FragmentUtils {
|
||||||
returnRead.setCigar( new Cigar( cList ));
|
returnRead.setCigar( new Cigar( cList ));
|
||||||
returnRead.setMappingQuality( firstRead.getMappingQuality() );
|
returnRead.setMappingQuality( firstRead.getMappingQuality() );
|
||||||
|
|
||||||
|
if( firstRead.hasBaseIndelQualities() || secondRead.hasBaseIndelQualities() ) {
|
||||||
|
final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
|
||||||
|
final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
|
||||||
|
final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
|
||||||
|
final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
|
||||||
|
for(int iii = 0; iii < firstReadStop; iii++) {
|
||||||
|
insertionQuals[iii] = firstReadInsertionQuals[iii];
|
||||||
|
deletionQuals[iii] = firstReadDeletionQuals[iii];
|
||||||
|
}
|
||||||
|
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
||||||
|
insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score
|
||||||
|
deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score
|
||||||
|
}
|
||||||
|
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
|
||||||
|
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
|
||||||
|
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
|
||||||
|
}
|
||||||
|
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
|
||||||
|
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
|
||||||
|
}
|
||||||
|
|
||||||
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>();
|
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>();
|
||||||
returnList.add(returnRead);
|
returnList.add(returnRead);
|
||||||
return returnList;
|
return returnList;
|
||||||
|
|
|
||||||
|
|
@ -29,10 +29,13 @@ import org.apache.commons.io.FilenameUtils;
|
||||||
import org.apache.commons.io.LineIterator;
|
import org.apache.commons.io.LineIterator;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class IOUtils {
|
public class IOUtils {
|
||||||
|
|
@ -400,4 +403,173 @@ public class IOUtils {
|
||||||
public static boolean isSpecialFile(File file) {
|
public static boolean isSpecialFile(File file) {
|
||||||
return file != null && (file.getAbsolutePath().startsWith("/dev/") || file.equals(DEV_DIR));
|
return file != null && (file.getAbsolutePath().startsWith("/dev/") || file.equals(DEV_DIR));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads the entirety of the given file into a byte array. Uses a read buffer size of 4096 bytes.
|
||||||
|
*
|
||||||
|
* @param source File to read
|
||||||
|
* @return The contents of the file as a byte array
|
||||||
|
*/
|
||||||
|
public static byte[] readFileIntoByteArray ( File source ) {
|
||||||
|
return readFileIntoByteArray(source, 4096);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads the entirety of the given file into a byte array using the requested read buffer size.
|
||||||
|
*
|
||||||
|
* @param source File to read
|
||||||
|
* @param readBufferSize Number of bytes to read in at one time
|
||||||
|
* @return The contents of the file as a byte array
|
||||||
|
*/
|
||||||
|
public static byte[] readFileIntoByteArray ( File source, int readBufferSize ) {
|
||||||
|
if ( source == null ) {
|
||||||
|
throw new ReviewedStingException("Source file was null");
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] fileContents;
|
||||||
|
|
||||||
|
try {
|
||||||
|
fileContents = readStreamIntoByteArray(new FileInputStream(source), readBufferSize);
|
||||||
|
}
|
||||||
|
catch ( FileNotFoundException e ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(source, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( fileContents.length != source.length() ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(String.format("Unable to completely read file %s: read only %d/%d bytes",
|
||||||
|
source.getAbsolutePath(), fileContents.length, source.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return fileContents;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads all data from the given stream into a byte array. Uses a read buffer size of 4096 bytes.
|
||||||
|
*
|
||||||
|
* @param in Stream to read data from
|
||||||
|
* @return The contents of the stream as a byte array
|
||||||
|
*/
|
||||||
|
public static byte[] readStreamIntoByteArray ( InputStream in ) {
|
||||||
|
return readStreamIntoByteArray(in, 4096);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads all data from the given stream into a byte array using the requested read buffer size.
|
||||||
|
*
|
||||||
|
* @param in Stream to read data from
|
||||||
|
* @param readBufferSize Number of bytes to read in at one time
|
||||||
|
* @return The contents of the stream as a byte array
|
||||||
|
*/
|
||||||
|
public static byte[] readStreamIntoByteArray ( InputStream in, int readBufferSize ) {
|
||||||
|
if ( in == null ) {
|
||||||
|
throw new ReviewedStingException("Input stream was null");
|
||||||
|
}
|
||||||
|
else if ( readBufferSize <= 0 ) {
|
||||||
|
throw new ReviewedStingException("Read buffer size must be > 0");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use a fixed-size buffer for each read, but a dynamically-growing buffer
|
||||||
|
// to hold the accumulated contents of the file/stream:
|
||||||
|
byte[] readBuffer = new byte[readBufferSize];
|
||||||
|
ByteArrayOutputStream fileBuffer = new ByteArrayOutputStream(readBufferSize * 4);
|
||||||
|
|
||||||
|
try {
|
||||||
|
try {
|
||||||
|
int currentBytesRead;
|
||||||
|
|
||||||
|
while ( (currentBytesRead = in.read(readBuffer, 0, readBuffer.length)) >= 0 ) {
|
||||||
|
fileBuffer.write(readBuffer, 0, currentBytesRead);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch ( IOException e ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile("I/O error reading from input stream", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return fileBuffer.toByteArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes the given array of bytes to a file
|
||||||
|
*
|
||||||
|
* @param bytes Data to write
|
||||||
|
* @param destination File to write the data to
|
||||||
|
*/
|
||||||
|
public static void writeByteArrayToFile ( byte[] bytes, File destination ) {
|
||||||
|
if ( destination == null ) {
|
||||||
|
throw new ReviewedStingException("Destination file was null");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
writeByteArrayToStream(bytes, new FileOutputStream(destination));
|
||||||
|
}
|
||||||
|
catch ( FileNotFoundException e ) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(destination, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes the given array of bytes to a stream
|
||||||
|
*
|
||||||
|
* @param bytes Data to write
|
||||||
|
* @param out Stream to write the data to
|
||||||
|
*/
|
||||||
|
public static void writeByteArrayToStream ( byte[] bytes, OutputStream out ) {
|
||||||
|
if ( bytes == null || out == null ) {
|
||||||
|
throw new ReviewedStingException("Data to write or output stream was null");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
try {
|
||||||
|
out.write(bytes);
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch ( IOException e ) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile("I/O error writing to output stream", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines the uncompressed size of a GZIP file. Uses the GZIP ISIZE field in the last
|
||||||
|
* 4 bytes of the file to get this information.
|
||||||
|
*
|
||||||
|
* @param gzipFile GZIP-format file whose uncompressed size to determine
|
||||||
|
* @return The uncompressed size (in bytes) of the GZIP file
|
||||||
|
*/
|
||||||
|
public static int getGZIPFileUncompressedSize ( File gzipFile ) {
|
||||||
|
if ( gzipFile == null ) {
|
||||||
|
throw new ReviewedStingException("GZIP file to examine was null");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// The GZIP ISIZE field holds the uncompressed size of the compressed data.
|
||||||
|
// It occupies the last 4 bytes of any GZIP file:
|
||||||
|
RandomAccessFile in = new RandomAccessFile(gzipFile, "r");
|
||||||
|
in.seek(gzipFile.length() - 4);
|
||||||
|
byte[] sizeBytes = new byte[4];
|
||||||
|
in.read(sizeBytes, 0, 4);
|
||||||
|
|
||||||
|
ByteBuffer byteBuf = ByteBuffer.wrap(sizeBytes);
|
||||||
|
byteBuf.order(ByteOrder.LITTLE_ENDIAN); // The GZIP spec mandates little-endian byte order
|
||||||
|
int uncompressedSize = byteBuf.getInt();
|
||||||
|
|
||||||
|
// If the size read in is negative, we've overflowed our signed integer:
|
||||||
|
if ( uncompressedSize < 0 ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(String.format("Cannot accurately determine the uncompressed size of file %s " +
|
||||||
|
"because it's either larger than %d bytes or the GZIP ISIZE field is corrupt",
|
||||||
|
gzipFile.getAbsolutePath(), Integer.MAX_VALUE));
|
||||||
|
}
|
||||||
|
|
||||||
|
return uncompressedSize;
|
||||||
|
}
|
||||||
|
catch ( IOException e ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(gzipFile, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -194,6 +194,10 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean hasBaseIndelQualities() {
|
||||||
|
return getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null || getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null;
|
||||||
|
}
|
||||||
|
|
||||||
public byte[] getBaseInsertionQualities() {
|
public byte[] getBaseInsertionQualities() {
|
||||||
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) );
|
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) );
|
||||||
if( quals == null ) {
|
if( quals == null ) {
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import org.apache.log4j.Logger;
|
||||||
import org.apache.log4j.PatternLayout;
|
import org.apache.log4j.PatternLayout;
|
||||||
import org.apache.log4j.spi.LoggingEvent;
|
import org.apache.log4j.spi.LoggingEvent;
|
||||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||||
|
import org.broadinstitute.sting.utils.crypt.CryptUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.io.IOUtils;
|
import org.broadinstitute.sting.utils.io.IOUtils;
|
||||||
|
|
||||||
|
|
@ -87,6 +88,9 @@ public abstract class BaseTest {
|
||||||
public static final File testDirFile = new File("public/testdata/");
|
public static final File testDirFile = new File("public/testdata/");
|
||||||
public static final String testDir = testDirFile.getAbsolutePath() + "/";
|
public static final String testDir = testDirFile.getAbsolutePath() + "/";
|
||||||
|
|
||||||
|
public static final String keysDataLocation = validationDataLocation + "keys/";
|
||||||
|
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
|
||||||
|
|
||||||
/** before the class starts up */
|
/** before the class starts up */
|
||||||
static {
|
static {
|
||||||
// setup a basic log configuration
|
// setup a basic log configuration
|
||||||
|
|
@ -141,7 +145,7 @@ public abstract class BaseTest {
|
||||||
*/
|
*/
|
||||||
public static class TestDataProvider {
|
public static class TestDataProvider {
|
||||||
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
|
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
|
||||||
private String name;
|
protected String name;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new TestDataProvider instance bound to the class variable C
|
* Create a new TestDataProvider instance bound to the class variable C
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,116 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.Median;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeSuite;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
public class MedianUnitTest extends BaseTest {
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Provider
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class MedianTestProvider extends TestDataProvider {
|
||||||
|
final List<Integer> values = new ArrayList<Integer>();
|
||||||
|
final int cap;
|
||||||
|
final Integer expected;
|
||||||
|
|
||||||
|
public MedianTestProvider(int expected, int cap, Integer ... values) {
|
||||||
|
super(MedianTestProvider.class);
|
||||||
|
this.expected = expected;
|
||||||
|
this.cap = cap;
|
||||||
|
this.values.addAll(Arrays.asList(values));
|
||||||
|
this.name = String.format("values=%s expected=%d cap=%d", this.values, expected, cap);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "MedianTestProvider")
|
||||||
|
public Object[][] makeMedianTestProvider() {
|
||||||
|
new MedianTestProvider(1, 1000, 0, 1, 2);
|
||||||
|
new MedianTestProvider(1, 1000, 1, 0, 1, 2);
|
||||||
|
new MedianTestProvider(1, 1000, 0, 1, 2, 3);
|
||||||
|
new MedianTestProvider(2, 1000, 0, 1, 2, 3, 4);
|
||||||
|
new MedianTestProvider(2, 1000, 4, 1, 2, 3, 0);
|
||||||
|
new MedianTestProvider(1, 1000, 1);
|
||||||
|
new MedianTestProvider(2, 1000, 2);
|
||||||
|
new MedianTestProvider(1, 1000, 1, 2);
|
||||||
|
|
||||||
|
new MedianTestProvider(1, 3, 1);
|
||||||
|
new MedianTestProvider(1, 3, 1, 2);
|
||||||
|
new MedianTestProvider(2, 3, 1, 2, 3);
|
||||||
|
new MedianTestProvider(2, 3, 1, 2, 3, 4);
|
||||||
|
new MedianTestProvider(2, 3, 1, 2, 3, 4, 5);
|
||||||
|
|
||||||
|
new MedianTestProvider(1, 3, 1);
|
||||||
|
new MedianTestProvider(1, 3, 1, 2);
|
||||||
|
new MedianTestProvider(2, 3, 3, 2, 1);
|
||||||
|
new MedianTestProvider(3, 3, 4, 3, 2, 1);
|
||||||
|
new MedianTestProvider(4, 3, 5, 4, 3, 2, 1);
|
||||||
|
|
||||||
|
return MedianTestProvider.getTests(MedianTestProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "MedianTestProvider")
|
||||||
|
public void testBasicLikelihoods(MedianTestProvider cfg) {
|
||||||
|
final Median<Integer> median = new Median<Integer>(cfg.cap);
|
||||||
|
|
||||||
|
int nAdded = 0;
|
||||||
|
for ( final int value : cfg.values )
|
||||||
|
if ( median.add(value) )
|
||||||
|
nAdded++;
|
||||||
|
|
||||||
|
Assert.assertEquals(nAdded, median.size());
|
||||||
|
|
||||||
|
Assert.assertEquals(cfg.values.isEmpty(), median.isEmpty());
|
||||||
|
Assert.assertEquals(cfg.values.size() >= cfg.cap, median.isFull());
|
||||||
|
Assert.assertEquals(median.getMedian(), cfg.expected, cfg.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = IllegalStateException.class)
|
||||||
|
public void testEmptyMedian() {
|
||||||
|
final Median<Integer> median = new Median<Integer>();
|
||||||
|
Assert.assertTrue(median.isEmpty());
|
||||||
|
final Integer d = 100;
|
||||||
|
Assert.assertEquals(median.getMedian(d), d);
|
||||||
|
median.getMedian();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -30,6 +30,7 @@ import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.Tribble;
|
import org.broad.tribble.Tribble;
|
||||||
import org.broad.tribble.index.Index;
|
import org.broad.tribble.index.Index;
|
||||||
import org.broad.tribble.index.IndexFactory;
|
import org.broad.tribble.index.IndexFactory;
|
||||||
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
|
|
@ -45,7 +46,7 @@ import java.io.File;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class WalkerTest extends BaseTest {
|
public class WalkerTest extends BaseTest {
|
||||||
private static final boolean ENABLE_REPORTING = false;
|
private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false;
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void initializeRandomGenerator() {
|
public void initializeRandomGenerator() {
|
||||||
|
|
@ -121,11 +122,19 @@ public class WalkerTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
public class WalkerTestSpec {
|
public class WalkerTestSpec {
|
||||||
|
|
||||||
|
// Arguments implicitly included in all Walker command lines, unless explicitly
|
||||||
|
// disabled using the disableImplicitArgs() method below.
|
||||||
|
final String IMPLICIT_ARGS = ENABLE_PHONE_HOME_FOR_TESTS ?
|
||||||
|
String.format("-et %s", GATKRunReport.PhoneHomeOption.STANDARD) :
|
||||||
|
String.format("-et %s -K %s", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile);
|
||||||
|
|
||||||
String args = "";
|
String args = "";
|
||||||
int nOutputFiles = -1;
|
int nOutputFiles = -1;
|
||||||
List<String> md5s = null;
|
List<String> md5s = null;
|
||||||
List<String> exts = null;
|
List<String> exts = null;
|
||||||
Class expectedException = null;
|
Class expectedException = null;
|
||||||
|
boolean includeImplicitArgs = true;
|
||||||
|
|
||||||
// the default output path for the integration test
|
// the default output path for the integration test
|
||||||
private File outputFileLocation = null;
|
private File outputFileLocation = null;
|
||||||
|
|
@ -159,6 +168,10 @@ public class WalkerTest extends BaseTest {
|
||||||
this.expectedException = expectedException;
|
this.expectedException = expectedException;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getArgsWithImplicitArgs() {
|
||||||
|
return args + (includeImplicitArgs ? " " + IMPLICIT_ARGS : "");
|
||||||
|
}
|
||||||
|
|
||||||
public void setOutputFileLocation(File outputFileLocation) {
|
public void setOutputFileLocation(File outputFileLocation) {
|
||||||
this.outputFileLocation = outputFileLocation;
|
this.outputFileLocation = outputFileLocation;
|
||||||
}
|
}
|
||||||
|
|
@ -180,6 +193,9 @@ public class WalkerTest extends BaseTest {
|
||||||
auxillaryFiles.put(expectededMD5sum, outputfile);
|
auxillaryFiles.put(expectededMD5sum, outputfile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void disableImplicitArgs() {
|
||||||
|
includeImplicitArgs = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean parameterize() {
|
protected boolean parameterize() {
|
||||||
|
|
@ -213,7 +229,7 @@ public class WalkerTest extends BaseTest {
|
||||||
tmpFiles.add(fl);
|
tmpFiles.add(fl);
|
||||||
}
|
}
|
||||||
|
|
||||||
final String args = String.format(spec.args, tmpFiles.toArray());
|
final String args = String.format(spec.getArgsWithImplicitArgs(), tmpFiles.toArray());
|
||||||
System.out.println(Utils.dupString('-', 80));
|
System.out.println(Utils.dupString('-', 80));
|
||||||
|
|
||||||
if ( spec.expectsException() ) {
|
if ( spec.expectsException() ) {
|
||||||
|
|
@ -277,13 +293,10 @@ public class WalkerTest extends BaseTest {
|
||||||
* @param args the argument list
|
* @param args the argument list
|
||||||
* @param expectedException the expected exception or null
|
* @param expectedException the expected exception or null
|
||||||
*/
|
*/
|
||||||
public static void executeTest(String name, String args, Class expectedException) {
|
private void executeTest(String name, String args, Class expectedException) {
|
||||||
CommandLineGATK instance = new CommandLineGATK();
|
CommandLineGATK instance = new CommandLineGATK();
|
||||||
String[] command = Utils.escapeExpressions(args);
|
String[] command = Utils.escapeExpressions(args);
|
||||||
|
|
||||||
// add the logging level to each of the integration test commands
|
|
||||||
command = Utils.appendArray(command, "-et", ENABLE_REPORTING ? "STANDARD" : "NO_ET");
|
|
||||||
|
|
||||||
// run the executable
|
// run the executable
|
||||||
boolean gotAnException = false;
|
boolean gotAnException = false;
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,103 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.BitSet;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Short one line description of the walker.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* [Long description of the walker]
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* [Description of the Input]
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* [Description of the Output]
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java
|
||||||
|
* -jar GenomeAnalysisTK.jar
|
||||||
|
* -T [walker name]
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author Mauricio Carneiro
|
||||||
|
* @since 3/1/12
|
||||||
|
*/
|
||||||
|
public class ContextCovariateUnitTest {
|
||||||
|
ContextCovariate covariate;
|
||||||
|
RecalibrationArgumentCollection RAC;
|
||||||
|
Random random;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public void init() {
|
||||||
|
RAC = new RecalibrationArgumentCollection();
|
||||||
|
covariate = new ContextCovariate();
|
||||||
|
random = GenomeAnalysisEngine.getRandomGenerator();
|
||||||
|
covariate.initialize(RAC);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testSimpleContexts() {
|
||||||
|
byte [] quals = createRandomReadQuals(101);
|
||||||
|
byte [] bbases = createRandomReadBases(101);
|
||||||
|
String bases = stringFrom(bbases);
|
||||||
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
|
||||||
|
CovariateValues values = covariate.getValues(read);
|
||||||
|
verifyCovariateArray((BitSet []) values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
|
||||||
|
verifyCovariateArray((BitSet []) values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
|
||||||
|
verifyCovariateArray((BitSet []) values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
|
||||||
|
for (int i=0; i<values.length; i++) {
|
||||||
|
if (i >= contextSize)
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(values[i]), bases.substring(i-contextSize, i));
|
||||||
|
else
|
||||||
|
Assert.assertNull(values[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String stringFrom(byte [] array) {
|
||||||
|
String s = "";
|
||||||
|
for (byte value : array)
|
||||||
|
s += (char) value;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte [] createRandomReadQuals(int length) {
|
||||||
|
byte [] quals = new byte[length];
|
||||||
|
for (int i=0; i<length; i++)
|
||||||
|
quals[i] = (byte) random.nextInt(50);
|
||||||
|
return quals;
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte [] createRandomReadBases(int length) {
|
||||||
|
byte [] bases = new byte[length];
|
||||||
|
for (int i=0; i<length; i++) {
|
||||||
|
switch(random.nextInt(4)) {
|
||||||
|
case 0: bases[i] = 'A'; break;
|
||||||
|
case 1: bases[i] = 'C'; break;
|
||||||
|
case 2: bases[i] = 'G'; break;
|
||||||
|
case 3: bases[i] = 'T'; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bases;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests ReadGroupProperties
|
||||||
|
*/
|
||||||
|
public class ReadGroupPropertiesIntegrationTest extends WalkerTest {
|
||||||
|
@Test
|
||||||
|
public void basicTest() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s",
|
||||||
|
1,
|
||||||
|
Arrays.asList("6b8cce223af28cbadcfe87a3b841fc56"));
|
||||||
|
executeTest("ReadGroupProperties:", spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSamplePilot1() {
|
public void testMultiSamplePilot1() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||||
Arrays.asList("202b337ebbea3def1be8495eb363dfa8"));
|
Arrays.asList("8f81a14fffc1a59b4b066f8595dc1232"));
|
||||||
executeTest("test MultiSample Pilot1", spec);
|
executeTest("test MultiSample Pilot1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testSingleSamplePilot2() {
|
public void testSingleSamplePilot2() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("ae29b9c9aacce8046dc780430540cd62"));
|
Arrays.asList("c5b53231f4f6d9524bc4ec8115f44f5c"));
|
||||||
executeTest("test SingleSample Pilot2", spec);
|
executeTest("test SingleSample Pilot2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultipleSNPAlleles() {
|
public void testMultipleSNPAlleles() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
|
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
|
||||||
Arrays.asList("10027d13befaa07b7900a7af0ae0791c"));
|
Arrays.asList("5af005255240a2186f04cb50851b8b6f"));
|
||||||
executeTest("test Multiple SNP alleles", spec);
|
executeTest("test Multiple SNP alleles", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65";
|
private final static String COMPRESSED_OUTPUT_MD5 = "a08df9aea2b3df09cf90ff8e6e3be3ea";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCompressedOutput() {
|
public void testCompressedOutput() {
|
||||||
|
|
@ -91,7 +91,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||||
|
|
||||||
String md5 = "32a34362dff51d8b73a3335048516d82";
|
String md5 = "6358934c1c26345013a38261b8c45aa4";
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||||
|
|
@ -179,8 +179,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testHeterozyosity() {
|
public void testHeterozyosity() {
|
||||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||||
e.put( 0.01, "2cb2544739e01f6c08fd820112914317" );
|
e.put( 0.01, "926b58038dd4989bf7eda697a847eea9" );
|
||||||
e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" );
|
e.put( 1.0 / 1850, "93f44105b43b65730a3b821e27b0fa16" );
|
||||||
|
|
||||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
@ -204,7 +204,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -L 1:10,000,000-10,100,000",
|
" -L 1:10,000,000-10,100,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
|
Arrays.asList("a1b75a7e12b160b0be823228c958573f"));
|
||||||
|
|
||||||
executeTest(String.format("test multiple technologies"), spec);
|
executeTest(String.format("test multiple technologies"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -223,7 +223,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -L 1:10,000,000-10,100,000" +
|
" -L 1:10,000,000-10,100,000" +
|
||||||
" -baq CALCULATE_AS_NECESSARY",
|
" -baq CALCULATE_AS_NECESSARY",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
|
Arrays.asList("3bda1279cd6dcb47885f3e19466f11b9"));
|
||||||
|
|
||||||
executeTest(String.format("test calling with BAQ"), spec);
|
executeTest(String.format("test calling with BAQ"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -242,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -L 1:10,000,000-10,500,000",
|
" -L 1:10,000,000-10,500,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("bd9d3d50a1f49605d7cd592a0f446899"));
|
Arrays.asList("d9fc3ba94a0d46029778c7b457e7292a"));
|
||||||
|
|
||||||
executeTest(String.format("test indel caller in SLX"), spec);
|
executeTest(String.format("test indel caller in SLX"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -257,7 +257,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -minIndelCnt 1" +
|
" -minIndelCnt 1" +
|
||||||
" -L 1:10,000,000-10,100,000",
|
" -L 1:10,000,000-10,100,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6"));
|
Arrays.asList("b2e30ae3e5ffa6108f9f6178b1d2e679"));
|
||||||
|
|
||||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -L 1:10,000,000-10,500,000",
|
" -L 1:10,000,000-10,500,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("91cd6d2e3972b0b8e4064bb35a33241f"));
|
Arrays.asList("2cd182a84613fa91a6020466d2d327e2"));
|
||||||
|
|
||||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd"));
|
Arrays.asList("9cd08dc412a007933381e9c76c073899"));
|
||||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -290,7 +290,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("320f61c87253aba77d6dc782242cba8b"));
|
Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53"));
|
||||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||||
Arrays.asList("c9897b80615c53a4ea10a4b193d56d9c"));
|
Arrays.asList("2609675a356f2dfc86f8a1d911210978"));
|
||||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||||
Arrays.asList("5282fdb1711a532d726c13507bf80a21"));
|
Arrays.asList("4fdd8da77167881b71b3547da5c13f94"));
|
||||||
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,163 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Basic unit test for Haplotype Class
|
||||||
|
*/
|
||||||
|
public class HaplotypeUnitTest extends BaseTest {
|
||||||
|
@BeforeClass
|
||||||
|
public void init() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSimpleInsertionAllele() {
|
||||||
|
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
|
|
||||||
|
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||||
|
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||||
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
|
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
|
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
|
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
|
||||||
|
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSimpleDeletionAllele() {
|
||||||
|
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
|
|
||||||
|
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||||
|
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||||
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
|
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
|
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
|
||||||
|
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
|
||||||
|
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSimpleSNPAllele() {
|
||||||
|
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
|
|
||||||
|
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||||
|
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||||
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
|
String h1bases = "AGTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
|
basicInsertTest("C", "G", 1, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ACTGGTCTACTGGTCAACTGGTCAACTGGTCA";
|
||||||
|
basicInsertTest("A", "T", 7, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ACTGGTCAACTGGTCAAATGGTCAACTGGTCA";
|
||||||
|
basicInsertTest("C", "A", 17, h1Cigar, bases, h1bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testComplexInsertionAllele() {
|
||||||
|
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||||
|
|
||||||
|
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||||
|
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||||
|
h1CigarList.add(new CigarElement(10, CigarOperator.I));
|
||||||
|
h1CigarList.add(new CigarElement(8, CigarOperator.M));
|
||||||
|
h1CigarList.add(new CigarElement(3, CigarOperator.D));
|
||||||
|
h1CigarList.add(new CigarElement(7, CigarOperator.M));
|
||||||
|
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||||
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
|
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||||
|
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
|
||||||
|
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
|
||||||
|
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testComplexDeletionAllele() {
|
||||||
|
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||||
|
|
||||||
|
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||||
|
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||||
|
h1CigarList.add(new CigarElement(10, CigarOperator.I));
|
||||||
|
h1CigarList.add(new CigarElement(8, CigarOperator.M));
|
||||||
|
h1CigarList.add(new CigarElement(3, CigarOperator.D));
|
||||||
|
h1CigarList.add(new CigarElement(7, CigarOperator.M));
|
||||||
|
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||||
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
|
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||||
|
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
|
||||||
|
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
|
||||||
|
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testComplexSNPAllele() {
|
||||||
|
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||||
|
|
||||||
|
final ArrayList<CigarElement> h1CigarList = new ArrayList<CigarElement>();
|
||||||
|
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||||
|
h1CigarList.add(new CigarElement(10, CigarOperator.I));
|
||||||
|
h1CigarList.add(new CigarElement(8, CigarOperator.M));
|
||||||
|
h1CigarList.add(new CigarElement(3, CigarOperator.D));
|
||||||
|
h1CigarList.add(new CigarElement(7, CigarOperator.M));
|
||||||
|
h1CigarList.add(new CigarElement(4, CigarOperator.M));
|
||||||
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
|
String h1bases = "AGCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||||
|
basicInsertTest("T", "G", 1, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCTATCG" + "AGGGGGA" + "AGGC";
|
||||||
|
basicInsertTest("G", "T", 7, h1Cigar, bases, h1bases);
|
||||||
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGCGGGA" + "AGGC";
|
||||||
|
basicInsertTest("G", "C", 17, h1Cigar, bases, h1bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
|
||||||
|
final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1);
|
||||||
|
final Haplotype h = new Haplotype(hap.getBytes());
|
||||||
|
final Allele h1refAllele = Allele.create(ref, true);
|
||||||
|
final Allele h1altAllele = Allele.create(alt, false);
|
||||||
|
final Haplotype h1 = new Haplotype( h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE, 0, cigar) );
|
||||||
|
final Haplotype h1expected = new Haplotype(newHap.getBytes());
|
||||||
|
Assert.assertEquals(h1, h1expected);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -205,16 +205,9 @@ public class MathUtilsUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
/**
|
||||||
public void testIntAndBitSetConversion() {
|
* Private functions used by testArrayShuffle()
|
||||||
Assert.assertEquals(428, MathUtils.intFrom(MathUtils.bitSetFrom(428)));
|
*/
|
||||||
Assert.assertEquals(239847, MathUtils.intFrom(MathUtils.bitSetFrom(239847)));
|
|
||||||
Assert.assertEquals(12726, MathUtils.intFrom(MathUtils.bitSetFrom(12726)));
|
|
||||||
Assert.assertEquals(0, MathUtils.intFrom(MathUtils.bitSetFrom(0)));
|
|
||||||
Assert.assertEquals(1, MathUtils.intFrom(MathUtils.bitSetFrom(1)));
|
|
||||||
Assert.assertEquals(65536, MathUtils.intFrom(MathUtils.bitSetFrom(65536)));
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean hasUniqueElements(Object[] x) {
|
private boolean hasUniqueElements(Object[] x) {
|
||||||
for (int i = 0; i < x.length; i++)
|
for (int i = 0; i < x.length; i++)
|
||||||
for (int j = i + 1; j < x.length; j++)
|
for (int j = i + 1; j < x.length; j++)
|
||||||
|
|
@ -230,10 +223,100 @@ public class MathUtilsUnitTest extends BaseTest {
|
||||||
return set.isEmpty();
|
return set.isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void p (Object []x) {
|
@Test(enabled = true)
|
||||||
for (Object v: x)
|
public void testIntAndBitSetConversion() {
|
||||||
System.out.print((Integer) v + " ");
|
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428);
|
||||||
System.out.println();
|
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(239847)), 239847);
|
||||||
|
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(12726)), 12726);
|
||||||
|
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(0)), 0);
|
||||||
|
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(1)), 1);
|
||||||
|
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(65536)), 65536);
|
||||||
|
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(Long.MAX_VALUE)), Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testDNAAndBitSetConversion() {
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("ACGT")), "ACGT");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AGGTGTTGT")), "AGGTGTTGT");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("A")), "A");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("C")), "C");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("G")), "G");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("T")), "T");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CC")), "CC");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AA")), "AA");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AAAA")), "AAAA");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CCCCCCCCCCCCCC")), "CCCCCCCCCCCCCC");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GGGGGGGGGGGGGG")), "GGGGGGGGGGGGGG");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("TTTTTTTTTTTTTT")), "TTTTTTTTTTTTTT");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GTAGACCGATCTCAGCTAGT")), "GTAGACCGATCTCAGCTAGT");
|
||||||
|
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testApproximateLog10SumLog10() {
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
|
||||||
|
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
|
||||||
|
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
|
||||||
|
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNormalizeFromLog10() {
|
||||||
|
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8}));
|
||||||
|
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8}));
|
||||||
|
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9}));
|
||||||
|
|
||||||
|
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25}));
|
||||||
|
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
|
||||||
|
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Private function used by testNormalizeFromLog10()
|
||||||
|
*/
|
||||||
|
private boolean compareDoubleArrays(double[] b1, double[] b2) {
|
||||||
|
if( b1.length != b2.length ) {
|
||||||
|
return false; // sanity check
|
||||||
|
}
|
||||||
|
|
||||||
|
for( int i=0; i < b1.length; i++ ){
|
||||||
|
if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,177 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.crypt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.testng.Assert;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.security.Key;
|
||||||
|
import java.security.KeyPair;
|
||||||
|
import java.security.PrivateKey;
|
||||||
|
import java.security.PublicKey;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class CryptUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGenerateValidKeyPairWithDefaultSettings() {
|
||||||
|
KeyPair keyPair = CryptUtils.generateKeyPair();
|
||||||
|
Assert.assertTrue(CryptUtils.keysDecryptEachOther(keyPair.getPrivate(), keyPair.getPublic()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider( name = "InvalidKeyPairSettings" )
|
||||||
|
public Object[][] invalidKeyPairSettingsDataProvider() {
|
||||||
|
return new Object[][] {
|
||||||
|
{ -1, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM},
|
||||||
|
{ CryptUtils.DEFAULT_KEY_LENGTH, "Made-up algorithm", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM},
|
||||||
|
{ CryptUtils.DEFAULT_KEY_LENGTH, CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM, "Made-up algorithm"}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( dataProvider = "InvalidKeyPairSettings", expectedExceptions = ReviewedStingException.class )
|
||||||
|
public void testGenerateKeyPairWithInvalidSettings( int keyLength, String encryptionAlgorithm, String randomNumberGenerationAlgorithm ) {
|
||||||
|
KeyPair keyPair = CryptUtils.generateKeyPair(keyLength, encryptionAlgorithm, randomNumberGenerationAlgorithm);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGATKMasterKeyPairMutualDecryption() {
|
||||||
|
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() {
|
||||||
|
Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKeyPairWriteThenRead() {
|
||||||
|
KeyPair keyPair = CryptUtils.generateKeyPair();
|
||||||
|
File privateKeyFile = createTempFile("testKeyPairWriteThenRead_private", "key");
|
||||||
|
File publicKeyFile = createTempFile("testKeyPairWriteThenRead_public", "key");
|
||||||
|
|
||||||
|
CryptUtils.writeKeyPair(keyPair, privateKeyFile, publicKeyFile);
|
||||||
|
|
||||||
|
assertKeysAreEqual(keyPair.getPrivate(), CryptUtils.readPrivateKey(privateKeyFile));
|
||||||
|
assertKeysAreEqual(keyPair.getPublic(), CryptUtils.readPublicKey(publicKeyFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPublicKeyWriteThenReadFromFile() {
|
||||||
|
File keyFile = createTempFile("testPublicKeyWriteThenReadFromFile", "key");
|
||||||
|
PublicKey publicKey = CryptUtils.generateKeyPair().getPublic();
|
||||||
|
|
||||||
|
CryptUtils.writeKey(publicKey, keyFile);
|
||||||
|
|
||||||
|
assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(keyFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPublicKeyWriteThenReadFromStream() throws IOException {
|
||||||
|
File keyFile = createTempFile("testPublicKeyWriteThenReadFromStream", "key");
|
||||||
|
PublicKey publicKey = CryptUtils.generateKeyPair().getPublic();
|
||||||
|
|
||||||
|
CryptUtils.writeKey(publicKey, keyFile);
|
||||||
|
|
||||||
|
assertKeysAreEqual(publicKey, CryptUtils.readPublicKey(new FileInputStream(keyFile)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPrivateKeyWriteThenReadFromFile() {
|
||||||
|
File keyFile = createTempFile("testPrivateKeyWriteThenReadFromFile", "key");
|
||||||
|
PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate();
|
||||||
|
|
||||||
|
CryptUtils.writeKey(privateKey, keyFile);
|
||||||
|
|
||||||
|
assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(keyFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPrivateKeyWriteThenReadFromStream() throws IOException {
|
||||||
|
File keyFile = createTempFile("testPrivateKeyWriteThenReadFromStream", "key");
|
||||||
|
PrivateKey privateKey = CryptUtils.generateKeyPair().getPrivate();
|
||||||
|
|
||||||
|
CryptUtils.writeKey(privateKey, keyFile);
|
||||||
|
|
||||||
|
assertKeysAreEqual(privateKey, CryptUtils.readPrivateKey(new FileInputStream(keyFile)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
|
||||||
|
public void testReadNonExistentPublicKey() {
|
||||||
|
File nonExistentFile = new File("jdshgkdfhg.key");
|
||||||
|
Assert.assertFalse(nonExistentFile.exists());
|
||||||
|
|
||||||
|
CryptUtils.readPublicKey(nonExistentFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
|
||||||
|
public void testReadNonExistentPrivateKey() {
|
||||||
|
File nonExistentFile = new File("jdshgkdfhg.key");
|
||||||
|
Assert.assertFalse(nonExistentFile.exists());
|
||||||
|
|
||||||
|
CryptUtils.readPrivateKey(nonExistentFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDecodePublicKey() {
|
||||||
|
PublicKey originalKey = CryptUtils.generateKeyPair().getPublic();
|
||||||
|
PublicKey decodedKey = CryptUtils.decodePublicKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM);
|
||||||
|
assertKeysAreEqual(originalKey, decodedKey);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDecodePrivateKey() {
|
||||||
|
PrivateKey originalKey = CryptUtils.generateKeyPair().getPrivate();
|
||||||
|
PrivateKey decodedKey = CryptUtils.decodePrivateKey(originalKey.getEncoded(), CryptUtils.DEFAULT_ENCRYPTION_ALGORITHM);
|
||||||
|
assertKeysAreEqual(originalKey, decodedKey);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLoadGATKMasterPrivateKey() {
|
||||||
|
PrivateKey gatkMasterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLoadGATKMasterPublicKey() {
|
||||||
|
PublicKey gatkMasterPublicKey = CryptUtils.loadGATKMasterPublicKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLoadGATKDistributedPublicKey() {
|
||||||
|
PublicKey gatkDistributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertKeysAreEqual( Key originalKey, Key keyFromDisk ) {
|
||||||
|
Assert.assertTrue(Arrays.equals(originalKey.getEncoded(), keyFromDisk.getEncoded()));
|
||||||
|
Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm());
|
||||||
|
Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,156 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.crypt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class GATKKeyIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
|
public static final String BASE_COMMAND = String.format("-T PrintReads -R %s -I %s -o %%s",
|
||||||
|
testDir + "exampleFASTA.fasta",
|
||||||
|
testDir + "exampleBAM.bam");
|
||||||
|
public static final String MD5_UPON_SUCCESSFUL_RUN = "b9dc5bf6753ca2819e70b056eaf61258";
|
||||||
|
|
||||||
|
|
||||||
|
private void runGATKKeyTest ( String testName, String etArg, String keyArg, Class expectedException, String md5 ) {
|
||||||
|
String command = BASE_COMMAND + String.format(" %s %s", etArg, keyArg);
|
||||||
|
|
||||||
|
WalkerTestSpec spec = expectedException != null ?
|
||||||
|
new WalkerTestSpec(command, 1, expectedException) :
|
||||||
|
new WalkerTestSpec(command, 1, Arrays.asList(md5));
|
||||||
|
|
||||||
|
spec.disableImplicitArgs(); // Turn off automatic inclusion of -et/-K args by WalkerTest
|
||||||
|
executeTest(testName, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testValidKeyNoET() {
|
||||||
|
runGATKKeyTest("testValidKeyNoET",
|
||||||
|
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
|
||||||
|
"-K " + keysDataLocation + "valid.key",
|
||||||
|
null,
|
||||||
|
MD5_UPON_SUCCESSFUL_RUN);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testValidKeyETStdout() {
|
||||||
|
runGATKKeyTest("testValidKeyETStdout",
|
||||||
|
"-et " + GATKRunReport.PhoneHomeOption.STDOUT,
|
||||||
|
"-K " + keysDataLocation + "valid.key",
|
||||||
|
null,
|
||||||
|
MD5_UPON_SUCCESSFUL_RUN);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testValidKeyETStandard() {
|
||||||
|
runGATKKeyTest("testValidKeyETStandard",
|
||||||
|
"",
|
||||||
|
"-K " + keysDataLocation + "valid.key",
|
||||||
|
null,
|
||||||
|
MD5_UPON_SUCCESSFUL_RUN);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoKeyNoET() {
|
||||||
|
runGATKKeyTest("testNoKeyNoET",
|
||||||
|
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
|
||||||
|
"",
|
||||||
|
UserException.class,
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoKeyETStdout() {
|
||||||
|
runGATKKeyTest("testNoKeyETStdout",
|
||||||
|
"-et " + GATKRunReport.PhoneHomeOption.STDOUT,
|
||||||
|
"",
|
||||||
|
UserException.class,
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoKeyETStandard() {
|
||||||
|
runGATKKeyTest("testNoKeyETStandard",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
null,
|
||||||
|
MD5_UPON_SUCCESSFUL_RUN);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRevokedKey() {
|
||||||
|
runGATKKeyTest("testRevokedKey",
|
||||||
|
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
|
||||||
|
"-K " + keysDataLocation + "revoked.key",
|
||||||
|
UserException.KeySignatureVerificationException.class,
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "CorruptKeyTestData")
|
||||||
|
public Object[][] corruptKeyDataProvider() {
|
||||||
|
return new Object[][] {
|
||||||
|
{ "corrupt_empty.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_single_byte_file.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_random_contents.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_single_byte_deletion.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_single_byte_insertion.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_single_byte_change.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_multi_byte_deletion.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_multi_byte_insertion.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_multi_byte_change.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_bad_isize_field.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_bad_crc.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_no_email_address.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_no_sectional_delimiter.key", UserException.KeySignatureVerificationException.class },
|
||||||
|
{ "corrupt_no_signature.key", UserException.UnreadableKeyException.class },
|
||||||
|
{ "corrupt_bad_signature.key", UserException.KeySignatureVerificationException.class },
|
||||||
|
{ "corrupt_non_gzipped_valid_key.key", UserException.UnreadableKeyException.class }
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "CorruptKeyTestData")
|
||||||
|
public void testCorruptKey ( String corruptKeyName, Class expectedException ) {
|
||||||
|
runGATKKeyTest(String.format("testCorruptKey (%s)", corruptKeyName),
|
||||||
|
"-et " + GATKRunReport.PhoneHomeOption.NO_ET,
|
||||||
|
"-K " + keysDataLocation + corruptKeyName,
|
||||||
|
expectedException,
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCorruptButNonRequiredKey() {
|
||||||
|
runGATKKeyTest("testCorruptButNonRequiredKey",
|
||||||
|
"",
|
||||||
|
"-K " + keysDataLocation + "corrupt_random_contents.key",
|
||||||
|
null,
|
||||||
|
MD5_UPON_SUCCESSFUL_RUN);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,113 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.crypt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.testng.Assert;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.security.KeyPair;
|
||||||
|
import java.security.PrivateKey;
|
||||||
|
import java.security.PublicKey;
|
||||||
|
|
||||||
|
public class GATKKeyUnitTest extends BaseTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCreateGATKKeyUsingMasterKeyPair() {
|
||||||
|
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||||
|
PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey();
|
||||||
|
|
||||||
|
// We should be able to create a valid GATKKey using our master key pair:
|
||||||
|
GATKKey key = new GATKKey(masterPrivateKey, masterPublicKey, "foo@bar.com");
|
||||||
|
Assert.assertTrue(key.isValid());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() {
|
||||||
|
PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey();
|
||||||
|
PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
||||||
|
|
||||||
|
// We should also be able to create a valid GATKKey using our master private
|
||||||
|
// key and the public key we distribute with the GATK:
|
||||||
|
GATKKey key = new GATKKey(masterPrivateKey, distributedPublicKey, "foo@bar.com");
|
||||||
|
Assert.assertTrue(key.isValid());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = ReviewedStingException.class )
|
||||||
|
public void testKeyPairMismatch() {
|
||||||
|
KeyPair firstKeyPair = CryptUtils.generateKeyPair();
|
||||||
|
KeyPair secondKeyPair = CryptUtils.generateKeyPair();
|
||||||
|
|
||||||
|
// Attempting to create a GATK Key with private and public keys that aren't part of the
|
||||||
|
// same key pair should immediately trigger a validation failure:
|
||||||
|
GATKKey key = new GATKKey(firstKeyPair.getPrivate(), secondKeyPair.getPublic(), "foo@bar.com");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = ReviewedStingException.class )
|
||||||
|
public void testEncryptionAlgorithmMismatch() {
|
||||||
|
KeyPair keyPair = CryptUtils.generateKeyPair(CryptUtils.DEFAULT_KEY_LENGTH, "DSA", CryptUtils.DEFAULT_RANDOM_NUMBER_GENERATION_ALGORITHM);
|
||||||
|
|
||||||
|
// Attempting to use a DSA private key to create an RSA signature should throw an error:
|
||||||
|
GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), "foo@bar.com", "SHA1withRSA");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = UserException.class )
|
||||||
|
public void testInvalidEmailAddress() {
|
||||||
|
String emailAddressWithNulByte = new String(new byte[] { 0 });
|
||||||
|
KeyPair keyPair = CryptUtils.generateKeyPair();
|
||||||
|
|
||||||
|
// Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file:
|
||||||
|
GATKKey key = new GATKKey(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey(),
|
||||||
|
emailAddressWithNulByte);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCreateGATKKeyFromValidKeyFile() {
|
||||||
|
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "valid.key"));
|
||||||
|
Assert.assertTrue(key.isValid());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = UserException.UnreadableKeyException.class )
|
||||||
|
public void testCreateGATKKeyFromCorruptKeyFile() {
|
||||||
|
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "corrupt_random_contents.key"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCreateGATKKeyFromRevokedKeyFile() {
|
||||||
|
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), new File(keysDataLocation + "revoked.key"));
|
||||||
|
Assert.assertFalse(key.isValid());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
|
||||||
|
public void testCreateGATKKeyFromNonExistentFile() {
|
||||||
|
File nonExistentFile = new File("ghfdkgsdhg.key");
|
||||||
|
Assert.assertFalse(nonExistentFile.exists());
|
||||||
|
|
||||||
|
GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -27,12 +27,18 @@ package org.broadinstitute.sting.utils.io;
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
public class IOUtilsUnitTest extends BaseTest {
|
public class IOUtilsUnitTest extends BaseTest {
|
||||||
|
|
@ -230,4 +236,90 @@ public class IOUtilsUnitTest extends BaseTest {
|
||||||
Assert.assertFalse(IOUtils.isSpecialFile(new File("/home/user/my.file")));
|
Assert.assertFalse(IOUtils.isSpecialFile(new File("/home/user/my.file")));
|
||||||
Assert.assertFalse(IOUtils.isSpecialFile(new File("/devfake/null")));
|
Assert.assertFalse(IOUtils.isSpecialFile(new File("/devfake/null")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@DataProvider( name = "ByteArrayIOTestData")
|
||||||
|
public Object[][] byteArrayIOTestDataProvider() {
|
||||||
|
return new Object[][] {
|
||||||
|
// file size, read buffer size
|
||||||
|
{ 0, 4096 },
|
||||||
|
{ 1, 4096 },
|
||||||
|
{ 2000, 4096 },
|
||||||
|
{ 4095, 4096 },
|
||||||
|
{ 4096, 4096 },
|
||||||
|
{ 4097, 4096 },
|
||||||
|
{ 6000, 4096 },
|
||||||
|
{ 8191, 4096 },
|
||||||
|
{ 8192, 4096 },
|
||||||
|
{ 8193, 4096 },
|
||||||
|
{ 10000, 4096 }
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( dataProvider = "ByteArrayIOTestData" )
|
||||||
|
public void testWriteThenReadFileIntoByteArray ( int fileSize, int readBufferSize ) throws Exception {
|
||||||
|
File tempFile = createTempFile(String.format("testWriteThenReadFileIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp");
|
||||||
|
|
||||||
|
byte[] dataWritten = getDeterministicRandomData(fileSize);
|
||||||
|
IOUtils.writeByteArrayToFile(dataWritten, tempFile);
|
||||||
|
byte[] dataRead = IOUtils.readFileIntoByteArray(tempFile, readBufferSize);
|
||||||
|
|
||||||
|
Assert.assertEquals(dataRead.length, dataWritten.length);
|
||||||
|
Assert.assertTrue(Arrays.equals(dataRead, dataWritten));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( dataProvider = "ByteArrayIOTestData" )
|
||||||
|
public void testWriteThenReadStreamIntoByteArray ( int fileSize, int readBufferSize ) throws Exception {
|
||||||
|
File tempFile = createTempFile(String.format("testWriteThenReadStreamIntoByteArray_%d_%d", fileSize, readBufferSize), "tmp");
|
||||||
|
|
||||||
|
byte[] dataWritten = getDeterministicRandomData(fileSize);
|
||||||
|
IOUtils.writeByteArrayToStream(dataWritten, new FileOutputStream(tempFile));
|
||||||
|
byte[] dataRead = IOUtils.readStreamIntoByteArray(new FileInputStream(tempFile), readBufferSize);
|
||||||
|
|
||||||
|
Assert.assertEquals(dataRead.length, dataWritten.length);
|
||||||
|
Assert.assertTrue(Arrays.equals(dataRead, dataWritten));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = UserException.CouldNotReadInputFile.class )
|
||||||
|
public void testReadNonExistentFileIntoByteArray() {
|
||||||
|
File nonExistentFile = new File("djfhsdkjghdfk");
|
||||||
|
Assert.assertFalse(nonExistentFile.exists());
|
||||||
|
|
||||||
|
IOUtils.readFileIntoByteArray(nonExistentFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = ReviewedStingException.class )
|
||||||
|
public void testReadNullStreamIntoByteArray() {
|
||||||
|
IOUtils.readStreamIntoByteArray(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = ReviewedStingException.class )
|
||||||
|
public void testReadStreamIntoByteArrayInvalidBufferSize() throws Exception {
|
||||||
|
IOUtils.readStreamIntoByteArray(new FileInputStream(createTempFile("testReadStreamIntoByteArrayInvalidBufferSize", "tmp")),
|
||||||
|
-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = UserException.CouldNotCreateOutputFile.class )
|
||||||
|
public void testWriteByteArrayToUncreatableFile() {
|
||||||
|
IOUtils.writeByteArrayToFile(new byte[]{0}, new File("/dev/foo/bar"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = ReviewedStingException.class )
|
||||||
|
public void testWriteNullByteArrayToFile() {
|
||||||
|
IOUtils.writeByteArrayToFile(null, createTempFile("testWriteNullByteArrayToFile", "tmp"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = ReviewedStingException.class )
|
||||||
|
public void testWriteByteArrayToNullStream() {
|
||||||
|
IOUtils.writeByteArrayToStream(new byte[]{0}, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] getDeterministicRandomData ( int size ) {
|
||||||
|
GenomeAnalysisEngine.resetRandomGenerator();
|
||||||
|
Random rand = GenomeAnalysisEngine.getRandomGenerator();
|
||||||
|
|
||||||
|
byte[] randomData = new byte[size];
|
||||||
|
rand.nextBytes(randomData);
|
||||||
|
|
||||||
|
return randomData;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -36,6 +36,8 @@
|
||||||
<dir name="org/broadinstitute/sting/utils/R" includes="**/*.tar.gz" />
|
<dir name="org/broadinstitute/sting/utils/R" includes="**/*.tar.gz" />
|
||||||
<!-- All R scripts in org.broadinstitute.sting -->
|
<!-- All R scripts in org.broadinstitute.sting -->
|
||||||
<dir name="org/broadinstitute/sting" includes="**/*.R" />
|
<dir name="org/broadinstitute/sting" includes="**/*.R" />
|
||||||
|
<!-- The GATK public key -->
|
||||||
|
<file path="GATK_public.key" />
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</executable>
|
</executable>
|
||||||
<resources>
|
<resources>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue