Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
ca11f68303
|
|
@ -1,5 +1,5 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa"
|
export BWA_HOME="/humgen/gsa-scr1/hanna/src/bio-bwa/bwa"
|
||||||
export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
|
export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
|
||||||
export TARGET_LIB="libbwa.so"
|
export TARGET_LIB="libbwa.so"
|
||||||
export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"
|
export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"
|
||||||
|
|
|
||||||
|
|
@ -233,6 +233,8 @@ void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_s
|
||||||
void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; }
|
void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; }
|
||||||
void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; }
|
void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; }
|
||||||
void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; }
|
void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; }
|
||||||
|
void BWA::set_mode_nonstop() { options.mode |= BWA_MODE_NONSTOP; options.max_top2 = 0x7fffffff; }
|
||||||
|
void BWA::set_max_entries_in_queue(int max_entries) { options.max_entries = max_entries; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a sequence with a set of reasonable initial defaults.
|
* Create a sequence with a set of reasonable initial defaults.
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,8 @@ class BWA {
|
||||||
void set_mismatch_penalty(int penalty);
|
void set_mismatch_penalty(int penalty);
|
||||||
void set_gap_open_penalty(int penalty);
|
void set_gap_open_penalty(int penalty);
|
||||||
void set_gap_extension_penalty(int penalty);
|
void set_gap_extension_penalty(int penalty);
|
||||||
|
void set_mode_nonstop();
|
||||||
|
void set_max_entries_in_queue(int max_entries);
|
||||||
|
|
||||||
// Perform the alignment
|
// Perform the alignment
|
||||||
Alignment* generate_single_alignment(const char* bases,
|
Alignment* generate_single_alignment(const char* bases,
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,13 @@
|
||||||
#include "bwa_gateway.h"
|
#include "bwa_gateway.h"
|
||||||
#include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h"
|
#include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h"
|
||||||
|
|
||||||
|
typedef void (BWA::*boolean_setter)();
|
||||||
typedef void (BWA::*int_setter)(int value);
|
typedef void (BWA::*int_setter)(int value);
|
||||||
typedef void (BWA::*float_setter)(float value);
|
typedef void (BWA::*float_setter)(float value);
|
||||||
|
|
||||||
static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
|
static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
|
||||||
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name);
|
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name);
|
||||||
|
static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter);
|
||||||
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
|
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
|
||||||
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
|
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
|
||||||
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);
|
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);
|
||||||
|
|
@ -100,6 +102,10 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||||
if(env->ExceptionCheck()) return;
|
if(env->ExceptionCheck()) return;
|
||||||
set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty);
|
set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty);
|
||||||
if(env->ExceptionCheck()) return;
|
if(env->ExceptionCheck()) return;
|
||||||
|
set_boolean_configuration_param(env, configuration, "nonStopMode", bwa, &BWA::set_mode_nonstop);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
set_int_configuration_param(env, configuration, "maxEntriesInQueue", bwa, &BWA::set_max_entries_in_queue);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases)
|
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases)
|
||||||
|
|
@ -357,6 +363,36 @@ static jstring get_configuration_file(JNIEnv* env, jobject configuration, const
|
||||||
return path;
|
return path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter) {
|
||||||
|
jclass configuration_class = env->GetObjectClass(configuration);
|
||||||
|
if(configuration_class == NULL) return;
|
||||||
|
|
||||||
|
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Boolean;");
|
||||||
|
if(configuration_field == NULL) return;
|
||||||
|
|
||||||
|
jobject boxed_value = env->GetObjectField(configuration,configuration_field);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
|
||||||
|
if(boxed_value != NULL) {
|
||||||
|
jclass boolean_box_class = env->FindClass("java/lang/Boolean");
|
||||||
|
if(boolean_box_class == NULL) return;
|
||||||
|
|
||||||
|
jmethodID boolean_extractor = env->GetMethodID(boolean_box_class,"booleanValue", "()Z");
|
||||||
|
if(boolean_extractor == NULL) return;
|
||||||
|
|
||||||
|
jboolean value = env->CallBooleanMethod(boxed_value,boolean_extractor);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
|
||||||
|
if(value)
|
||||||
|
(bwa->*setter)();
|
||||||
|
|
||||||
|
env->DeleteLocalRef(boolean_box_class);
|
||||||
|
}
|
||||||
|
|
||||||
|
env->DeleteLocalRef(boxed_value);
|
||||||
|
env->DeleteLocalRef(configuration_class);
|
||||||
|
}
|
||||||
|
|
||||||
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) {
|
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) {
|
||||||
jclass configuration_class = env->GetObjectClass(configuration);
|
jclass configuration_class = env->GetObjectClass(configuration);
|
||||||
if(configuration_class == NULL) return;
|
if(configuration_class == NULL) return;
|
||||||
|
|
|
||||||
|
|
@ -41,4 +41,14 @@ public class BWAConfiguration {
|
||||||
* What is the scoring penalty for a gap extension?
|
* What is the scoring penalty for a gap extension?
|
||||||
*/
|
*/
|
||||||
public Integer gapExtensionPenalty = null;
|
public Integer gapExtensionPenalty = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enter bwa's 'non-stop' mode (equivalent to bwa aln -N parameter).
|
||||||
|
*/
|
||||||
|
public Boolean nonStopMode = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the max queue size that bwa will use when searching for matches (equivalent to bwa aln -m parameter).
|
||||||
|
*/
|
||||||
|
public Integer maxEntriesInQueue = null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -88,7 +88,7 @@ public class GATKBAMIndex {
|
||||||
seek(0);
|
seek(0);
|
||||||
final byte[] buffer = readBytes(4);
|
final byte[] buffer = readBytes(4);
|
||||||
if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) {
|
if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) {
|
||||||
throw new RuntimeException("Invalid file header in BAM index " + mFile +
|
throw new ReviewedStingException("Invalid file header in BAM index " + mFile +
|
||||||
": " + new String(buffer));
|
": " + new String(buffer));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -112,7 +112,7 @@ public class GATKBAMIndex {
|
||||||
openIndexFile();
|
openIndexFile();
|
||||||
|
|
||||||
if (referenceSequence >= sequenceCount)
|
if (referenceSequence >= sequenceCount)
|
||||||
throw new ReviewedStingException("Invalid sequence number " + referenceSequence);
|
throw new ReviewedStingException("Invalid sequence number " + referenceSequence + " in index file " + mFile);
|
||||||
|
|
||||||
skipToSequence(referenceSequence);
|
skipToSequence(referenceSequence);
|
||||||
|
|
||||||
|
|
@ -183,12 +183,12 @@ public class GATKBAMIndex {
|
||||||
public int getLevelForBin(final Bin bin) {
|
public int getLevelForBin(final Bin bin) {
|
||||||
GATKBin gatkBin = new GATKBin(bin);
|
GATKBin gatkBin = new GATKBin(bin);
|
||||||
if(gatkBin.getBinNumber() >= MAX_BINS)
|
if(gatkBin.getBinNumber() >= MAX_BINS)
|
||||||
throw new SAMException("Tried to get level for invalid bin.");
|
throw new ReviewedStingException("Tried to get level for invalid bin in index file " + mFile);
|
||||||
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
|
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
|
||||||
if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
|
if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
throw new SAMException("Unable to find correct bin for bin "+bin);
|
throw new ReviewedStingException("Unable to find correct bin for bin " + bin + " in index file " + mFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -352,7 +352,7 @@ public class GATKBAMIndex {
|
||||||
fileChannel.read(buffer);
|
fileChannel.read(buffer);
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to read bytes from index file.");
|
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -379,7 +379,7 @@ public class GATKBAMIndex {
|
||||||
fileChannel.position(fileChannel.position() + count);
|
fileChannel.position(fileChannel.position() + count);
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to reposition file channel.");
|
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -388,7 +388,7 @@ public class GATKBAMIndex {
|
||||||
fileChannel.position(position);
|
fileChannel.position(position);
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to reposition of file channel.");
|
throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -858,171 +858,4 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
return calls;
|
return calls;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @param vc variant context with genotype likelihoods
|
|
||||||
* @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use
|
|
||||||
* @param exactAC integer array describing the AC from the exact model for the corresponding alleles
|
|
||||||
* @return genotypes
|
|
||||||
*/
|
|
||||||
public static GenotypesContext constrainedAssignGenotypes(VariantContext vc, boolean[] allelesToUse, int[] exactAC ) {
|
|
||||||
|
|
||||||
final GenotypesContext GLs = vc.getGenotypes();
|
|
||||||
|
|
||||||
// samples
|
|
||||||
final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();
|
|
||||||
|
|
||||||
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
|
|
||||||
final int numOriginalAltAlleles = allelesToUse.length;
|
|
||||||
final List<Allele> newAlleles = new ArrayList<Allele>(numOriginalAltAlleles+1);
|
|
||||||
newAlleles.add(vc.getReference());
|
|
||||||
final HashMap<Allele,Integer> alleleIndexMap = new HashMap<Allele,Integer>(); // need this for skipping dimensions
|
|
||||||
int[] alleleCount = new int[exactAC.length];
|
|
||||||
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
|
|
||||||
if ( allelesToUse[i] ) {
|
|
||||||
newAlleles.add(vc.getAlternateAllele(i));
|
|
||||||
alleleIndexMap.put(vc.getAlternateAllele(i),i);
|
|
||||||
alleleCount[i] = exactAC[i];
|
|
||||||
} else {
|
|
||||||
alleleCount[i] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
final List<Allele> newAltAlleles = newAlleles.subList(1,newAlleles.size());
|
|
||||||
final int numNewAltAlleles = newAltAlleles.size();
|
|
||||||
ArrayList<Integer> likelihoodIndexesToUse = null;
|
|
||||||
|
|
||||||
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
|
|
||||||
// then we can keep the PLs as is; otherwise, we determine which ones to keep
|
|
||||||
final int[][] PLcache;
|
|
||||||
if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
|
|
||||||
likelihoodIndexesToUse = new ArrayList<Integer>(30);
|
|
||||||
PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
|
|
||||||
|
|
||||||
for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) {
|
|
||||||
int[] alleles = PLcache[PLindex];
|
|
||||||
// consider this entry only if both of the alleles are good
|
|
||||||
if ( (alleles[0] == 0 || allelesToUse[alleles[0] - 1]) && (alleles[1] == 0 || allelesToUse[alleles[1] - 1]) )
|
|
||||||
likelihoodIndexesToUse.add(PLindex);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
|
|
||||||
}
|
|
||||||
|
|
||||||
// set up the trellis dimensions
|
|
||||||
// SAMPLE x alt 1 x alt 2 x alt 3
|
|
||||||
// todo -- check that exactAC has alt counts at [1],[2],[3] (and not [0],[1],[2])
|
|
||||||
double[][][][] transitionTrellis = new double[sampleIndices.size()+1][exactAC[1]][exactAC[2]][exactAC[3]];
|
|
||||||
// N x AC1 x AC2 x AC3; worst performance in multi-allelic where all alleles are moderate frequency
|
|
||||||
// capped at the MLE ACs*
|
|
||||||
// todo -- there's an optimization: not all states in the rectangular matrix will be reached, in fact
|
|
||||||
// todo -- for tT[0] we only care about tT[0][0][0][0], and for tT[1], only combinations of 0,1,2.
|
|
||||||
int idx = 1; // index of which sample we're on
|
|
||||||
int prevMaxState = 0; // the maximum state (e.g. AC) reached by the previous sample. Symmetric. (AC capping handled by logic in loop)
|
|
||||||
// iterate over each sample
|
|
||||||
for ( String sample : sampleIndices ) {
|
|
||||||
// push the likelihoods into the next possible states, that is to say
|
|
||||||
// L[state] = L[prev state] + L[genotype getting into state]
|
|
||||||
// iterate over each previous state, by dimension
|
|
||||||
// and contribute the likelihoods for transitions to this state
|
|
||||||
double[][][] prevState = transitionTrellis[idx-1];
|
|
||||||
double[][][] thisState = transitionTrellis[idx];
|
|
||||||
Genotype genotype = GLs.get(sample);
|
|
||||||
if ( genotype.isNoCall() || genotype.isFiltered() ) {
|
|
||||||
thisState = prevState.clone();
|
|
||||||
} else {
|
|
||||||
double[] likelihoods = genotype.getLikelihoods().getAsVector();
|
|
||||||
int dim1min = Math.max(0, alleleCount[0]-2*(sampleIndices.size()-idx+1));
|
|
||||||
int dim1max = Math.min(prevMaxState,alleleCount[0]);
|
|
||||||
int dim2min = Math.max(0,alleleCount[1]-2*(sampleIndices.size()-idx+1));
|
|
||||||
int dim2max = Math.min(prevMaxState,alleleCount[1]);
|
|
||||||
int dim3min = Math.max(0,alleleCount[2]-2*(sampleIndices.size()-idx+1));
|
|
||||||
int dim3max = Math.min(prevMaxState,alleleCount[2]);
|
|
||||||
// cue annoying nested for loop
|
|
||||||
for ( int a1 = dim1min ; a1 <= dim1max; a1++ ) {
|
|
||||||
for ( int a2 = dim2min; a2 <= dim2max; a2++ ) {
|
|
||||||
for ( int a3 = dim3min; a3 <= dim3max; a3++ ) {
|
|
||||||
double base = prevState[a1][a2][a3];
|
|
||||||
for ( int likIdx : likelihoodIndexesToUse ) {
|
|
||||||
int[] offsets = calculateOffsets(PLcache[likIdx]);
|
|
||||||
thisState[a1+offsets[1]][a2+offsets[2]][a3+offsets[3]] = base + likelihoods[likIdx];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
prevMaxState += 2;
|
|
||||||
}
|
|
||||||
idx++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// after all that pain, we have a fully calculated trellis. Now just march backwards from the EAC state and
|
|
||||||
// assign genotypes along the greedy path
|
|
||||||
|
|
||||||
GenotypesContext calls = GenotypesContext.create(sampleIndices.size());
|
|
||||||
int[] state = alleleCount;
|
|
||||||
for ( String sample : Utils.reverse(sampleIndices) ) {
|
|
||||||
--idx;
|
|
||||||
// the next state will be the maximum achievable state
|
|
||||||
Genotype g = GLs.get(sample);
|
|
||||||
if ( g.isNoCall() || ! g.hasLikelihoods() ) {
|
|
||||||
calls.add(g);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// subset to the new likelihoods. These are not used except for subsetting in the context iself.
|
|
||||||
// i.e. they are not a part of the calculation.
|
|
||||||
final double[] originalLikelihoods = GLs.get(sample).getLikelihoods().getAsVector();
|
|
||||||
double[] newLikelihoods;
|
|
||||||
if ( likelihoodIndexesToUse == null ) {
|
|
||||||
newLikelihoods = originalLikelihoods;
|
|
||||||
} else {
|
|
||||||
newLikelihoods = new double[likelihoodIndexesToUse.size()];
|
|
||||||
int newIndex = 0;
|
|
||||||
for ( int oldIndex : likelihoodIndexesToUse )
|
|
||||||
newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
|
|
||||||
|
|
||||||
// might need to re-normalize
|
|
||||||
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo -- alter this. For ease of programming, likelihood indeces are
|
|
||||||
// todo -- used to iterate over achievable states.
|
|
||||||
double max = Double.NEGATIVE_INFINITY;
|
|
||||||
int[] bestState = null;
|
|
||||||
int[] bestAlleles = null;
|
|
||||||
int bestLikIdx = -1;
|
|
||||||
for ( int likIdx : likelihoodIndexesToUse ) {
|
|
||||||
int[] offsets = calculateOffsets(PLcache[likIdx]);
|
|
||||||
double val = transitionTrellis[idx-1][state[0]-offsets[0]][state[1]-offsets[1]][state[2]-offsets[2]];
|
|
||||||
if ( val > max ) {
|
|
||||||
max = val;
|
|
||||||
bestState = new int[] { state[0]-offsets[0],state[1]-offsets[1],state[2]-offsets[2]};
|
|
||||||
bestAlleles = PLcache[likIdx];
|
|
||||||
bestLikIdx = likIdx;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
state = bestState;
|
|
||||||
List<Allele> gtAlleles = new ArrayList<Allele>(2);
|
|
||||||
gtAlleles.add(newAlleles.get(bestAlleles[0]));
|
|
||||||
gtAlleles.add(newAlleles.get(bestAlleles[1]));
|
|
||||||
|
|
||||||
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(bestLikIdx, newLikelihoods);
|
|
||||||
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
|
||||||
if ( numNewAltAlleles == 0 )
|
|
||||||
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
|
||||||
else
|
|
||||||
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
|
|
||||||
calls.add(new Genotype(sample, gtAlleles, qual, null, attrs, false));
|
|
||||||
|
|
||||||
}
|
|
||||||
return calls;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int[] calculateOffsets(int[] alleleIndeces) {
|
|
||||||
int[] offsets = new int[4];
|
|
||||||
for ( int i = 0; i < alleleIndeces.length; i++ ) {
|
|
||||||
offsets[alleleIndeces[i]]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return offsets;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
|
|
||||||
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
|
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
|
||||||
|
|
||||||
protected final static Logger log = Logger.getLogger(VCFCodec.class);
|
protected final static Logger log = Logger.getLogger(VCFCodec.class);
|
||||||
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
|
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
|
||||||
|
|
@ -252,7 +253,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
|
|
||||||
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
|
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
|
||||||
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
||||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||||
" tokens, and saw " + nParts + " )", lineNo);
|
" tokens, and saw " + nParts + " )", lineNo);
|
||||||
|
|
||||||
|
|
@ -518,8 +519,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
* @param lineNo the line number for this record
|
* @param lineNo the line number for this record
|
||||||
*/
|
*/
|
||||||
private static void checkAllele(String allele, boolean isRef, int lineNo) {
|
private static void checkAllele(String allele, boolean isRef, int lineNo) {
|
||||||
if ( allele == null || allele.length() == 0 )
|
if ( allele == null || allele.length() == 0 )
|
||||||
generateException("Empty alleles are not permitted in VCF records", lineNo);
|
generateException("Empty alleles are not permitted in VCF records", lineNo);
|
||||||
|
|
||||||
|
if ( MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING )
|
||||||
|
log.warn(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
|
||||||
|
|
||||||
if ( isSymbolicAllele(allele) ) {
|
if ( isSymbolicAllele(allele) ) {
|
||||||
if ( isRef ) {
|
if ( isRef ) {
|
||||||
|
|
@ -572,12 +576,13 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
|
|
||||||
public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
||||||
boolean clipping = true;
|
boolean clipping = true;
|
||||||
|
final byte ref0 = (byte)ref.charAt(0);
|
||||||
|
|
||||||
for ( Allele a : unclippedAlleles ) {
|
for ( Allele a : unclippedAlleles ) {
|
||||||
if ( a.isSymbolic() )
|
if ( a.isSymbolic() )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) {
|
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
|
||||||
clipping = false;
|
clipping = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -604,7 +609,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
stillClipping = false;
|
stillClipping = false;
|
||||||
else if ( ref.length() == clipping )
|
else if ( ref.length() == clipping )
|
||||||
generateException("bad alleles encountered", lineNo);
|
generateException("bad alleles encountered", lineNo);
|
||||||
else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] )
|
else if ( a.getBases()[a.length()-clipping-1] != ((byte)ref.charAt(ref.length()-clipping-1)) )
|
||||||
stillClipping = false;
|
stillClipping = false;
|
||||||
}
|
}
|
||||||
if ( stillClipping )
|
if ( stillClipping )
|
||||||
|
|
@ -613,6 +618,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
|
|
||||||
return clipping;
|
return clipping;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* clip the alleles, based on the reference
|
* clip the alleles, based on the reference
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeSuite;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
public class VCFCodecUnitTest extends BaseTest {
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Provider
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class AlleleClippingTestProvider extends TestDataProvider {
|
||||||
|
final String ref;
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
final int expectedClip;
|
||||||
|
|
||||||
|
private AlleleClippingTestProvider(final int expectedClip, final String ref, final String ... alleles) {
|
||||||
|
super(AlleleClippingTestProvider.class);
|
||||||
|
this.ref = ref;
|
||||||
|
for ( final String allele : alleles )
|
||||||
|
this.alleles.add(Allele.create(allele));
|
||||||
|
this.expectedClip = expectedClip;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "AlleleClippingTestProvider")
|
||||||
|
public Object[][] MakeAlleleClippingTest() {
|
||||||
|
// pair clipping
|
||||||
|
new AlleleClippingTestProvider(0, "ATT", "CCG");
|
||||||
|
new AlleleClippingTestProvider(1, "ATT", "CCT");
|
||||||
|
new AlleleClippingTestProvider(2, "ATT", "CTT");
|
||||||
|
new AlleleClippingTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
|
||||||
|
|
||||||
|
// triplets
|
||||||
|
new AlleleClippingTestProvider(0, "ATT", "CTT", "CGG");
|
||||||
|
new AlleleClippingTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
|
||||||
|
new AlleleClippingTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
|
||||||
|
|
||||||
|
return AlleleClippingTestProvider.getTests(AlleleClippingTestProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(dataProvider = "AlleleClippingTestProvider")
|
||||||
|
public void TestAlleleClipping(AlleleClippingTestProvider cfg) {
|
||||||
|
int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref, 0, 1);
|
||||||
|
Assert.assertEquals(result, cfg.expectedClip);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -134,8 +134,8 @@ class GATKResourcesBundle extends QScript {
|
||||||
addResource(new Resource("/humgen/1kg/processing/official_release/phase1/ALL.wgs.VQSR_consensus_biallelic.20101123.indels.sites.vcf",
|
addResource(new Resource("/humgen/1kg/processing/official_release/phase1/ALL.wgs.VQSR_consensus_biallelic.20101123.indels.sites.vcf",
|
||||||
"1000G_biallelic.indels", b37, true, false))
|
"1000G_biallelic.indels", b37, true, false))
|
||||||
|
|
||||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf",
|
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
|
||||||
"Mills_Devine_2hit.indels", b37, true, true))
|
"Mills_and_1000G_gold_standard.indels", b37, true, true))
|
||||||
|
|
||||||
//
|
//
|
||||||
// example call set for wiki tutorial
|
// example call set for wiki tutorial
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
||||||
<ivy-module version="1.0">
|
<ivy-module version="1.0">
|
||||||
<info organisation="org.broad" module="tribble" revision="46" status="integration" />
|
<info organisation="org.broad" module="tribble" revision="53" status="integration" />
|
||||||
</ivy-module>
|
</ivy-module>
|
||||||
Loading…
Reference in New Issue