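Fix an off-by-one bug in the base-string hashing function (the power-of-4 exponent in recursiveHash; maxHash now returns hashSize()-1). Ignore reads whose window at the pileup offset contains non-A/C/G/T base characters.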

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1842 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-10-14 19:41:26 +00:00
parent 72c34f11dd
commit 1f66738c8e
1 changed files with 13 additions and 9 deletions

View File

@ -31,7 +31,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Referen
@Argument(fullName="useSecondaryBase",doc="Use the secondary base of a read as part of the calculation", required=false)
boolean useSecondaryBase = false;
@Argument(fullName="confidentRefThreshold",doc="Set the lod score that defines confidence in ref, defaults to 4", required=false)
int confidentRefThreshold = 4;
int confidentRefThreshold = 5;
@Argument(fullName="pileupMismatchThreshold",doc="Set the maximum number of mismatches at a locus before choosing not to use it in calculation. Defaults to 1.", required=false)
int pileupMismatchThreshold = 1;
@ -107,23 +107,25 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Referen
/**
 * Decides whether a read is used at the given offset.
 *
 * NOTE(review): this listing shows two consecutive return statements, which looks like the
 * removed and the added line of a diff rendered without +/- markers. The first accepts every
 * read; the second accepts a read only when readWindowContainsNonBaseCharacters reports false
 * for it.
 *
 * @param read   the read under consideration
 * @param offset position in the read that is handed to the window check as its starting index
 * @return true if the read should be included
 */
public boolean includeRead ( SAMRecord read, int offset) {
// todo -- do we want to filter out individual reads?
return true;
// NOTE(review): the callee names its third parameter isNegative, but the value passed here is
// the not-primary-alignment flag -- presumably the negative-strand flag was intended; confirm.
return ! readWindowContainsNonBaseCharacters(read,offset,read.getNotPrimaryAlignmentFlag());
}
public boolean readWindowContainsNonBaseCharacters( SAMRecord read, int offset, int posNeg ) {
public boolean readWindowContainsNonBaseCharacters( SAMRecord read, int offset, boolean isNegative ) {
byte[] bases = read.getReadBases();
if ( posNeg > 0 ) {
for ( int i = offset; i < offset + nPreviousBases; i ++ ) {
// System.out.println("readWindowContainsNonBaseCharacters");
if ( ! isNegative ) {
for ( int i = offset; i <= offset + nPreviousBases; i ++ ) {
char base = Character.toUpperCase(convertIUPACByteToChar(bases[i]));
// System.out.println(base);
if ( ! ( base == 'A' || base == 'G' || base == 'C' || base == 'T') ) {
return true;
}
}
return false;
} else {
for ( int i = offset; i > offset - nPreviousBases; i -- ) {
for ( int i = offset; i >= offset - nPreviousBases; i -- ) {
char base = Character.toUpperCase(convertIUPACByteToChar(bases[i]));
// System.out.println(base);
if ( ! ( base == 'A' || base == 'G' || base == 'C' || base == 'T') ) {
return true;
}
@ -236,6 +238,7 @@ class BaseTransitionTable {
} else {
context = context + bases[1];
}
System.out.println(context);
confusionTable[strHash.hash(context)][strHash.hash(bases[0])] ++;
}
@ -290,7 +293,7 @@ class BaseStringHash {
}
/**
 * Reports the maximum hash value (going by the method name).
 *
 * NOTE(review): two consecutive return statements are shown, which looks like the removed and
 * the added line of a diff. The first evaluates to 4 raised to the power stringLength; the
 * second is hashSize() minus one. The body of hashSize() is not visible here, so how the two
 * values relate cannot be confirmed from this view.
 *
 * @return the maximum hash value
 */
public int maxHash() {
return (int) Math.round(Math.pow(4,stringLength));
return hashSize()-1;
}
public int hashSize() {
@ -317,10 +320,11 @@ class BaseStringHash {
}
/**
 * Recursively hashes the characters of s from position offset to the end of the string by
 * summing, for each character, its single-character hash scaled by a power of 4 that depends on
 * the character's distance from the end of the string.
 *
 * NOTE(review): two consecutive recursive return statements are shown, which looks like the
 * removed and the added line of a diff. They differ only in the exponent: the first weights the
 * character at offset by 4^(s.length()-offset), so the last character is weighted by 4; the
 * second uses 4^(s.length()-offset-1), so the last character is weighted by 1.
 *
 * @param s      the string to hash; presumably made of base characters that hash(char)
 *               maps to 0..3 -- hash(char) is not visible here, confirm
 * @param offset index of the first character to include
 * @return the accumulated hash of s from offset onward; 0 when offset equals s.length()
 */
public int recursiveHash( String s, int offset ) {
// System.out.println(s+"\t"+offset);
if ( offset == s.length() ) {
// Base case: nothing left to hash.
return 0;
} else {
return (int) Math.round(hash(s.charAt(offset))*Math.pow(4,s.length()-offset)) + recursiveHash(s, offset+1);
return (int) Math.round(hash(s.charAt(offset))*Math.pow(4,s.length()-offset-1)) + recursiveHash(s, offset+1);
}
}