Bug fix in clipping function in ReadUtils for when the read ends at exactly the clipping boundary. Bug fixes in HaplotypeCaller GGA mode for when Smith-Waterman produces a different allele than what was given in the input alleles VCF. GGA mode now works with multiallelic records. Adding min pruning factor argument which is combined with the pruning factor that is determined dynamically by the coverage.

This commit is contained in:
Ryan Poplin 2012-06-04 10:55:36 -04:00
parent 7a54baf08c
commit 320956ee4b
4 changed files with 13 additions and 13 deletions

View File

@ -63,7 +63,7 @@ public class AnalysisDataManager {
*/
public final void addToAllTables( final Object[] key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) {
int qscore = Integer.parseInt( key[1].toString() );
final int qscore = Integer.parseInt( key[1].toString() );
RecalDatum collapsedDatum;
final Object[] readGroupCollapsedKey = new Object[1];
final Object[] covariateCollapsedKey = new Object[2];

View File

@ -193,7 +193,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
requestedCovariates = new ArrayList<Covariate>();
try {
for ( String line : new XReadLines(new File( RECAL_FILE )) ) {
for ( final String line : new XReadLines(new File( RECAL_FILE )) ) {
lineNumber++;
if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() || line.equals(EOF_MARKER) ) {
; // Skip over the comment lines, (which start with '#')
@ -271,7 +271,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
key[iii] = cov.getValue( vals[iii] );
}
// Create a new datum using the number of observations, number of mismatches, and reported quality score
RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
// Add that datum to all the collapsed tables which will be used in the sequential calculation
dataManager.addToAllTables( key, datum, IGNORE_QSCORES_LESS_THAN );
}
@ -281,11 +281,11 @@ public class AnalyzeCovariates extends CommandLineProgram {
int numReadGroups = 0;
// for each read group
for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
for( final Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
String readGroup = readGroupKey.toString();
RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
if( NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS ) {
final String readGroup = readGroupKey.toString();
final RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
logger.info(String.format(
"Writing out data tables for read group: %s\twith %s observations\tand aggregate residual error = %.3f",
readGroup, readGroupDatum.getNumObservations(),
@ -308,9 +308,9 @@ public class AnalyzeCovariates extends CommandLineProgram {
// Output the header
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
for( final Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet() ) {
output.print( covariateKey.toString() + "\t" ); // Covariate
RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
final RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical
output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches

View File

@ -159,9 +159,9 @@ public class Haplotype {
@Requires({"refInsertLocation >= 0"})
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) {
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
return new Haplotype(bases.clone());
}
@ -198,7 +198,7 @@ public class Haplotype {
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
return new Haplotype(bases.clone());
}
return new Haplotype(newHaplotype);
}

View File

@ -447,7 +447,7 @@ public class ReadUtils {
if (goalReached) {
// Is this base's reference position within this cigar element? Or did we use it all?
boolean endsWithinCigar = shift < cigarElement.getLength();
boolean endsWithinCigar = shift <= cigarElement.getLength();
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.