From 320956ee4b788c1a25c07d26ee63ec4f2325f9a1 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 4 Jun 2012 10:55:36 -0400 Subject: [PATCH] Bug fix in clipping function in ReadUtils for when the read ends at exactly the clipping boundary. Bug fixes in HaplotypeCaller GGA mode for when Smith-Waterman produces a different allele than what was given in the input alleles VCF. GGA mode now works with multiallelic records. Adding min pruning factor argument which is combined with the pruning factor that is determined dynamically by the coverage. --- .../analyzecovariates/AnalysisDataManager.java | 2 +- .../analyzecovariates/AnalyzeCovariates.java | 16 ++++++++-------- .../broadinstitute/sting/utils/Haplotype.java | 6 +++--- .../sting/utils/sam/ReadUtils.java | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java index d7f0e560f..1230c86be 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java @@ -63,7 +63,7 @@ public class AnalysisDataManager { */ public final void addToAllTables( final Object[] key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) { - int qscore = Integer.parseInt( key[1].toString() ); + final int qscore = Integer.parseInt( key[1].toString() ); RecalDatum collapsedDatum; final Object[] readGroupCollapsedKey = new Object[1]; final Object[] covariateCollapsedKey = new Object[2]; diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index a99959341..59a3d8cdb 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -193,7 +193,7 @@ public class AnalyzeCovariates extends CommandLineProgram { requestedCovariates = new ArrayList(); try { - for ( String line : new XReadLines(new File( RECAL_FILE )) ) { + for ( final String line : new XReadLines(new File( RECAL_FILE )) ) { lineNumber++; if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() || line.equals(EOF_MARKER) ) { ; // Skip over the comment lines, (which start with '#') @@ -271,7 +271,7 @@ public class AnalyzeCovariates extends CommandLineProgram { key[iii] = cov.getValue( vals[iii] ); } // Create a new datum using the number of observations, number of mismatches, and reported quality score - RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 ); + final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 ); // Add that datum to all the collapsed tables which will be used in the sequential calculation dataManager.addToAllTables( key, datum, IGNORE_QSCORES_LESS_THAN ); } @@ -281,11 +281,11 @@ public class AnalyzeCovariates extends CommandLineProgram { int numReadGroups = 0; // for each read group - for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) { + for( final Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) { - if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) { - String readGroup = readGroupKey.toString(); - RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey); + if( NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS ) { + final String readGroup = readGroupKey.toString(); + final RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey); logger.info(String.format( "Writing out data tables for read group: %s\twith %s observations\tand aggregate residual error = %.3f", readGroup, readGroupDatum.getNumObservations(), @@ -308,9 +308,9 @@ public class AnalyzeCovariates extends CommandLineProgram { // Output the header output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases"); - for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) { + for( final Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet() ) { output.print( covariateKey.toString() + "\t" ); // Covariate - RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey); + final RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey); output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index cadec5347..d14d0deee 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -159,9 +159,9 @@ public class Haplotype { @Requires({"refInsertLocation >= 0"}) public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) { - + if( refAllele.length() != altAllele.length() ) { refInsertLocation++; } - int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true); + final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true); if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype return new Haplotype(bases.clone()); } @@ -198,7 +198,7 @@ public class Haplotype { } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding return new Haplotype(bases.clone()); } - + return new Haplotype(newHaplotype); } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 4e2fd1446..71e08ef23 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -447,7 +447,7 @@ public class ReadUtils { if (goalReached) { // Is this base's reference position within this cigar element? Or did we use it all? - boolean endsWithinCigar = shift < cigarElement.getLength(); + boolean endsWithinCigar = shift <= cigarElement.getLength(); // If it isn't, we need to check the next one. There should *ALWAYS* be a next one // since we checked if the goal coordinate is within the read length, so this is just a sanity check.