Bug fix in clipping function in ReadUtils for when the read ends at exactly the clipping boundary. Bug fixes in HaplotypeCaller GGA mode for when Smith-Waterman produces a different allele than what was given in the input alleles VCF. GGA mode now works with multiallelic records. Adding min pruning factor argument which is combined with the pruning factor that is determined dynamically by the coverage.

2012-06-04 10:55:36 -04:00 · 2012-06-04 10:55:36 -04:00 · 320956ee4b
parent 7a54baf08c
commit 320956ee4b
4 changed files with 13 additions and 13 deletions
--- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java
@ -63,7 +63,7 @@ public class AnalysisDataManager {
     */
    public final void addToAllTables( final Object[] key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) {

-        int qscore = Integer.parseInt( key[1].toString() );
+        final int qscore = Integer.parseInt( key[1].toString() );
        RecalDatum collapsedDatum;
        final Object[] readGroupCollapsedKey = new Object[1];
        final Object[] covariateCollapsedKey = new Object[2];
--- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
+++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@ -193,7 +193,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
        requestedCovariates = new ArrayList<Covariate>();

        try {
-            for ( String line : new XReadLines(new File( RECAL_FILE )) ) {
+            for ( final String line : new XReadLines(new File( RECAL_FILE )) ) {
                lineNumber++;
                if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() || line.equals(EOF_MARKER) )  {
                    ; // Skip over the comment lines, (which start with '#')
@ -271,7 +271,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
            key[iii] = cov.getValue( vals[iii] );
        }
        // Create a new datum using the number of observations, number of mismatches, and reported quality score
-        RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
+        final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
        // Add that datum to all the collapsed tables which will be used in the sequential calculation
        dataManager.addToAllTables( key, datum, IGNORE_QSCORES_LESS_THAN );
    }
@ -281,11 +281,11 @@ public class AnalyzeCovariates extends CommandLineProgram {
        int numReadGroups = 0;

        // for each read group
-        for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
+        for( final Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {

-            if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
-                String readGroup = readGroupKey.toString();
-                RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
+            if( NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS ) {
+                final String readGroup = readGroupKey.toString();
+                final RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
                logger.info(String.format(
                        "Writing out data tables for read group: %s\twith %s observations\tand aggregate residual error = %.3f",
                        readGroup, readGroupDatum.getNumObservations(),
@ -308,9 +308,9 @@ public class AnalyzeCovariates extends CommandLineProgram {
                        // Output the header
                        output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");

-                        for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
+                        for( final Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet() ) {
                            output.print( covariateKey.toString() + "\t" );                                                     // Covariate
-                            RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
+                            final RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
                            output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" );                    // Qreported
                            output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" );  // Qempirical
                            output.print( thisDatum.getNumMismatches() + "\t" );                                                // nMismatches
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@ -159,9 +159,9 @@ public class Haplotype {

    @Requires({"refInsertLocation >= 0"})
    public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) {
-        
+
        if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
-        int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
+        final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
        if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
            return new Haplotype(bases.clone());
        }
@ -198,7 +198,7 @@ public class Haplotype {
        } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
            return new Haplotype(bases.clone());
        }
-        
+
        return new Haplotype(newHaplotype);
    }

--- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
@ -447,7 +447,7 @@ public class ReadUtils {

            if (goalReached) {
                // Is this base's reference position within this cigar element? Or did we use it all?
-                boolean endsWithinCigar = shift < cigarElement.getLength();
+                boolean endsWithinCigar = shift <= cigarElement.getLength();

                // If it isn't, we need to check the next one. There should *ALWAYS* be a next one
                // since we checked if the goal coordinate is within the read length, so this is just a sanity check.