diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
index 8826de232..864be55b7 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
@@ -19,8 +19,19 @@ import java.util.Map;
/**
* Total (unfiltered) depth over all samples.
*
- * Affected by downsampling (-dcov) though, so the max value one can obtain for N samples with -dcov D
- * is N * D
+ * This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
+ * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
+ * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
+ * The AD values (one for each of REF and ALT fields) are the counts of all reads that carried with them the
+ * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
+ * power I have to determine the genotype of the sample at this site, while the AD tells me how many times
+ * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
+ * the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like
+ * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
+ * normally be excluded from the statistical calculations going into GQ and QUAL.
+ *
+ * Note that the DP is affected by downsampling (-dcov), so the max value one can obtain for N samples with
+ * -dcov D is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
index 1cd30c51d..5d706d9c5 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
@@ -24,9 +24,9 @@ import java.util.Map;
/**
- * The depth of coverage of each VCF allele in this sample
+ * The depth of coverage of each VCF allele in this sample.
*
- * Complementary fields that two important ways of thinking about the depth of the data for this sample
+ * This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
@@ -38,8 +38,8 @@ import java.util.Map;
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
- * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base
- * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what
+ * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base
+ * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what
* determine the genotype calls (see below).
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index 11cde5ffe..58b33924b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -71,9 +71,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// gdebug removeme
// todo -cleanup
- private HaplotypeIndelErrorModel model;
- private boolean useOldWrongHorribleHackedUpLikelihoodModel = false;
-//
private GenomeLoc lastSiteVisited;
private ArrayList alleleList;
@@ -84,26 +81,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
- if (UAC.GSA_PRODUCTION_ONLY == false) {
- pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
- UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
- useOldWrongHorribleHackedUpLikelihoodModel = false;
- }
- else {
- useOldWrongHorribleHackedUpLikelihoodModel = true;
- double INSERTION_START_PROBABILITY = 1e-3;
-
- double INSERTION_END_PROBABILITY = 0.5;
-
- double ALPHA_DELETION_PROBABILITY = 1e-3;
-
-
- model = new HaplotypeIndelErrorModel(3, INSERTION_START_PROBABILITY,
- INSERTION_END_PROBABILITY,ALPHA_DELETION_PROBABILITY,UAC.INDEL_HAPLOTYPE_SIZE, false, UAC.OUTPUT_DEBUG_INDEL_INFO);
- }
-
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
- UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
+ UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.BANDED_INDEL_COMPUTATION);
alleleList = new ArrayList();
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
@@ -122,10 +101,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
GenomeLoc loc = ref.getLocus();
ArrayList aList = new ArrayList();
- if (DEBUG) {
- System.out.println("'''''''''''''''''''''");
- System.out.println("Loc:"+loc.toString());
- }
HashMap consensusIndelStrings = new HashMap();
int insCount = 0, delCount = 0;
@@ -159,12 +134,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
continue;
}
- if (DEBUG && p.isIndel()) {
+/* if (DEBUG && p.isIndel()) {
System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
p.getEventLength(),p.getType().toString(), p.getEventBases());
}
-
+ */
String indelString = p.getEventBases();
if (p.isInsertion()) {
@@ -234,7 +209,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
}
- if (DEBUG) {
+/* if (DEBUG) {
int icount = indelPileup.getNumberOfInsertions();
int dcount = indelPileup.getNumberOfDeletions();
if (icount + dcount > 0)
@@ -248,7 +223,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
System.out.println();
}
- }
+ } */
}
int maxAlleleCnt = 0;
@@ -259,8 +234,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
maxAlleleCnt = curCnt;
bestAltAllele = s;
}
- if (DEBUG)
- System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) );
+// if (DEBUG)
+// System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) );
} //gdebug-
if (maxAlleleCnt < minIndelCountForGenotyping)
@@ -383,18 +358,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
}
}
- int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
- int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
- int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
- if (useOldWrongHorribleHackedUpLikelihoodModel) {
- numPrefBases = 20;
- hsize=80;
- }
- if (DEBUG)
- System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
- (int)ref.getWindow().size(), loc.getStart(), numPrefBases);
- //System.out.println(eventLength);
+ final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
+ final int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
+ final int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
+
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
ref, hsize, numPrefBases);
@@ -413,13 +381,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
pileup = context.getBasePileup();
if (pileup != null ) {
- double[] genotypeLikelihoods;
-
- if (useOldWrongHorribleHackedUpLikelihoodModel)
- genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
- else
- genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
-
+ final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
alleleList,
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
index 3c8fd4451..f437db680 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@@ -143,31 +143,24 @@ public class UnifiedArgumentCollection {
@Hidden
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
public int INDEL_HAPLOTYPE_SIZE = 80;
- @Hidden
- @Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false)
- public boolean DO_CONTEXT_DEPENDENT_PENALTIES = true;
+
//gdebug+
// experimental arguments, NOT TO BE USED BY ANYONE WHOSE INITIALS AREN'T GDA!!!
+// @Hidden
+// @Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NOT USE", required = false)
+// public boolean GET_GAP_PENALTIES_FROM_DATA = false;
+//
+// @Hidden
+// @Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NOT USE")
+// public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
@Hidden
- @Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
- public boolean GET_GAP_PENALTIES_FROM_DATA = false;
-
- @Hidden
- @Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
- public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
+ @Argument(fullName = "bandedIndel", shortName = "bandedIndel", doc = "Banded Indel likelihood computation", required = false)
+ public boolean BANDED_INDEL_COMPUTATION = false;
@Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
- @Hidden
- @Argument(fullName = "dovit", shortName = "dovit", doc = "Perform full Viterbi calculation when evaluating the HMM", required = false)
- public boolean dovit = false;
-
- @Hidden
- @Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
- public boolean GSA_PRODUCTION_ONLY = false;
-
@Hidden
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false;
@@ -204,17 +197,12 @@ public class UnifiedArgumentCollection {
uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
- uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES;
uac.alleles = alleles;
- uac.GET_GAP_PENALTIES_FROM_DATA = GET_GAP_PENALTIES_FROM_DATA;
- uac.INDEL_RECAL_FILE = INDEL_RECAL_FILE;
// todo- arguments to remove
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
- uac.dovit = dovit;
- uac.GSA_PRODUCTION_ONLY = GSA_PRODUCTION_ONLY;
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
-
+ uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
return uac;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
index 706b3abf7..55cc38e04 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
+import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
@@ -45,48 +46,15 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
-/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate;
-import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager;
-import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum;
-import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection;
-*/
-
public class PairHMMIndelErrorModel {
-
-
public static final int BASE_QUAL_THRESHOLD = 20;
-
- private static final int MATCH_OFFSET = 0;
- private static final int X_OFFSET = 1;
- private static final int Y_OFFSET = 2;
-
- private static final int DIAG = 0;
- private static final int UP = 1;
- private static final int LEFT = 2;
-
- private static final int DIAG_GOTO_M = 0;
- private static final int DIAG_GOTO_X = 1;
- private static final int DIAG_GOTO_Y = 2;
-
- private static final int UP_GOTO_M = 4;
- private static final int UP_GOTO_X = 5;
- private static final int UP_GOTO_Y = 6;
-
- private static final int LEFT_GOTO_M = 8;
- private static final int LEFT_GOTO_X = 9;
- private static final int LEFT_GOTO_Y = 10;
-
- private static final int[] ACTIONS_M = {DIAG_GOTO_M, DIAG_GOTO_X, DIAG_GOTO_Y};
- private static final int[] ACTIONS_X = {UP_GOTO_M, UP_GOTO_X, UP_GOTO_Y};
- private static final int[] ACTIONS_Y = {LEFT_GOTO_M, LEFT_GOTO_X, LEFT_GOTO_Y};
-
-
private final double logGapOpenProbability;
private final double logGapContinuationProbability;
private boolean DEBUG = false;
+ private boolean bandedLikelihoods = false;
private static final int MAX_CACHED_QUAL = 127;
@@ -103,36 +71,13 @@ public class PairHMMIndelErrorModel {
private static final double MIN_GAP_CONT_PENALTY = 10.0;
private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this.
-
- private boolean doViterbi = false;
-
- private final boolean useAffineGapModel = true;
- private boolean doContextDependentPenalties = false;
-
private final double[] GAP_OPEN_PROB_TABLE;
private final double[] GAP_CONT_PROB_TABLE;
- private boolean getGapPenaltiesFromFile = false;
-
- private int SMOOTHING = 1;
- private int MAX_QUALITY_SCORE = 50;
- private int PRESERVE_QSCORES_LESS_THAN = 5;
-
/////////////////////////////
// Private Member Variables
/////////////////////////////
-//copy+
-/* private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
- private final ArrayList requestedCovariates = new ArrayList(); // List of covariates to be used in this calculation
- private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
- private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
- private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
- protected static final String EOF_MARKER = "EOF";
- private long numReadsWithMalformedColorSpace = 0;
- private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
- private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
- */
-//copy-
+
static {
LOG_ONE_HALF= -Math.log10(2.0);
END_GAP_COST = LOG_ONE_HALF;
@@ -148,143 +93,11 @@ public class PairHMMIndelErrorModel {
}
}
- public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit,boolean gpf, File RECAL_FILE) {
-
- this(indelGOP, indelGCP, deb, doCDP, dovit);
- this.getGapPenaltiesFromFile = gpf;
-
- // read data from recal file
- // gdebug - start copy from TableRecalibrationWalker
-/* if (gpf) {
- boolean sawEOF = false;
- boolean REQUIRE_EOF = false;
-
- int lineNumber = 0;
- boolean foundAllCovariates = false;
- // Get a list of all available covariates
- final List> classes = new PluginManager(Covariate.class).getPlugins();
-
- try {
- for ( String line : new XReadLines(RECAL_FILE) ) {
- lineNumber++;
- if ( EOF_MARKER.equals(line) ) {
- sawEOF = true;
- } else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() ) {
- ; // Skip over the comment lines, (which start with '#')
- }
- // Read in the covariates that were used from the input file
- else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
- if( foundAllCovariates ) {
- throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
- } else { // Found the covariate list in input file, loop through all of them and instantiate them
- String[] vals = line.split(",");
- for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
- boolean foundClass = false;
- for( Class> covClass : classes ) {
- if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
- foundClass = true;
- try {
- Covariate covariate = (Covariate)covClass.newInstance();
- requestedCovariates.add( covariate );
- } catch (Exception e) {
- throw new DynamicClassResolutionException(covClass, e);
- }
-
- }
- }
-
- if( !foundClass ) {
- throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
- }
- }
- }
-
- } else { // Found a line of data
- if( !foundAllCovariates ) {
- foundAllCovariates = true;
-
- // At this point all the covariates should have been found and initialized
- if( requestedCovariates.size() < 2 ) {
- throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
- }
-
- final boolean createCollapsedTables = true;
-
- // Initialize any covariate member variables using the shared argument collection
- for( Covariate cov : requestedCovariates ) {
- cov.initialize( RAC );
- }
- // Initialize the data hashMaps
- dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
-
- }
- addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
- }
- }
-
- } catch ( FileNotFoundException e ) {
- throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
- } catch ( NumberFormatException e ) {
- throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
- }
-
- if ( !sawEOF ) {
- final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
- if ( REQUIRE_EOF )
- throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
- }
-
- if( dataManager == null ) {
- throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
- }
-
- // Create the tables of empirical quality scores that will be used in the sequential calculation
- dataManager.generateEmpiricalQualities( SMOOTHING, MAX_QUALITY_SCORE );
- }
- // debug end copy
- */
- }
- /**
- * For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
- */
- /*
- private void addCSVData(final File file, final String line) {
- final String[] vals = line.split(",");
-
- // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
- if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
- throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
- " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
- }
-
- final Object[] key = new Object[requestedCovariates.size()];
- Covariate cov;
- int iii;
- for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
- cov = requestedCovariates.get( iii );
- key[iii] = cov.getValue( vals[iii] );
- }
-
- // Create a new datum using the number of observations, number of mismatches, and reported quality score
- final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
- // Add that datum to all the collapsed tables which will be used in the sequential calculation
- dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
- }
-
-*/
- public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit) {
- this(indelGOP, indelGCP, deb, doCDP);
- this.doViterbi = dovit;
- }
-
- public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) {
-
-
+ public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean bandedLikelihoods) {
this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob
this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob
- this.doContextDependentPenalties = doCDP;
this.DEBUG = deb;
-
+ this.bandedLikelihoods = bandedLikelihoods;
// fill gap penalty table, affine naive model:
this.GAP_CONT_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
@@ -316,132 +129,6 @@ public class PairHMMIndelErrorModel {
}
- private double computeReadLikelihoodGivenHaplotype(byte[] haplotypeBases, byte[] readBases, byte[] readQuals) {
- final int X_METRIC_LENGTH = readBases.length+1;
- final int Y_METRIC_LENGTH = haplotypeBases.length+1;
-
- // initialize path metric and traceback memories for likelihood computation
- double[][] pathMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
- int[][] bestMetricArray = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-
- pathMetricArray[0][0]= 0;//Double.NEGATIVE_INFINITY;
-
- for (int i=1; i < X_METRIC_LENGTH; i++) {
- pathMetricArray[i][0] = 0;
- bestMetricArray[i][0] = UP;
- }
-
- for (int j=1; j < Y_METRIC_LENGTH; j++) {
- pathMetricArray[0][j] = 0;//logGapOpenProbability + (j-1) * logGapContinuationProbability;
- bestMetricArray[0][j] = LEFT;
- }
-
- for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
- for (int indJ=1; indJ < Y_METRIC_LENGTH; indJ++) {
-
- byte x = readBases[indI-1];
- byte y = haplotypeBases[indJ-1];
- byte qual = readQuals[indI-1];
-
- double bestMetric = 0.0;
- int bestMetricIdx = 0;
-
- // compute metric for match/mismatch
- // workaround for reads whose bases quality = 0,
- if (qual < 1)
- qual = 1;
-
- if (qual > MAX_CACHED_QUAL)
- qual = MAX_CACHED_QUAL;
-
- double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
- double[] metrics = new double[3];
-
- metrics[DIAG] = pathMetricArray[indI-1][indJ-1] + pBaseRead;
- metrics[UP] = pathMetricArray[indI-1][indJ] + logGapOpenProbability;//(end?0.0:logGapOpenProbability);
- metrics[LEFT] = pathMetricArray[indI][indJ-1] + logGapOpenProbability;//(end?0.0:logGapOpenProbability);
-
- if (doViterbi) {
- bestMetricIdx = MathUtils.maxElementIndex(metrics);
- bestMetric = metrics[bestMetricIdx];
- }
- else
- bestMetric = MathUtils.softMax(metrics);
-
- pathMetricArray[indI][indJ] = bestMetric;
- bestMetricArray[indI][indJ] = bestMetricIdx;
-
- }
- }
-
-
- double bestMetric=0.0;
- int bestMetricIdx=0,bestI=X_METRIC_LENGTH - 1, bestJ=Y_METRIC_LENGTH - 1;
-
- for (int i=0; i < X_METRIC_LENGTH; i ++ ) {
- int j= Y_METRIC_LENGTH-1;
-
- if (pathMetricArray[i][j] > bestMetric) {
- bestMetric = pathMetricArray[i][j];
- bestI = i;
- bestJ = j;
- }
- }
- for (int j=0; j < Y_METRIC_LENGTH; j++ ) {
- int i= X_METRIC_LENGTH-1;
- if (pathMetricArray[i][j] >= bestMetric) {
- bestMetric = pathMetricArray[i][j];
- bestI = i;
- bestJ = j;
- }
- }
-
- if (DEBUG && doViterbi) {
-
- String haplotypeString = new String (haplotypeBases);
- String readString = new String(readBases);
-
-
- int i = bestI;
- int j = bestJ;
-
-
- System.out.println("Simple NW");
-
- while (i >0 || j >0) {
- bestMetricIdx = bestMetricArray[i][j];
- System.out.print(bestMetricIdx);
- if (bestMetricIdx == UP) {
- // insert gap in Y
- haplotypeString = haplotypeString.substring(0,j)+"-"+haplotypeString.substring(j);
- i--;
- } else if (bestMetricIdx == LEFT) {
- readString = readString.substring(0,i)+"-"+readString.substring(i);
- j--;
- }
- else {
- i--; j--;
- }
- }
-
-
-
-
- System.out.println("\nAlignment: ");
- System.out.println("R:"+readString);
- System.out.println("H:"+haplotypeString);
- System.out.println();
-
-
- }
- if (DEBUG)
- System.out.format("Likelihood: %5.4f\n", bestMetric);
-
- return bestMetric;
-
-
- }
-
static private void getContextHomopolymerLength(final byte[] refBytes, int[] hrunArray) {
// compute forward hrun length, example:
// AGGTGACCCCCCTGAGAG
@@ -475,50 +162,75 @@ public class PairHMMIndelErrorModel {
}
+ private void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
+ double[] currentGOP, double[] currentGCP, double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
+ if (indI > 0 && indJ > 0) {
+ final int im1 = indI -1;
+ final int jm1 = indJ - 1;
+ // update current point
+ final byte x = readBases[im1];
+ final byte y = haplotypeBases[jm1];
+ final byte qual = readQuals[im1] < 1 ? 1 : (readQuals[im1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1]);
+
+ final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
+
+ matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
+ YMetricArray[im1][jm1] + pBaseRead);
+
+ final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
+ final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
+
+ XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
+
+ // update Y array
+ final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
+ final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
+ YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
+ }
+ }
+
private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
- double[] currentGOP, double[] currentGCP) {
+ double[] currentGOP, double[] currentGCP, int eventLength) {
final int X_METRIC_LENGTH = readBases.length+1;
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
- final int BWIDTH = 10;
-
// initialize path metric and traceback memories for likelihood computation
- double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
- double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
- double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
- int[][] bestActionArrayM = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
- int[][] bestActionArrayX = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
- int[][] bestActionArrayY = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+ final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+ final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+ final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
- double c,d;
- matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
+
+ final double DIAG_TOL = 20; // means that max - min element in diags have to be > this number for banding to take effect.
+ // default initialization for all arrays
+ for (int i=0; i < X_METRIC_LENGTH; i++) {
+ Arrays.fill(matchMetricArray[i],Double.NEGATIVE_INFINITY);
+ Arrays.fill(YMetricArray[i],Double.NEGATIVE_INFINITY);
+ Arrays.fill(XMetricArray[i],Double.NEGATIVE_INFINITY);
+ }
for (int i=1; i < X_METRIC_LENGTH; i++) {
//initialize first column
- matchMetricArray[i][0] = Double.NEGATIVE_INFINITY;
- YMetricArray[i][0] = Double.NEGATIVE_INFINITY;
- XMetricArray[i][0] = END_GAP_COST*(i);//logGapOpenProbability + (i-1)*logGapContinuationProbability;
-
- bestActionArrayX[i][0] = bestActionArrayY[i][0] = bestActionArrayM[i][0] = UP_GOTO_X;
+ XMetricArray[i][0] = END_GAP_COST*(i);
}
for (int j=1; j < Y_METRIC_LENGTH; j++) {
// initialize first row
- matchMetricArray[0][j] = Double.NEGATIVE_INFINITY;
- XMetricArray[0][j] = Double.NEGATIVE_INFINITY;
- YMetricArray[0][j] = END_GAP_COST*(j);//logGapOpenProbability + (j-1) * logGapContinuationProbability;
-
- bestActionArrayY[0][j] = bestActionArrayM[0][j] = bestActionArrayX[0][j] = LEFT_GOTO_Y;
+ YMetricArray[0][j] = END_GAP_COST*(j);
}
+ matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
+ XMetricArray[0][0]= YMetricArray[0][0] = 0;
+
final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH -1;
final int elemsInDiag = Math.min(X_METRIC_LENGTH, Y_METRIC_LENGTH);
int idxWithMaxElement = 0;
- for (int diag=0; diag < numDiags; diag++) {
+ double maxElementInDiag = Double.NEGATIVE_INFINITY;
+ for (int diag=0; diag < numDiags; diag++) {
+ // compute default I and J start positions at edge of diagonals
int indI = 0;
int indJ = diag;
if (diag >= Y_METRIC_LENGTH ) {
@@ -526,225 +238,115 @@ public class PairHMMIndelErrorModel {
indJ = Y_METRIC_LENGTH-1;
}
+ // first pass: from max element to edge
+ int idxLow = bandedLikelihoods? idxWithMaxElement : 0;
- double maxElementInDiag = Double.NEGATIVE_INFINITY;
+ // reset diag max value before starting
+ if (bandedLikelihoods) {
+ maxElementInDiag = Double.NEGATIVE_INFINITY;
+ // set indI, indJ to correct values
+ indI += idxLow;
+ indJ -= idxLow;
+ if (indI >= X_METRIC_LENGTH || indJ < 0) {
+ idxLow--;
+ indI--;
+ indJ++;
+ }
- int idxLow = idxWithMaxElement - BWIDTH;
- int idxHigh = idxWithMaxElement + BWIDTH;
+ }
- if (idxLow < 0)
- idxLow = 0;
-
- if (idxHigh > elemsInDiag )
- idxHigh = elemsInDiag;
-
-
- // for each diagonal, compute max element, and center band around it for next diagonal.
- //
- // start point is idxOfMax-BWIDTH, end pt is idxMax+BWIDTH
-
- for (int el = idxLow; el < idxHigh; el++) {
- // first row and first column of all matrices had been pre-filled before
- int im1 = indI -1;
- int jm1 = indJ - 1;
- if (indI > 0 && indJ > 0) {
- // update current point
- byte x = readBases[im1];
- byte y = haplotypeBases[jm1];
- byte qual = readQuals[im1];
-
- double bestMetric = 0.0;
-
- // compute metric for match/mismatch
- // workaround for reads whose bases quality = 0,
- if (qual < 1)
- qual = 1;
-
- if (qual > MAX_CACHED_QUAL)
- qual = MAX_CACHED_QUAL;
-
- double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
-
- matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
- YMetricArray[im1][jm1] + pBaseRead);
-
- c = currentGOP[jm1];
- d = currentGCP[jm1];
- if (indJ == Y_METRIC_LENGTH-1)
- c = d = END_GAP_COST;
-
-
- XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c, XMetricArray[im1][indJ] + d);
-
- // update Y array
- //c = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
- //d = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
- c = currentGOP[jm1];
- d = currentGCP[jm1];
- if (indI == X_METRIC_LENGTH-1)
- c = d = END_GAP_COST;
-
- YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c, YMetricArray[indI][jm1] + d);
- bestMetric = MathUtils.softMax(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
+ for (int el = idxLow; el < elemsInDiag; el++) {
+ updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
+ currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
+ // update max in diagonal
+ if (bandedLikelihoods) {
+ final double bestMetric = MathUtils.softMax(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
+ // check if we've fallen off diagonal value by threshold
if (bestMetric > maxElementInDiag) {
maxElementInDiag = bestMetric;
- idxWithMaxElement = el ;
+ idxWithMaxElement = el;
}
-
+ else if (bestMetric < maxElementInDiag - DIAG_TOL)
+ break; // done w/current diagonal
}
+
indI++;
if (indI >=X_METRIC_LENGTH )
break;
indJ--;
- if (indJ < 0)
+ if (indJ <= 0)
break;
}
- }
-/*
- for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
- int im1 = indI-1;
- for (int indJ=1; indJ < Y_METRIC_LENGTH; indJ++) {
- int jm1 = indJ-1;
- byte x = readBases[im1];
- byte y = haplotypeBases[jm1];
- byte qual = readQuals[im1];
-
- double bestMetric = 0.0;
- int bestMetricIdx = 0;
-
- // compute metric for match/mismatch
- // workaround for reads whose bases quality = 0,
- if (qual < 1)
- qual = 1;
-
- if (qual > MAX_CACHED_QUAL)
- qual = MAX_CACHED_QUAL;
-
- //qual = 30;
-
- double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
-
-
- double[] metrics = new double[3];
-
-
- if (doViterbi) {
- // update match array
- metrics[MATCH_OFFSET] = matchMetricArray[im1][jm1] + pBaseRead;
- metrics[X_OFFSET] = XMetricArray[im1][jm1] + pBaseRead;
- metrics[Y_OFFSET] = YMetricArray[im1][jm1] + pBaseRead;
-
- bestMetricIdx = MathUtils.maxElementIndex(metrics);
- bestMetric = metrics[bestMetricIdx];
+ if (bandedLikelihoods && idxLow > 0) {
+ // now do second part in opposite direction
+ indI = 0;
+ indJ = diag;
+ if (diag >= Y_METRIC_LENGTH ) {
+ indI = diag-(Y_METRIC_LENGTH-1);
+ indJ = Y_METRIC_LENGTH-1;
}
- else
- bestMetric = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
- YMetricArray[im1][jm1] + pBaseRead);
- matchMetricArray[indI][indJ] = bestMetric;
- bestActionArrayM[indI][indJ] = ACTIONS_M[bestMetricIdx];
+ indI += idxLow-1;
+ indJ -= idxLow-1;
+ for (int el = idxLow-1; el >= 0; el--) {
- // update X array
- // State X(i,j): X(1:i) aligned to a gap in Y(1:j).
- // When in last column of X, ie X(1:i) aligned to full Y, we don't want to penalize gaps
+ updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
+ currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
+ // update max in diagonal
+ final double bestMetric = MathUtils.softMax(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
- //c = (indJ==Y_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
- //d = (indJ==Y_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
- if (getGapPenaltiesFromFile) {
- c = currentGOP[im1];
- d = logGapContinuationProbability;
+ // check if we've fallen off diagonal value by threshold
+ if (bestMetric > maxElementInDiag) {
+ maxElementInDiag = bestMetric;
+ idxWithMaxElement = el;
+ }
+ else if (bestMetric < maxElementInDiag - DIAG_TOL)
+ break; // done w/current diagonal
- } else {
- c = currentGOP[jm1];
- d = currentGCP[jm1];
+ indJ++;
+ if (indJ >= Y_METRIC_LENGTH )
+ break;
+ indI--;
+ if (indI <= 0)
+ break;
}
- if (indJ == Y_METRIC_LENGTH-1)
- c = d = END_GAP_COST;
-
- if (doViterbi) {
- metrics[MATCH_OFFSET] = matchMetricArray[im1][indJ] + c;
- metrics[X_OFFSET] = XMetricArray[im1][indJ] + d;
- metrics[Y_OFFSET] = Double.NEGATIVE_INFINITY; //YMetricArray[indI-1][indJ] + logGapOpenProbability;
-
- bestMetricIdx = MathUtils.maxElementIndex(metrics);
- bestMetric = metrics[bestMetricIdx];
- }
- else
- bestMetric = MathUtils.softMax(matchMetricArray[im1][indJ] + c, XMetricArray[im1][indJ] + d);
-
- XMetricArray[indI][indJ] = bestMetric;
- bestActionArrayX[indI][indJ] = ACTIONS_X[bestMetricIdx];
-
- // update Y array
- //c = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
- //d = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
- if (getGapPenaltiesFromFile) {
- c = currentGOP[im1];
- d = logGapContinuationProbability;
- }
- else {
- c = currentGOP[jm1];
- d = currentGCP[jm1];
- }
- if (indI == X_METRIC_LENGTH-1)
- c = d = END_GAP_COST;
-
-
-
- if (doViterbi) {
- metrics[MATCH_OFFSET] = matchMetricArray[indI][jm1] + c;
- metrics[X_OFFSET] = Double.NEGATIVE_INFINITY; //XMetricArray[indI][indJ-1] + logGapOpenProbability;
- metrics[Y_OFFSET] = YMetricArray[indI][jm1] + d;
-
- bestMetricIdx = MathUtils.maxElementIndex(metrics);
- bestMetric = metrics[bestMetricIdx];
- }
- else
- bestMetric = MathUtils.softMax(matchMetricArray[indI][jm1] + c, YMetricArray[indI][jm1] + d);
-
- YMetricArray[indI][indJ] = bestMetric;
- bestActionArrayY[indI][indJ] = ACTIONS_Y[bestMetricIdx];
-
-
-
}
+ // if (DEBUG)
+ // System.out.format("Max:%4.1f el:%d\n",maxElementInDiag, idxWithMaxElement);
}
- */
- double bestMetric;
- double metrics[] = new double[3];
- final int bestI=X_METRIC_LENGTH - 1, bestJ=Y_METRIC_LENGTH - 1;
- metrics[MATCH_OFFSET] = matchMetricArray[bestI][bestJ];
- metrics[X_OFFSET] = XMetricArray[bestI][bestJ];
- metrics[Y_OFFSET] = YMetricArray[bestI][bestJ];
- bestMetric = MathUtils.softMax(metrics);
- if (DEBUG) {
- PrintStream outx, outy, outm;
+ final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
+ final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
+ XMetricArray[bestI][bestJ],
+ YMetricArray[bestI][bestJ]);
+ /*
+ if (DEBUG) {
+ PrintStream outx, outy, outm, outs;
double[][] sumMetrics = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
try {
outx = new PrintStream("../../UGOptim/datax.txt");
outy = new PrintStream("../../UGOptim/datay.txt");
outm = new PrintStream("../../UGOptim/datam.txt");
-
+ outs = new PrintStream("../../UGOptim/datas.txt");
+ double metrics[] = new double[3];
for (int indI=0; indI < X_METRIC_LENGTH; indI++) {
for (int indJ=0; indJ < Y_METRIC_LENGTH; indJ++) {
- metrics[MATCH_OFFSET] = matchMetricArray[indI][indJ];
- metrics[X_OFFSET] = XMetricArray[indI][indJ];
- metrics[Y_OFFSET] = YMetricArray[indI][indJ];
- sumMetrics[indI][indJ] = MathUtils.softMax(metrics);
- outx.format("%4.1f ", metrics[X_OFFSET]);
- outy.format("%4.1f ", metrics[Y_OFFSET]);
- outm.format("%4.1f ", metrics[MATCH_OFFSET]);
+ metrics[0] = matchMetricArray[indI][indJ];
+ metrics[1] = XMetricArray[indI][indJ];
+ metrics[2] = YMetricArray[indI][indJ];
+ //sumMetrics[indI][indJ] = MathUtils.softMax(metrics);
+ outx.format("%4.1f ", metrics[1]);
+ outy.format("%4.1f ", metrics[2]);
+ outm.format("%4.1f ", metrics[0]);
+ outs.format("%4.1f ", MathUtils.softMax(metrics));
}
- outx.println(); outm.println();outy.println();
+ outx.println(); outm.println();outy.println(); outs.println();
}
outm.close(); outx.close(); outy.close();
} catch (java.io.IOException e) { throw new UserException("bla");}
- }
-
- if (DEBUG)
- System.out.format("Likelihood: %5.4f\n", bestMetric);
+ }
+ */
return bestMetric;
@@ -765,50 +367,39 @@ public class PairHMMIndelErrorModel {
}
}
public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap haplotypeMap,
- ReferenceContext ref, int eventLength,
- HashMap> indelLikelihoodMap){
+ ReferenceContext ref, int eventLength,
+ HashMap> indelLikelihoodMap){
int numHaplotypes = haplotypeMap.size();
- double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
- double readLikelihoods[][] = new double[pileup.getReads().size()][numHaplotypes];
+ final double readLikelihoods[][] = new double[pileup.size()][numHaplotypes];
+ final int readCounts[] = new int[pileup.size()];
int readIdx=0;
LinkedHashMap gapOpenProbabilityMap = new LinkedHashMap();
LinkedHashMap gapContProbabilityMap = new LinkedHashMap();
- if (DEBUG) {
- System.out.println("Reference bases:");
- System.out.println(new String(ref.getBases()));
+ // will fill context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
+ // todo -- refactor into separate function
+ for (Allele a: haplotypeMap.keySet()) {
+ Haplotype haplotype = haplotypeMap.get(a);
+ byte[] haplotypeBases = haplotype.getBasesAsBytes();
+ double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
+ double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
+
+ // get homopolymer length profile for current haplotype
+ int[] hrunProfile = new int[haplotypeBases.length];
+ getContextHomopolymerLength(haplotypeBases,hrunProfile);
+ fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
+
+ gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
+ gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
+
}
- if (doContextDependentPenalties && !getGapPenaltiesFromFile) {
- // will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
-
-
- for (Allele a: haplotypeMap.keySet()) {
- Haplotype haplotype = haplotypeMap.get(a);
- byte[] haplotypeBases = haplotype.getBasesAsBytes();
- double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
- double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
-
- // get homopolymer length profile for current haplotype
- int[] hrunProfile = new int[haplotypeBases.length];
- getContextHomopolymerLength(haplotypeBases,hrunProfile);
- if (DEBUG) {
- System.out.println("Haplotype bases:");
- System.out.println(new String(haplotypeBases));
- for (int i=0; i < hrunProfile.length; i++)
- System.out.format("%d",hrunProfile[i]);
- System.out.println();
- }
- fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
-
- gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
- gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
-
- }
- }
for (PileupElement p: pileup) {
+ // > 1 when the read is a consensus read representing multiple independent observations
+ final boolean isReduced = ReadUtils.isReducedRead(p.getRead());
+ readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
if (indelLikelihoodMap.containsKey(p)) {
@@ -820,61 +411,20 @@ public class PairHMMIndelErrorModel {
}
else {
//System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
- GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
+ SAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
if (read == null)
continue;
- if(ReadUtils.is454Read(read) && !getGapPenaltiesFromFile) {
+ if ( isReduced ) {
+ read = ReadUtils.reducedReadWithReducedQuals(read);
+ }
+
+ if(ReadUtils.is454Read(read)) {
continue;
}
double[] recalQuals = null;
- /*
- if (getGapPenaltiesFromFile) {
- RecalDataManager.parseSAMRecord( read, RAC );
-
-
- recalQuals = new double[read.getReadLength()];
-
- //compute all covariate values for this read
- final Comparable[][] covariateValues_offset_x_covar =
- RecalDataManager.computeCovariates((GATKSAMRecord) read, requestedCovariates);
- // For each base in the read
- for( int offset = 0; offset < read.getReadLength(); offset++ ) {
-
- final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
-
- Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
- if(qualityScore == null)
- {
- qualityScore = performSequentialQualityCalculation( fullCovariateKey );
- qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
- }
-
- recalQuals[offset] = -((double)qualityScore)/10.0;
- }
-
- // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
- // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
- if (DEBUG) {
- System.out.format("\n\nStarting read:%s S:%d US:%d E:%d UE:%d C:%s\n",read.getReadName(),
- read.getAlignmentStart(),
- read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(),
- read.getCigarString());
-
- byte[] bases = read.getReadBases();
- for (int k = 0; k < recalQuals.length; k++) {
- System.out.format("%c",bases[k]);
- }
- System.out.println();
-
- for (int k = 0; k < recalQuals.length; k++) {
- System.out.format("%.0f ",recalQuals[k]);
- }
- System.out.println();
- }
- } */
// get bases of candidate haplotypes that overlap with reads
final int trailingBases = 3;
@@ -968,18 +518,16 @@ public class PairHMMIndelErrorModel {
// ok, we now figured out total number of clipped bases on both ends.
// Figure out where we want to place the haplotype to score read against
- if (DEBUG)
- System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
- numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength());
-
+ /*
+ if (DEBUG)
+ System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
+ numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength());
+ */
LinkedHashMap readEl = new LinkedHashMap();
if (numStartClippedBases + numEndClippedBases >= unclippedReadBases.length) {
- if (DEBUG)
- System.out.println("BAD READ!!");
-
int j=0;
for (Allele a: haplotypeMap.keySet()) {
readEl.put(a,0.0);
@@ -994,18 +542,6 @@ public class PairHMMIndelErrorModel {
byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartClippedBases,
unclippedReadBases.length-numEndClippedBases);
- double[] recalCDP = null;
- if (getGapPenaltiesFromFile) {
- recalCDP = Arrays.copyOfRange(recalQuals,numStartClippedBases,
- unclippedReadBases.length-numEndClippedBases);
-
- }
-
- if (DEBUG) {
- System.out.println("Read bases:");
- System.out.println(new String(readBases));
- }
-
int j=0;
for (Allele a: haplotypeMap.keySet()) {
@@ -1024,32 +560,10 @@ public class PairHMMIndelErrorModel {
byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBasesAsBytes(),
(int)indStart, (int)indStop);
- if (DEBUG) {
- System.out.println("Haplotype to test:");
- System.out.println(new String(haplotypeBases));
- }
-
- Double readLikelihood = 0.0;
- if (useAffineGapModel) {
-
- double[] currentContextGOP = null;
- double[] currentContextGCP = null;
-
- if (doContextDependentPenalties) {
-
- if (getGapPenaltiesFromFile) {
- readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, recalCDP, null);
-
- } else {
- currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
- currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
- readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
- }
- }
-
- }
- else
- readLikelihood = computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals);
+ final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
+ final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
+ final double readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals,
+ currentContextGOP, currentContextGCP, eventLength);
readEl.put(a,readLikelihood);
readLikelihoods[readIdx][j++] = readLikelihood;
@@ -1062,7 +576,7 @@ public class PairHMMIndelErrorModel {
if (DEBUG) {
System.out.println("\nLikelihood summary");
- for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
+ for (readIdx=0; readIdx < pileup.size(); readIdx++) {
System.out.format("Read Index: %d ",readIdx);
for (int i=0; i < readLikelihoods[readIdx].length; i++)
System.out.format("L%d: %f ",i,readLikelihoods[readIdx][i]);
@@ -1070,36 +584,35 @@ public class PairHMMIndelErrorModel {
}
}
+
+ return getHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
+ }
+
+ private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
+ final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
+
+ // todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix
for (int i=0; i < numHaplotypes; i++) {
for (int j=i; j < numHaplotypes; j++){
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
- for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
-
+ for (int readIdx = 0; readIdx < readLikelihoods.length; readIdx++) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
if (Double.isInfinite(readLikelihoods[readIdx][i]) && Double.isInfinite(readLikelihoods[readIdx][j]))
continue;
- haplotypeLikehoodMatrix[i][j] += ( MathUtils.softMax(readLikelihoods[readIdx][i],
- readLikelihoods[readIdx][j]) + LOG_ONE_HALF);
-
+ final double li = readLikelihoods[readIdx][i];
+ final double lj = readLikelihoods[readIdx][j];
+ final int readCount = readCounts[readIdx];
+ haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF);
}
-
-
}
}
- return getHaplotypeLikelihoods(haplotypeLikehoodMatrix);
-
- }
-
- public static double[] getHaplotypeLikelihoods(double[][] haplotypeLikehoodMatrix) {
- int hSize = haplotypeLikehoodMatrix.length;
- double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
-
+ final double[] genotypeLikelihoods = new double[numHaplotypes*(numHaplotypes+1)/2];
int k=0;
- for (int j=0; j < hSize; j++) {
+ for (int j=0; j < numHaplotypes; j++) {
for (int i=0; i <= j; i++){
genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
}
@@ -1108,79 +621,4 @@ public class PairHMMIndelErrorModel {
// renormalize so that max element is zero.
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
}
-
- /**
- * Implements a serial recalibration of the reads using the combinational table.
- * First, we perform a positional recalibration, and then a subsequent dinuc correction.
- *
- * Given the full recalibration table, we perform the following preprocessing steps:
- *
- * - calculate the global quality score shift across all data [DeltaQ]
- * - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
- * -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
- * - The final shift equation is:
- *
- * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
- * @param key The list of Comparables that were calculated from the covariates
- * @return A recalibrated quality score as a byte
- */
- /*
- private byte performSequentialQualityCalculation( final Object... key ) {
-
- final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
- final Object[] readGroupCollapsedKey = new Object[1];
- final Object[] qualityScoreCollapsedKey = new Object[2];
- final Object[] covariateCollapsedKey = new Object[3];
-
- // The global quality shift (over the read group only)
- readGroupCollapsedKey[0] = key[0];
- final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
- double globalDeltaQ = 0.0;
- if( globalRecalDatum != null ) {
- final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
- final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
- globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
- }
-
- // The shift in quality between reported and empirical
- qualityScoreCollapsedKey[0] = key[0];
- qualityScoreCollapsedKey[1] = key[1];
- final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
- double deltaQReported = 0.0;
- if( qReportedRecalDatum != null ) {
- final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
- deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
- }
-
- // The shift in quality due to each covariate by itself in turn
- double deltaQCovariates = 0.0;
- double deltaQCovariateEmpirical;
- covariateCollapsedKey[0] = key[0];
- covariateCollapsedKey[1] = key[1];
- for( int iii = 2; iii < key.length; iii++ ) {
- covariateCollapsedKey[2] = key[iii]; // The given covariate
- final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
- if( covariateRecalDatum != null ) {
- deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
- deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
- }
- }
-
- final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
- return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
-
- // Verbose printouts used to validate with old recalibrator
- //if(key.contains(null)) {
- // System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
- // qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
- //}
- //else {
- // System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
- // key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
- //}
-
- //return newQualityByte;
-
- }
-*/
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
index 12899e898..053864791 100755
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
@@ -81,20 +81,17 @@ public class PileupElement {
//
// --------------------------------------------------------------------------
- private Integer getReducedReadQualityTagValue() {
- return getRead().getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
- }
-
public boolean isReducedRead() {
- return getReducedReadQualityTagValue() != null;
+ return ReadUtils.isReducedRead(getRead());
}
public int getReducedCount() {
+ if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced count for non-reduced read " + getRead().getReadName());
return (int)getQual();
}
public byte getReducedQual() {
- return (byte)(int)getReducedReadQualityTagValue();
+ return (byte)(int)ReadUtils.getReducedReadQualityTagValue(getRead());
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
index c328bbc5a..8beb1b21a 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
@@ -43,10 +43,43 @@ import java.util.*;
* @version 0.1
*/
public class ReadUtils {
- public static final String REDUCED_READ_QUALITY_TAG = "RQ";
-
private ReadUtils() { }
+ // ----------------------------------------------------------------------------------------------------
+ //
+ // Reduced read utilities
+ //
+ // ----------------------------------------------------------------------------------------------------
+
+ public static final String REDUCED_READ_QUALITY_TAG = "RQ";
+
+ public final static Integer getReducedReadQualityTagValue(final SAMRecord read) {
+ return read.getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
+ }
+
+ public final static boolean isReducedRead(final SAMRecord read) {
+ return getReducedReadQualityTagValue(read) != null;
+ }
+
+ public final static SAMRecord reducedReadWithReducedQuals(final SAMRecord read) {
+ if ( ! isReducedRead(read) ) throw new IllegalArgumentException("read must be a reduced read");
+ try {
+ SAMRecord newRead = (SAMRecord)read.clone();
+ byte reducedQual = (byte)(int)getReducedReadQualityTagValue(read);
+ byte[] newQuals = new byte[read.getBaseQualities().length];
+ Arrays.fill(newQuals, reducedQual);
+ newRead.setBaseQualities(newQuals);
+ return newRead;
+ } catch ( CloneNotSupportedException e ) {
+ throw new ReviewedStingException("SAMRecord no longer supports clone", e);
+ }
+ }
+
+ // ----------------------------------------------------------------------------------------------------
+ //
+ // General utilities
+ //
+ // ----------------------------------------------------------------------------------------------------
public static SAMFileHeader copySAMFileHeader(SAMFileHeader toCopy) {
SAMFileHeader copy = new SAMFileHeader();
@@ -157,7 +190,7 @@ public class ReadUtils {
* |----------------| (interval)
* <--------> (read)
*/
- public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
+ public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
/**
* God, there's a huge information asymmetry in SAM format:
@@ -640,27 +673,35 @@ public class ReadUtils {
*/
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
- int start = getRefCoordSoftUnclippedStart(read);
- int stop = getRefCoordSoftUnclippedEnd(read);
+ int sStart = getRefCoordSoftUnclippedStart(read);
+ int sStop = getRefCoordSoftUnclippedEnd(read);
+ int uStart = read.getUnclippedStart();
+ int uStop = read.getUnclippedEnd();
if ( !read.getReferenceName().equals(interval.getContig()) )
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
- else if ( stop < interval.getStart() )
+ else if ( uStop < interval.getStart() )
return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
- else if ( start > interval.getStop() )
+ else if ( uStart > interval.getStop() )
return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
- else if ( (start >= interval.getStart()) &&
- (stop <= interval.getStop()) )
+ else if ( sStop < interval.getStart() )
+ return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
+
+ else if ( sStart > interval.getStop() )
+ return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
+
+ else if ( (sStart >= interval.getStart()) &&
+ (sStop <= interval.getStop()) )
return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
- else if ( (start < interval.getStart()) &&
- (stop > interval.getStop()) )
+ else if ( (sStart < interval.getStart()) &&
+ (sStop > interval.getStop()) )
return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
- else if ( (start < interval.getStart()) )
+ else if ( (sStart < interval.getStart()) )
return ReadAndIntervalOverlap.OVERLAP_LEFT;
else
diff --git a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java
index 7cb7fec98..e1fdadadc 100755
--- a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java
@@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
@@ -12,9 +13,10 @@ import org.testng.annotations.Test;
public class ReadUtilsUnitTest extends BaseTest {
- SAMRecord read;
+ SAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
+ final private static int REDUCED_READ_QUAL = 20;
@BeforeTest
public void init() {
@@ -23,6 +25,11 @@ public class ReadUtilsUnitTest extends BaseTest {
read.setReadUnmappedFlag(true);
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
+
+ reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
+ reducedRead.setReadBases(BASES.getBytes());
+ reducedRead.setBaseQualityString(QUALS);
+ reducedRead.setAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG, REDUCED_READ_QUAL);
}
private void testReadBasesAndQuals(SAMRecord read, int expectedStart, int expectedStop) {
@@ -38,4 +45,40 @@ public class ReadUtilsUnitTest extends BaseTest {
@Test public void testClip2Front() { testReadBasesAndQuals(read, 2, 4); }
@Test public void testClip1Back() { testReadBasesAndQuals(read, 0, 3); }
@Test public void testClip2Back() { testReadBasesAndQuals(read, 0, 2); }
+
+ @Test
+ public void testReducedReads() {
+ Assert.assertFalse(ReadUtils.isReducedRead(read), "isReducedRead is false for normal read");
+ Assert.assertEquals(ReadUtils.getReducedReadQualityTagValue(read), null, "No reduced read tag in normal read");
+
+ Assert.assertTrue(ReadUtils.isReducedRead(reducedRead), "isReducedRead is true for reduced read");
+ Assert.assertEquals((int) ReadUtils.getReducedReadQualityTagValue(reducedRead), REDUCED_READ_QUAL, "Reduced read tag is set to expected value");
+ }
+
+ @Test
+ public void testreducedReadWithReducedQualsWithReducedRead() {
+ SAMRecord replacedRead = ReadUtils.reducedReadWithReducedQuals(reducedRead);
+ Assert.assertEquals(replacedRead.getReadBases(), reducedRead.getReadBases());
+ Assert.assertEquals(replacedRead.getBaseQualities().length, reducedRead.getBaseQualities().length);
+ for ( int i = 0; i < replacedRead.getBaseQualities().length; i++)
+ Assert.assertEquals(replacedRead.getBaseQualities()[i], REDUCED_READ_QUAL);
+ }
+
+ @Test(expectedExceptions = IllegalArgumentException.class)
+ public void testreducedReadWithReducedQualsWithNormalRead() {
+ ReadUtils.reducedReadWithReducedQuals(read);
+ }
+
+ @Test
+ public void testReducedReadPileupElement() {
+ PileupElement readp = new PileupElement(read,0);
+ PileupElement reducedreadp = new PileupElement(reducedRead,0);
+
+ Assert.assertFalse(readp.isReducedRead());
+
+ Assert.assertTrue(reducedreadp.isReducedRead());
+ Assert.assertEquals(reducedreadp.getReducedCount(), 0);
+ Assert.assertEquals(reducedreadp.getReducedQual(), REDUCED_READ_QUAL);
+
+ }
}
diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml
index 4f635f7fb..4364988e7 100644
--- a/public/packages/GATKEngine.xml
+++ b/public/packages/GATKEngine.xml
@@ -29,7 +29,7 @@
-
+
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
old mode 100755
new mode 100644
index 297da8cc9..a3e83871e
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
@@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException
/**
* Entry point of Queue. Compiles and runs QScripts passed in to the command line.
*/
-object QCommandLine {
+object QCommandLine extends Logging {
/**
* Main.
* @param argv Arguments.
@@ -45,22 +45,23 @@ object QCommandLine {
def main(argv: Array[String]) {
val qCommandLine = new QCommandLine
- Runtime.getRuntime.addShutdownHook(new Thread {
- /** Cleanup as the JVM shuts down. */
+ val shutdownHook = new Thread {
override def run() {
+ logger.info("Shutting down jobs. Please wait...")
ProcessController.shutdown()
qCommandLine.shutdown()
}
- })
+ }
+
+ Runtime.getRuntime.addShutdownHook(shutdownHook)
try {
CommandLineProgram.start(qCommandLine, argv);
+ Runtime.getRuntime.removeShutdownHook(shutdownHook)
if (CommandLineProgram.result != 0)
System.exit(CommandLineProgram.result);
} catch {
case e: Exception => CommandLineProgram.exitSystemWithError(e)
- } finally {
-
}
}
}