diff --git a/build.xml b/build.xml
index e92e41c10..03f3232f2 100644
--- a/build.xml
+++ b/build.xml
@@ -669,21 +669,13 @@
-
-
-
-
-
-
-
-
-
+
@@ -753,7 +745,7 @@
-
+
@@ -873,14 +865,18 @@
-
-
+
+
+
+
+
+
@@ -929,12 +925,17 @@
+
+
-
+
+
+
+
@@ -1081,7 +1082,7 @@
-
+
@@ -1112,7 +1113,7 @@
-
+
@@ -1122,7 +1123,7 @@
-
+
@@ -1252,7 +1253,7 @@
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.TestNGTestTransformer,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
-
+
@@ -1295,7 +1296,7 @@
-
+
@@ -1450,4 +1451,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ivy.xml b/ivy.xml
index 1802c1627..ed13af1c2 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -35,9 +35,14 @@
+
+
+
+
+
@@ -81,9 +86,10 @@
-
+
+
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
index 3a1532bb1..a47e417c4 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
@@ -50,10 +50,13 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory;
+import org.broadinstitute.sting.utils.collections.DefaultHashMap;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.io.File;
import java.io.PrintStream;
+import java.util.Collections;
+import java.util.Map;
/**
* Created with IntelliJ IDEA.
@@ -118,6 +121,33 @@ public class StandardCallerArgumentCollection {
public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION;
public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05;
+ /**
+ * This argument specifies a file with two columns "sample" and "contamination" specifying the contamination level for those samples.
+ * Samples that do not appear in this file will be processed with CONTAMINATION_FRACTION
+ **/
+ @Advanced
+ @Argument(fullName = "contamination_fraction_per_sample_file", shortName = "contaminationFile", doc = "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"\" (Contamination is double) per line; No header.", required = false)
+ public File CONTAMINATION_FRACTION_FILE = null;
+
+ /**
+ *
+ * @return an _Immutable_ copy of the Sample-Contamination Map, defaulting to CONTAMINATION_FRACTION so that if the sample isn't in the map map(sample)==CONTAMINATION_FRACTION
+ */
+ public Map getSampleContamination(){
+ //make sure that the default value is set up right
+ sampleContamination.setDefaultValue(CONTAMINATION_FRACTION);
+ return Collections.unmodifiableMap(sampleContamination);
+ }
+
+ public void setSampleContamination(DefaultHashMap sampleContamination) {
+ this.sampleContamination.clear();
+ this.sampleContamination.putAll(sampleContamination);
+ this.sampleContamination.setDefaultValue(CONTAMINATION_FRACTION);
+ }
+
+ //Needs to be here because it uses CONTAMINATION_FRACTION
+ private DefaultHashMap sampleContamination = new DefaultHashMap(CONTAMINATION_FRACTION);
+
/**
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
*/
@@ -145,8 +175,10 @@ public class StandardCallerArgumentCollection {
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
this.CONTAMINATION_FRACTION = SCAC.CONTAMINATION_FRACTION;
+ this.CONTAMINATION_FRACTION_FILE=SCAC.CONTAMINATION_FRACTION_FILE;
this.contaminationLog = SCAC.contaminationLog;
this.exactCallsLog = SCAC.exactCallsLog;
+ this.sampleContamination=SCAC.sampleContamination;
this.AFmodel = SCAC.AFmodel;
}
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Coverage.java
similarity index 99%
rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Coverage.java
index 4adb2ca71..5138ac9af 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Coverage.java
@@ -75,7 +75,7 @@ import java.util.Map;
* over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for
* N samples with -dcov D is N * D
*/
-public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
+public class Coverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
index ff3d7940f..14c785678 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
@@ -142,7 +142,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
public List getDescriptions() {
return Arrays.asList(
- new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
+ new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
}
private Double pValueForContingencyTable(int[][] originalTable) {
@@ -176,7 +176,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
//System.out.printf("P-cutoff: %f\n", pCutoff);
//System.out.printf("P-value: %f\n\n", pValue);
- return pValue;
+ // min is necessary as numerical precision can result in pValue being slightly greater than 1.0
+ return Math.min(pValue, 1.0);
}
private static int [][] copyContingencyTable(int [][] t) {
@@ -222,14 +223,14 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
// calculate in log space so we don't die with high numbers
double pCutoff = Arithmetic.logFactorial(rowSums[0])
- + Arithmetic.logFactorial(rowSums[1])
- + Arithmetic.logFactorial(colSums[0])
- + Arithmetic.logFactorial(colSums[1])
- - Arithmetic.logFactorial(table[0][0])
- - Arithmetic.logFactorial(table[0][1])
- - Arithmetic.logFactorial(table[1][0])
- - Arithmetic.logFactorial(table[1][1])
- - Arithmetic.logFactorial(N);
+ + Arithmetic.logFactorial(rowSums[1])
+ + Arithmetic.logFactorial(colSums[0])
+ + Arithmetic.logFactorial(colSums[1])
+ - Arithmetic.logFactorial(table[0][0])
+ - Arithmetic.logFactorial(table[0][1])
+ - Arithmetic.logFactorial(table[1][0])
+ - Arithmetic.logFactorial(table[1][1])
+ - Arithmetic.logFactorial(N);
return Math.exp(pCutoff);
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java
index 93bdf8c9d..48b3593c5 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java
@@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAn
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@@ -69,7 +70,7 @@ import java.util.Map;
/**
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
*/
-@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map annotate(final RefMetaDataTracker tracker,
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java
index 13969eb54..0455290e3 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java
@@ -91,8 +91,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final Map stratifiedPerReadAlleleLikelihoodMap) {
if (vc.isSNP() && stratifiedContexts != null)
return annotatePileup(ref, stratifiedContexts, vc);
- else if (stratifiedPerReadAlleleLikelihoodMap != null && vc.isVariant())
- return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc);
else
return null;
}
@@ -133,31 +131,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return map;
}
- private Map annotateWithLikelihoods(final Map stratifiedPerReadAlleleLikelihoodMap,
- final VariantContext vc) {
-
- final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
- for (final Genotype genotype : vc.getGenotypes()) {
- final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
- if (perReadAlleleLikelihoodMap == null)
- continue;
-
- Double d = scoreIndelsAgainstHaplotypes(perReadAlleleLikelihoodMap);
- if (d == null)
- continue;
- scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
- }
-
- // if (scoreRA.observationCount() == 0)
- // return null;
-
- // annotate the score in the info field
- final Map map = new HashMap();
- map.put(getKeyNames().get(0), String.format("%.4f", scoreRA.mean()));
- return map;
-
- }
-
private static class HaplotypeComparator implements Comparator, Serializable {
public int compare(Haplotype a, Haplotype b) {
@@ -180,7 +153,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
for (final PileupElement p : pileup) {
final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus);
- candidateHaplotypeQueue.add(haplotypeFromRead);
+ if ( haplotypeFromRead != null )
+ candidateHaplotypeQueue.add(haplotypeFromRead);
}
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
@@ -230,8 +204,18 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return null;
}
+ /**
+ * Return a haplotype object constructed from the read or null if read's cigar is null
+ *
+ * @param p pileup element representing the read
+ * @param contextSize the context size to use
+ * @param locus the position
+ * @return possibly null Haplotype object constructed from the read
+ */
private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
final GATKSAMRecord read = p.getRead();
+ if ( read.getCigar() == null )
+ return null;
final byte[] haplotypeBases = new byte[contextSize];
Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
@@ -347,6 +331,10 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
double expected = 0.0;
double mismatches = 0.0;
+ final GATKSAMRecord read = p.getRead();
+ if ( read.getCigar() == null )
+ return 0.0;
+
// What's the expected mismatch rate under the model that this read is actually sampled from
// this haplotype? Let's assume the consensus base c is a random choice one of A, C, G, or T, and that
// the observed base is actually from a c with an error rate e. Since e is the rate at which we'd
@@ -358,14 +346,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
// the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch.
// so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n
final byte[] haplotypeBases = haplotype.getBases();
- final GATKSAMRecord read = p.getRead();
byte[] readBases = read.getReadBases();
readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string
byte[] readQuals = read.getBaseQualities();
readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
- int readOffsetFromPileup = p.getOffset();
- readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus);
+ int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus);
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
for (int i = 0; i < contextSize; i++) {
@@ -399,39 +385,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return mismatches - expected;
}
-
- private Double scoreIndelsAgainstHaplotypes(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
- final ArrayList haplotypeScores = new ArrayList();
-
- if (perReadAlleleLikelihoodMap.isEmpty())
- return null;
-
- for (Map el : perReadAlleleLikelihoodMap.getLikelihoodMapValues()) {
-
- // retrieve likelihood information corresponding to this read
- // Score all the reads in the pileup, even the filtered ones
- final double[] scores = new double[el.size()];
- int i = 0;
- for (Map.Entry a : el.entrySet()) {
- scores[i++] = -a.getValue();
- if (DEBUG) {
- System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
- }
- }
-
- haplotypeScores.add(scores);
- }
-
- // indel likelihoods are strict log-probs, not phred scored
- double overallScore = 0.0;
- for (final double[] readHaplotypeScores : haplotypeScores) {
- overallScore += MathUtils.arrayMin(readHaplotypeScores);
- }
-
- return overallScore;
-
- }
-
public List getKeyNames() {
return Arrays.asList("HaplotypeScore");
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
index ddca5e0b8..ae0d2a87b 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
@@ -87,7 +87,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
if (alleleLikelihoodMap == null) {
// use old UG SNP-based version if we don't have per-read allele likelihoods
for ( final PileupElement p : pileup ) {
- if ( isUsableBase(p) ) {
+ if ( isUsableBase(p) && p.getRead().getCigar() != null ) {
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
readPos = getFinalReadPosition(p.getRead(),readPos);
@@ -103,26 +103,26 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
}
for (Map.Entry> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
+ final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
+ if (a.isNoCall())
+ continue; // read is non-informative
+
final GATKSAMRecord read = el.getKey();
+ if ( read.getSoftStart() + read.getCigar().getReadLength() <= refLoc ) { // make sure the read actually covers the requested ref loc
+ continue;
+ }
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
- if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
+ if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED || read.getCigar() == null )
continue;
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 );
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
if (readPos > numAlignedBases / 2)
readPos = numAlignedBases - (readPos + 1);
-// int readPos = getOffsetFromClippedReadStart(el.getKey(), el.getKey().getOffset());
- // readPos = getFinalReadPosition(el.getKey().getRead(),readPos);
-
- final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
- if (a.isNoCall())
- continue; // read is non-informative
if (a.isReference())
refQuals.add((double)readPos);
else if (allAlleles.contains(a))
altQuals.add((double)readPos);
-
}
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
index 354e508c2..e1972334b 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
@@ -67,6 +67,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.recalibration.*;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -127,7 +128,7 @@ import java.util.List;
*
*/
-@DocumentedGATKFeature(groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class})
+@DocumentedGATKFeature(groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class})
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
@PartitionBy(PartitionType.READ)
@@ -214,6 +215,7 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche
}
initializeRecalibrationEngine();
+ RecalUtils.checkForInvalidRecalBams(getToolkit().getSAMFileHeaders(), getToolkit().getArguments().ALLOW_BQSR_ON_REDUCED_BAMS);
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
referenceReader = getToolkit().getReferenceDataSource().getReference();
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java
index 83d5bb29b..94d1c5501 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java
@@ -179,6 +179,6 @@ public final class ReadRecalibrationInfo {
}
private boolean validQual(final byte result) {
- return result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE;
+ return result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE;
}
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
index 95b54102f..5ab296a5f 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@@ -262,8 +262,12 @@ public class RecalibrationArgumentCollection {
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.addRowID("mismatches_default_quality", true);
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
+ argumentsTable.addRowID("deletions_default_quality", true);
+ argumentsTable.set("deletions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_DEFAULT_QUALITY);
argumentsTable.addRowID("insertions_default_quality", true);
argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
+ argumentsTable.addRowID("maximum_cycle_value", true);
+ argumentsTable.set("maximum_cycle_value", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MAXIMUM_CYCLE_VALUE);
argumentsTable.addRowID("low_quality_tail", true);
argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.addRowID("default_platform", true);
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java
index 207590c5f..c7b990a88 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java
@@ -53,42 +53,82 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
* @since 6/15/12
*/
public class BaseAndQualsCounts extends BaseCounts {
- private final long[] sumInsertionQuals;
- private final long[] sumDeletionQuals;
- public BaseAndQualsCounts() {
- super();
- this.sumInsertionQuals = new long[BaseIndex.values().length];
- this.sumDeletionQuals = new long[BaseIndex.values().length];
- for (final BaseIndex i : BaseIndex.values()) {
- sumInsertionQuals[i.index] = 0L;
- sumDeletionQuals[i.index] = 0L;
- }
- }
+ private long sumInsertionQual_A = 0;
+ private long sumDeletionQual_A = 0;
+ private long sumInsertionQual_C = 0;
+ private long sumDeletionQual_C = 0;
+ private long sumInsertionQual_G = 0;
+ private long sumDeletionQual_G = 0;
+ private long sumInsertionQual_T = 0;
+ private long sumDeletionQual_T = 0;
+ private long sumInsertionQual_D = 0;
+ private long sumDeletionQual_D = 0;
+ private long sumInsertionQual_I = 0;
+ private long sumDeletionQual_I = 0;
+ private long sumInsertionQual_N = 0;
+ private long sumDeletionQual_N = 0;
+
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual) {
final BaseIndex i = BaseIndex.byteToBase(base);
super.incr(i, baseQual);
- sumInsertionQuals[i.index] += insQual;
- sumDeletionQuals[i.index] += delQual;
+ switch (i) {
+ case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break;
+ case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break;
+ case G: sumInsertionQual_G += insQual; sumDeletionQual_G += delQual; break;
+ case T: sumInsertionQual_T += insQual; sumDeletionQual_T += delQual; break;
+ case D: sumInsertionQual_D += insQual; sumDeletionQual_D += delQual; break;
+ case I: sumInsertionQual_I += insQual; sumDeletionQual_I += delQual; break;
+ case N: sumInsertionQual_N += insQual; sumDeletionQual_N += delQual; break;
+ }
}
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual) {
final BaseIndex i = BaseIndex.byteToBase(base);
super.decr(i, baseQual);
- sumInsertionQuals[i.index] -= insQual;
- sumDeletionQuals[i.index] -= delQual;
+ switch (i) {
+ case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break;
+ case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break;
+ case G: sumInsertionQual_G -= insQual; sumDeletionQual_G -= delQual; break;
+ case T: sumInsertionQual_T -= insQual; sumDeletionQual_T -= delQual; break;
+ case D: sumInsertionQual_D -= insQual; sumDeletionQual_D -= delQual; break;
+ case I: sumInsertionQual_I -= insQual; sumDeletionQual_I -= delQual; break;
+ case N: sumInsertionQual_N -= insQual; sumDeletionQual_N -= delQual; break;
+ }
}
public byte averageInsertionQualsOfBase(final BaseIndex base) {
- return getGenericAverageQualOfBase(base, sumInsertionQuals);
+ return (byte) (getInsertionQual(base) / countOfBase(base));
}
public byte averageDeletionQualsOfBase(final BaseIndex base) {
- return getGenericAverageQualOfBase(base, sumDeletionQuals);
+ return (byte) (getDeletionQual(base) / countOfBase(base));
}
- private byte getGenericAverageQualOfBase(final BaseIndex base, final long[] sumQuals) {
- return (byte) (sumQuals[base.index] / countOfBase(base));
+ private long getInsertionQual(final BaseIndex base) {
+ switch (base) {
+ case A: return sumInsertionQual_A;
+ case C: return sumInsertionQual_C;
+ case G: return sumInsertionQual_G;
+ case T: return sumInsertionQual_T;
+ case D: return sumInsertionQual_D;
+ case I: return sumInsertionQual_I;
+ case N: return sumInsertionQual_N;
+ default: throw new IllegalArgumentException(base.name());
+ }
+ }
+
+ private long getDeletionQual(final BaseIndex base) {
+ switch (base) {
+ case A: return sumDeletionQual_A;
+ case C: return sumDeletionQual_C;
+ case G: return sumDeletionQual_G;
+ case T: return sumDeletionQual_T;
+ case D: return sumDeletionQual_D;
+ case I: return sumDeletionQual_I;
+ case N: return sumDeletionQual_N;
+ default: throw new IllegalArgumentException(base.name());
+ }
}
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java
index 5b34a2303..17ce3c90d 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java
@@ -62,73 +62,107 @@ import com.google.java.contract.Requires;
public final static BaseIndex MAX_BASE_INDEX_WITH_NO_COUNTS = BaseIndex.N;
public final static byte MAX_BASE_WITH_NO_COUNTS = MAX_BASE_INDEX_WITH_NO_COUNTS.getByte();
- private final int[] counts; // keeps track of the base counts
- private final long[] sumQuals; // keeps track of the quals of each base
+
+ private int count_A = 0; // keeps track of the base counts
+ private int sumQual_A = 0; // keeps track of the quals of each base
+ private int count_C = 0;
+ private int sumQual_C = 0;
+ private int count_G = 0;
+ private int sumQual_G = 0;
+ private int count_T = 0;
+ private int sumQual_T = 0;
+ private int count_D = 0;
+ private int sumQual_D = 0;
+ private int count_I = 0;
+ private int sumQual_I = 0;
+ private int count_N = 0;
+ private int sumQual_N = 0;
private int totalCount = 0; // keeps track of total count since this is requested so often
- public BaseCounts() {
- counts = new int[BaseIndex.values().length];
- sumQuals = new long[BaseIndex.values().length];
- for (final BaseIndex i : BaseIndex.values()) {
- counts[i.index] = 0;
- sumQuals[i.index] = 0L;
- }
- }
public static BaseCounts createWithCounts(int[] countsACGT) {
BaseCounts baseCounts = new BaseCounts();
- baseCounts.counts[BaseIndex.A.index] = countsACGT[0];
- baseCounts.counts[BaseIndex.C.index] = countsACGT[1];
- baseCounts.counts[BaseIndex.G.index] = countsACGT[2];
- baseCounts.counts[BaseIndex.T.index] = countsACGT[3];
+ baseCounts.count_A = countsACGT[0];
+ baseCounts.count_C = countsACGT[1];
+ baseCounts.count_G = countsACGT[2];
+ baseCounts.count_T = countsACGT[3];
baseCounts.totalCount = countsACGT[0] + countsACGT[1] + countsACGT[2] + countsACGT[3];
return baseCounts;
}
@Requires("other != null")
public void add(final BaseCounts other) {
- for (final BaseIndex i : BaseIndex.values()) {
- final int otherCount = other.counts[i.index];
- counts[i.index] += otherCount;
- totalCount += otherCount;
- }
+ this.count_A += other.count_A;
+ this.count_C += other.count_C;
+ this.count_G += other.count_G;
+ this.count_T += other.count_T;
+ this.count_D += other.count_D;
+ this.count_I += other.count_I;
+ this.count_N += other.count_N;
+ this.totalCount += other.totalCount;
}
@Requires("other != null")
public void sub(final BaseCounts other) {
- for (final BaseIndex i : BaseIndex.values()) {
- final int otherCount = other.counts[i.index];
- counts[i.index] -= otherCount;
- totalCount -= otherCount;
- }
+ this.count_A -= other.count_A;
+ this.count_C -= other.count_C;
+ this.count_G -= other.count_G;
+ this.count_T -= other.count_T;
+ this.count_D -= other.count_D;
+ this.count_I -= other.count_I;
+ this.count_N -= other.count_N;
+ this.totalCount -= other.totalCount;
}
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
public void incr(final byte base) {
- final BaseIndex i = BaseIndex.byteToBase(base);
- counts[i.index]++;
- totalCount++;
+ add(BaseIndex.byteToBase(base), 1);
}
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
public void incr(final BaseIndex base, final byte qual) {
- counts[base.index]++;
- totalCount++;
- sumQuals[base.index] += qual;
+ switch (base) {
+ case A: ++count_A; sumQual_A += qual; break;
+ case C: ++count_C; sumQual_C += qual; break;
+ case G: ++count_G; sumQual_G += qual; break;
+ case T: ++count_T; sumQual_T += qual; break;
+ case D: ++count_D; sumQual_D += qual; break;
+ case I: ++count_I; sumQual_I += qual; break;
+ case N: ++count_N; sumQual_N += qual; break;
+ }
+ ++totalCount;
}
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
public void decr(final byte base) {
- final BaseIndex i = BaseIndex.byteToBase(base);
- counts[i.index]--;
- totalCount--;
+ add(BaseIndex.byteToBase(base), -1);
+ }
+
+ private void add(final BaseIndex base, int amount) {
+ switch(base) {
+ case A: count_A += amount; break;
+ case C: count_C += amount; break;
+ case G: count_G += amount; break;
+ case T: count_T += amount; break;
+ case D: count_D += amount; break;
+ case I: count_I += amount; break;
+ case N: count_N += amount; break;
+ }
+ totalCount += amount;
}
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
public void decr(final BaseIndex base, final byte qual) {
- counts[base.index]--;
- totalCount--;
- sumQuals[base.index] -= qual;
+ switch (base) {
+ case A: --count_A; sumQual_A -= qual; break;
+ case C: --count_C; sumQual_C -= qual; break;
+ case G: --count_G; sumQual_G -= qual; break;
+ case T: --count_T; sumQual_T -= qual; break;
+ case D: --count_D; sumQual_D -= qual; break;
+ case I: --count_I; sumQual_I -= qual; break;
+ case N: --count_N; sumQual_N -= qual; break;
+ }
+ --totalCount;
}
@Ensures("result >= 0")
@@ -138,12 +172,21 @@ import com.google.java.contract.Requires;
@Ensures("result >= 0")
public long getSumQuals(final BaseIndex base) {
- return sumQuals[base.index];
+ switch (base) {
+ case A: return sumQual_A;
+ case C: return sumQual_C;
+ case G: return sumQual_G;
+ case T: return sumQual_T;
+ case D: return sumQual_D;
+ case I: return sumQual_I;
+ case N: return sumQual_N;
+ default: throw new IllegalArgumentException(base.name());
+ }
}
@Ensures("result >= 0")
public byte averageQuals(final byte base) {
- return (byte) (getSumQuals(base) / countOfBase(base));
+ return averageQuals(BaseIndex.byteToBase(base));
}
@Ensures("result >= 0")
@@ -158,12 +201,21 @@ import com.google.java.contract.Requires;
@Ensures("result >= 0")
public int countOfBase(final BaseIndex base) {
- return counts[base.index];
+ switch (base) {
+ case A: return count_A;
+ case C: return count_C;
+ case G: return count_G;
+ case T: return count_T;
+ case D: return count_D;
+ case I: return count_I;
+ case N: return count_N;
+ default: throw new IllegalArgumentException(base.name());
+ }
}
@Ensures("result >= 0")
public long sumQualsOfBase(final BaseIndex base) {
- return sumQuals[base.index];
+ return getSumQuals(base);
}
@Ensures("result >= 0")
@@ -196,14 +248,14 @@ import com.google.java.contract.Requires;
*/
@Ensures({"result >=0.0", "result<= 1.0"})
public double baseCountProportion(final BaseIndex baseIndex) {
- return (totalCount == 0) ? 0.0 : (double)counts[baseIndex.index] / (double)totalCount;
+ return (totalCount == 0) ? 0.0 : (double)countOfBase(baseIndex) / (double)totalCount;
}
@Ensures("result != null")
public String toString() {
StringBuilder b = new StringBuilder();
for (final BaseIndex i : BaseIndex.values()) {
- b.append(i.toString()).append("=").append(counts[i.index]).append(",");
+ b.append(i.toString()).append("=").append(countOfBase(i)).append(",");
}
return b.toString();
}
@@ -216,7 +268,7 @@ import com.google.java.contract.Requires;
public BaseIndex baseIndexWithMostCounts() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
for (final BaseIndex i : BaseIndex.values()) {
- if (counts[i.index] > counts[maxI.index])
+ if (countOfBase(i) > countOfBase(maxI))
maxI = i;
}
return maxI;
@@ -226,18 +278,12 @@ import com.google.java.contract.Requires;
public BaseIndex baseIndexWithMostCountsWithoutIndels() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
for (final BaseIndex i : BaseIndex.values()) {
- if (i.isNucleotide() && counts[i.index] > counts[maxI.index])
+ if (i.isNucleotide() && countOfBase(i) > countOfBase(maxI))
maxI = i;
}
return maxI;
}
- private boolean hasHigherCount(final BaseIndex targetIndex, final BaseIndex testIndex) {
- final int targetCount = counts[targetIndex.index];
- final int testCount = counts[testIndex.index];
- return ( targetCount > testCount || (targetCount == testCount && sumQuals[targetIndex.index] > sumQuals[testIndex.index]) );
- }
-
public byte baseWithMostProbability() {
return baseIndexWithMostProbability().getByte();
}
@@ -246,25 +292,25 @@ import com.google.java.contract.Requires;
public BaseIndex baseIndexWithMostProbability() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
for (final BaseIndex i : BaseIndex.values()) {
- if (sumQuals[i.index] > sumQuals[maxI.index])
+ if (getSumQuals(i) > getSumQuals(maxI))
maxI = i;
}
- return (sumQuals[maxI.index] > 0L ? maxI : baseIndexWithMostCounts());
+ return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCounts());
}
@Ensures("result != null")
public BaseIndex baseIndexWithMostProbabilityWithoutIndels() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
for (final BaseIndex i : BaseIndex.values()) {
- if (i.isNucleotide() && sumQuals[i.index] > sumQuals[maxI.index])
+ if (i.isNucleotide() && getSumQuals(i) > getSumQuals(maxI))
maxI = i;
}
- return (sumQuals[maxI.index] > 0L ? maxI : baseIndexWithMostCountsWithoutIndels());
+ return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCountsWithoutIndels());
}
@Ensures("result >=0")
public int totalCountWithoutIndels() {
- return totalCount - counts[BaseIndex.D.index] - counts[BaseIndex.I.index];
+ return totalCount - countOfBase(BaseIndex.D) - countOfBase(BaseIndex.I);
}
/**
@@ -277,10 +323,6 @@ import com.google.java.contract.Requires;
@Ensures({"result >=0.0", "result<= 1.0"})
public double baseCountProportionWithoutIndels(final BaseIndex base) {
final int total = totalCountWithoutIndels();
- return (total == 0) ? 0.0 : (double)counts[base.index] / (double)total;
- }
-
- public int[] countsArray() {
- return counts.clone();
+ return (total == 0) ? 0.0 : (double)countOfBase(base) / (double)total;
}
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java
index cb3eed1a2..e41878a0b 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java
@@ -69,6 +69,12 @@ public enum BaseIndex {
public byte getByte() { return b; }
+ /**
+ * Ordinal is stored in SyntheticRead rather than enum to save object reference, and store as byte for compactness.
+ * It is stored as byte, and this method merely eliminates a cast.
+ */
+ public byte getOrdinalByte() { return (byte)ordinal(); }
+
private BaseIndex(char base, int index) {
this.b = (byte)base;
this.index = index;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java
index 3c475576a..a8a765ddc 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java
@@ -59,6 +59,7 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.sting.utils.help.HelpConstants;
import java.util.HashMap;
import java.util.Map;
@@ -87,7 +88,7 @@ import java.util.Map;
* @since 10/30/11
*/
-@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public class CompareBAM extends LocusWalker