diff --git a/build.xml b/build.xml index 0a4cd2171..ca5d22a5a 100644 --- a/build.xml +++ b/build.xml @@ -646,7 +646,7 @@ - + @@ -679,20 +679,6 @@ - - - - - - - - - - - - - - @@ -750,12 +736,6 @@ - - - - - - @@ -1249,11 +1229,7 @@ - - - - - + diff --git a/ivy.xml b/ivy.xml index 5a8c3986a..0761cb411 100644 --- a/ivy.xml +++ b/ivy.xml @@ -23,86 +23,90 @@ --> - - - - - - - - - - - + + + + + + + + + + + - - + + - - - - - - + + + + + + - - + + - - + + - - + + - - - - - - - - - - + + - - + + + + + + + - - + + - - + + - - + + - - - + + - - - - + + + - - - + + + + - - - + + + - - + + + - - + + - - - - - + + + + + + + + + + + + diff --git a/licensing/GATK2_beta_license.doc b/licensing/GATK2_beta_license.doc index 4fa04a3f6..6c12bfe30 100644 Binary files a/licensing/GATK2_beta_license.doc and b/licensing/GATK2_beta_license.doc differ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/protected/java/src/org/broadinstitute/sting/gatk/DummyProtectedClass.java old mode 100755 new mode 100644 similarity index 52% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java rename to protected/java/src/org/broadinstitute/sting/gatk/DummyProtectedClass.java index 33adf4417..c1324aea4 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/DummyProtectedClass.java @@ -1,7 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +package org.broadinstitute.sting.gatk; /* * Copyright (c) 2009 The Broad Institute @@ -28,34 +25,10 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * OTHER DEALINGS IN THE SOFTWARE. */ -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Oct 30, 2009 - * - * The Read Group covariate. 
- */ +import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; -public class ReadGroupCovariate implements RequiredCovariate { +public class DummyProtectedClass implements ProtectedPackageSource { - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - } - - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - final String readGroupId = read.getReadGroup().getReadGroupId(); - for (int i = 0; i < read.getReadLength(); i++) { - comparable[i] = readGroupId; - } - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return str; - } + // THIS CLASS IS USED JUST SO THAT WE CAN TEST WHETHER WE ARE USING THE LITE OR FULL VERSION OF THE GATK + // **** DO NOT REMOVE! **** } - - diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java new file mode 100644 index 000000000..d714ca185 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -0,0 +1,103 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * 
included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.recalibration.RecalDatum; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; + +public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource { + + // optimizations: don't reallocate an array each time + private byte[] tempQualArray; + private boolean[] tempErrorArray; + + public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { + super.initialize(covariates, recalibrationTables); + tempQualArray = new byte[EventType.values().length]; + tempErrorArray = new boolean[EventType.values().length]; + } + + /** + * Loop through the list of requested covariates and pick out the value from the read, offset, and reference + * Using the list of covariate values as a key, pick out the RecalDatum and increment, + * adding one to the number of observations and potentially one to the number of mismatches for all 
three + * categories (mismatches, insertions and deletions). + * + * @param pileupElement The pileup element to update + * @param refBase The reference base at this locus + */ + public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { + final int offset = pileupElement.getOffset(); + final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); + + tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual(); + tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); + tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual(); + tempErrorArray[EventType.BASE_INSERTION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterInsertion() : pileupElement.isBeforeInsertion(); + tempQualArray[EventType.BASE_DELETION.index] = pileupElement.getBaseDeletionQual(); + tempErrorArray[EventType.BASE_DELETION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? 
pileupElement.isAfterDeletedBase() : pileupElement.isBeforeDeletedBase(); + + for (final EventType eventType : EventType.values()) { + final int[] keys = readCovariates.getKeySet(offset, eventType); + final int eventIndex = eventType.index; + final byte qual = tempQualArray[eventIndex]; + final boolean isError = tempErrorArray[eventIndex]; + + final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); + final RecalDatum rgThisDatum = createDatumObject(qual, isError); + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + rgRecalTable.put(rgThisDatum, keys[0], eventIndex); + else + rgPreviousDatum.combine(rgThisDatum); + + final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); + if (qualPreviousDatum == null) + qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); + else + qualPreviousDatum.increment(isError); + + for (int i = 2; i < covariates.length; i++) { + if (keys[i] < 0) + continue; + final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); + final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); + if (covPreviousDatum == null) + covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); + else + covPreviousDatum.increment(isError); + } + } + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAMWalker.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java similarity index 95% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAMWalker.java rename to 
protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java index 3e07295e7..9809709a8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAMWalker.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; @@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.util.HashMap; import java.util.Map; @@ -39,8 +41,9 @@ import java.util.Map; * @since 10/30/11 */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) -public class CompareBAMWalker extends LocusWalker, CompareBAMWalker.TestResults> { +public class CompareBAM extends LocusWalker, CompareBAM.TestResults> { @Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID; @Argument(required = false, shortName = "teq", fullName = "test_equal_bases", doc = "Test if the bases marked as '=' are indeed ref bases.") public boolean TEST_EQUAL_BASES = false; @Argument(required = false, shortName 
= "tbc", fullName = "test_base_counts", doc = "Test if the base counts tag in consensus reads are accurate.") public boolean TEST_BASE_COUNTS = false; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java index 0ac9630c2..44971ca38 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java @@ -53,7 +53,7 @@ public class MultiSampleCompressor implements Compressor { final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, final int minBaseQual, - final ReduceReadsWalker.DownsampleStrategy downsampleStrategy) { + final ReduceReads.DownsampleStrategy downsampleStrategy) { for ( String name : SampleUtils.getSAMFileSamples(header) ) { compressorsPerSample.put(name, new SingleSampleCompressor(name, contextSize, downsampleCoverage, diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java similarity index 90% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 095149bae..177050667 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -25,13 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; 
-import net.sf.samtools.CigarOperator; import net.sf.samtools.util.SequenceUtil; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.*; @@ -46,6 +44,7 @@ import org.broadinstitute.sting.utils.GenomeLocComparator; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -81,9 +80,10 @@ import java.util.*; * */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.INTERVAL) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class}) -public class ReduceReadsWalker extends ReadWalker, ReduceReadsStash> { +public class ReduceReads extends ReadWalker, ReduceReadsStash> { @Output protected StingSAMFileWriter out; @@ -180,7 +180,7 @@ public class ReduceReadsWalker extends ReadWalker, Red * A value of 0 turns downsampling off. 
*/ @Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false) - protected int downsampleCoverage = 0; + protected int downsampleCoverage = 250; @Hidden @Argument(fullName = "", shortName = "dl", doc = "", required = false) @@ -251,7 +251,7 @@ public class ReduceReadsWalker extends ReadWalker, Red LinkedList mappedReads; totalReads++; if (!debugRead.isEmpty() && read.getReadName().contains(debugRead)) - System.out.println("Found debug read!"); + System.out.println("Found debug read!"); if (debugLevel == 1) System.out.printf("\nOriginal: %s %s %d %d\n", read, read.getCigar(), read.getAlignmentStart(), read.getAlignmentEnd()); @@ -260,7 +260,14 @@ public class ReduceReadsWalker extends ReadWalker, Red // attribute hash so we can determine later if we need to write down the alignment shift to the reduced BAM file read.setTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, read.getAlignmentStart()); read.setTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, read.getAlignmentEnd()); - + + // Check if the read goes beyond the boundaries of the chromosome, and hard clip those boundaries. 
+ int chromosomeLength = ref.getGenomeLocParser().getContigInfo(read.getReferenceName()).getSequenceLength(); + if (read.getSoftStart() < 0) + read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart() - 1); + if (read.getSoftEnd() > chromosomeLength) + read = ReadClipper.hardClipByReadCoordinates(read, chromosomeLength - read.getSoftStart() + 1, read.getReadLength() - 1); + if (!DONT_SIMPLIFY_READS) read.simplify(); // Clear all unnecessary attributes if (!DONT_CLIP_ADAPTOR_SEQUENCES) @@ -532,81 +539,12 @@ public class ReduceReadsWalker extends ReadWalker, Red if (debugLevel == 1) System.out.println("BAM: " + read.getCigar() + " " + read.getAlignmentStart() + " " + read.getAlignmentEnd()); -// if (!DONT_USE_SOFTCLIPPED_BASES) -// reSoftClipBases(read); - if (!DONT_COMPRESS_READ_NAMES) compressReadName(read); out.addAlignment(read); } - private void reSoftClipBases(GATKSAMRecord read) { - Integer left = (Integer) read.getTemporaryAttribute("SL"); - Integer right = (Integer) read.getTemporaryAttribute("SR"); - if (left != null || right != null) { - Cigar newCigar = new Cigar(); - for (CigarElement element : read.getCigar().getCigarElements()) { - newCigar.add(new CigarElement(element.getLength(), element.getOperator())); - } - - if (left != null) { - newCigar = updateFirstSoftClipCigarElement(left, newCigar); - read.setAlignmentStart(read.getAlignmentStart() + left); - } - - if (right != null) { - Cigar invertedCigar = invertCigar(newCigar); - newCigar = invertCigar(updateFirstSoftClipCigarElement(right, invertedCigar)); - } - read.setCigar(newCigar); - } - } - - /** - * Facility routine to revert the first element of a Cigar string (skipping hard clips) into a soft-clip. 
- * To be used on both ends if provided a flipped Cigar - * - * @param softClipSize the length of the soft clipped element to add - * @param originalCigar the original Cigar string - * @return a new Cigar object with the soft clips added - */ - private Cigar updateFirstSoftClipCigarElement (int softClipSize, Cigar originalCigar) { - Cigar result = new Cigar(); - CigarElement leftElement = new CigarElement(softClipSize, CigarOperator.S); - boolean updated = false; - for (CigarElement element : originalCigar.getCigarElements()) { - if (!updated && element.getOperator() == CigarOperator.M) { - result.add(leftElement); - int newLength = element.getLength() - softClipSize; - if (newLength > 0) - result.add(new CigarElement(newLength, CigarOperator.M)); - updated = true; - } - else - result.add(element); - } - return result; - } - - /** - * Given a cigar string, returns the inverted cigar string. - * - * @param cigar the original cigar - * @return the inverted cigar - */ - private Cigar invertCigar(Cigar cigar) { - Stack stack = new Stack(); - for (CigarElement e : cigar.getCigarElements()) - stack.push(e); - Cigar inverted = new Cigar(); - while (!stack.empty()) { - inverted.add(stack.pop()); - } - return inverted; - } - - /** * Quality control procedure that checks if the consensus reads contains too many * mismatches with the reference. This should never happen and is a good trigger for @@ -663,7 +601,7 @@ public class ReduceReadsWalker extends ReadWalker, Red * @return Returns true if the read is the original read that went through map(). 
*/ private boolean isOriginalRead(LinkedList list, GATKSAMRecord read) { - return isWholeGenome() || (list.getFirst().equals(read) && ReadUtils.getReadAndIntervalOverlapType(read, intervalList.first()) == ReadUtils.ReadAndIntervalOverlap.OVERLAP_CONTAINED); + return isWholeGenome() || list.getFirst().equals(read); } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java index fd0dfa1ff..6d2c2d215 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java @@ -26,7 +26,7 @@ public class SingleSampleCompressor implements Compressor { protected double minIndelProportionToTriggerVariant; protected int minBaseQual; - protected ReduceReadsWalker.DownsampleStrategy downsampleStrategy; + protected ReduceReads.DownsampleStrategy downsampleStrategy; public SingleSampleCompressor(final String sampleName, final int contextSize, @@ -35,7 +35,7 @@ public class SingleSampleCompressor implements Compressor { final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, final int minBaseQual, - final ReduceReadsWalker.DownsampleStrategy downsampleStrategy) { + final ReduceReads.DownsampleStrategy downsampleStrategy) { this.sampleName = sampleName; this.contextSize = contextSize; this.downsampleCoverage = downsampleCoverage; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 68dfd041b..bdb9ef843 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -53,7 +53,7 @@ public class SlidingWindow { protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value protected int MIN_MAPPING_QUALITY; - protected ReduceReadsWalker.DownsampleStrategy downsampleStrategy; + protected ReduceReads.DownsampleStrategy downsampleStrategy; private boolean hasIndelQualities; /** @@ -82,7 +82,7 @@ public class SlidingWindow { } - public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader header, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReadsWalker.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities) { + public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader header, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities) { this.stopLocation = -1; this.contextSize = contextSize; this.downsampleCoverage = downsampleCoverage; @@ -499,7 +499,7 @@ public class SlidingWindow { result.addAll(addToSyntheticReads(0, start)); result.addAll(finalizeAndAdd(ConsensusType.BOTH)); - for (GATKSAMRecord read : result) { + for (GATKSAMRecord read : allReads) { readsInWindow.remove(read); // todo -- not optimal, but needs to be done so the next region doesn't try to remove the same reads from the header counts. 
} @@ -536,6 +536,10 @@ public class SlidingWindow { * @return a list of reads selected by the downsampler to cover the window to at least the desired coverage */ protected List downsampleVariantRegion(final List allReads) { + int nReads = allReads.size(); + if (nReads == 0) + return allReads; + double fraction = 100 / allReads.size(); if (fraction >= 1) return allReads; @@ -545,6 +549,7 @@ public class SlidingWindow { return downsampler.consumeDownsampledItems(); } + /** * Properly closes a Sliding Window, finalizing all consensus and variant * regions that still exist regardless of being able to fulfill the @@ -627,7 +632,7 @@ public class SlidingWindow { int locationIndex = startLocation < 0 ? 0 : readStart - startLocation; if (removeRead && locationIndex < 0) - throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation); + throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " start " + read.getUnclippedStart() + "," + read.getUnclippedEnd() + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation); if (!removeRead) { // we only need to create new header elements if we are adding the read, not when we're removing it if (locationIndex < 0) { // Do we need to add extra elements before the start of the header? 
-- this may happen if the previous read was clipped and this alignment starts before the beginning of the window diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java index 9ee1a4634..6134101d9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java @@ -5,7 +5,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; @@ -102,7 +102,7 @@ public class SyntheticRead { * @param base the base to add * @param count number of reads with this base */ - @Requires("count < Byte.MAX_VALUE") + @Requires("count <= Byte.MAX_VALUE") public void add(BaseIndex base, byte count, byte qual, byte insQual, byte delQual, double mappingQuality) { counts.add(count); bases.add(base); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java new file mode 100644 index 000000000..8e4ca9595 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java @@ -0,0 +1,295 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import com.google.java.contract.Requires; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import 
org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.Haplotype; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; + +/** + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 7/21/11 + * Time: 2:21 PM + * + * This is a site based implementation of an Error Model. The error model is a probability + * distribution for the site given the phred scaled quality. + */ +public class ErrorModel { + private byte maxQualityScore; + private byte minQualityScore; + private byte phredScaledPrior; + private double log10minPower; + private int refDepth; + private boolean hasData = false; + private ProbabilityVector probabilityVector; + private static final boolean compressRange = false; + + private static final double log10MinusE = Math.log10(Math.exp(1.0)); + private static final boolean DEBUG = false; + /** + * Calculates the probability of the data (reference sample reads) given the phred scaled site quality score. 
+ * + * @param UAC Argument Collection + * @param refSamplePileup Reference sample pileup + * @param refSampleVC VC with True alleles in reference sample pileup + */ + public ErrorModel (final UnifiedArgumentCollection UAC, + final ReadBackedPileup refSamplePileup, + VariantContext refSampleVC, final ReferenceContext refContext) { + this.maxQualityScore = UAC.maxQualityScore; + this.minQualityScore = UAC.minQualityScore; + this.phredScaledPrior = UAC.phredScaledPrior; + log10minPower = Math.log10(UAC.minPower); + + PairHMMIndelErrorModel pairModel = null; + LinkedHashMap haplotypeMap = null; + HashMap> indelLikelihoodMap = null; + double[][] perReadLikelihoods = null; + + double[] model = new double[maxQualityScore+1]; + Arrays.fill(model,Double.NEGATIVE_INFINITY); + + boolean hasCalledAlleles = false; + if (refSampleVC != null) { + + for (Allele allele : refSampleVC.getAlleles()) { + if (allele.isCalled()) { + hasCalledAlleles = true; + break; + } + } + haplotypeMap = new LinkedHashMap(); + if (refSampleVC.isIndel()) { + pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY, + UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION); + indelLikelihoodMap = new HashMap>(); + IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements + } + } + + double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore)); + if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) { + for (byte q=minQualityScore; q<=maxQualityScore; q++) { + // maximum uncertainty if there's no ref data at site + model[q] = p; + } + this.refDepth = 0; + } + else { + hasData = true; + int matches = 0; + int coverage = 0; + + Allele refAllele = refSampleVC.getReference(); + + if (refSampleVC.isIndel()) { + final int readCounts[] = new 
int[refSamplePileup.getNumberOfElements()]; + //perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()]; + final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles()); + if (!haplotypeMap.isEmpty()) + perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts); + } + int idx = 0; + for (PileupElement refPileupElement : refSamplePileup) { + if (DEBUG) + System.out.println(refPileupElement.toString()); + boolean isMatch = false; + for (Allele allele : refSampleVC.getAlleles()) { + boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase()); + if (DEBUG) System.out.println(m); + isMatch |= m; + } + if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) { + // ignore match/mismatch if reads, as determined by their likelihood, are not informative + double[] perAlleleLikelihoods = perReadLikelihoods[idx++]; + if (!isInformativeElement(perAlleleLikelihoods)) + matches++; + else + matches += (isMatch?1:0); + + } else { + matches += (isMatch?1:0); + } + coverage++; + } + + int mismatches = coverage - matches; + //System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches); + for (byte q=minQualityScore; q<=maxQualityScore; q++) { + if (coverage==0) + model[q] = p; + else + model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches); + } + this.refDepth = coverage; + } + + // compress probability vector + this.probabilityVector = new ProbabilityVector(model, compressRange); + } + + + @Requires("likelihoods.length>0") + private boolean isInformativeElement(double[] likelihoods) { + // if likelihoods are the same, they're not informative + final double thresh = 0.1; + int maxIdx = MathUtils.maxElementIndex(likelihoods); + int minIdx = MathUtils.minElementIndex(likelihoods); + if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh) + return false; + 
else + return true; + } + /** + * Simple constructor that just takes a given log-probability vector as error model. + * Only intended for unit testing, not general usage. + * @param pvector Given vector of log-probabilities + * + */ + public ErrorModel(double[] pvector) { + this.maxQualityScore = (byte)(pvector.length-1); + this.minQualityScore = 0; + this.probabilityVector = new ProbabilityVector(pvector, compressRange); + this.hasData = true; + + } + + public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) { + if (DEBUG) + System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n", + pileupElement.getBase(), pileupElement.isBeforeDeletionStart(), + pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString()); + + //pileupElement. + // if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch + if (allele.isReference()) { + // for a ref allele, any base mismatch or new indel is a mismatch. + if(allele.getBases().length>0) + // todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location + // could clean up/simplify this when unpadding is removed + return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart()); + else + // either null allele to compare, or ref/alt lengths are different (indel by definition). 
+ // if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch + return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart()); + } + + // for non-ref alleles to compare: + if (refAllele.getBases().length == allele.getBases().length) + // alleles have the same length (eg snp or mnp) + return pileupElement.getBase() == allele.getBases()[0]; + + // for non-ref alleles, + byte[] alleleBases = allele.getBases(); + int eventLength = alleleBases.length - refAllele.getBases().length; + if (eventLength < 0 && pileupElement.isBeforeDeletionStart() && pileupElement.getEventLength() == -eventLength) + return true; + + if (eventLength > 0 && pileupElement.isBeforeInsertion() && + Arrays.equals(pileupElement.getEventBases().getBytes(),alleleBases)) + return true; + + return false; + } + + + /** + * What's the log-likelihood that a site's quality is equal to q? If we see N observations and n mismatches, + * and assuming each match is independent of each other and that the match probability is just dependent of + * the site quality, so p = 10.^-q/10. + * Since we'll normally have relatively high Q sites and deep coverage in reference samples (ie p small, N high), + * to avoid underflows we'll use the Poisson approximation with lambda = N*p. + * Hence, the log-likelihood of q i.e. Pr(Nmismatches = n | SiteQ = q) ~ Poisson(n | lambda = p*N) with p as above. 
+ * @param q Desired q to get likelihood from + * @param coverage Total coverage + * @param mismatches Number of mismatches + * @return Likelihood of observations as a function of q + */ + @Requires({ + "q >= minQualityScore", + "q <= maxQualityScore", + "coverage >= 0", + "mismatches >= 0", + "mismatches <= coverage" + }) + private double log10PoissonProbabilitySiteGivenQual(byte q, int coverage, int mismatches) { + // same as log10ProbabilitySiteGivenQual but with Poisson approximation to avoid numerical underflows + double lambda = MathUtils.phredScaleToProbability(q) * (double )coverage; + // log10(e^-lambda*lambda^k/k!) = -lambda + k*log10(lambda) - log10factorial(k) + return Math.log10(lambda)*mismatches - lambda*log10MinusE- MathUtils.log10Factorial(mismatches); + } + + @Requires({"qual-minQualityScore <= maxQualityScore"}) + public double getSiteLogErrorProbabilityGivenQual (int qual) { + return probabilityVector.getLogProbabilityForIndex(qual); + } + + public byte getMaxQualityScore() { + return maxQualityScore; + } + + public byte getMinQualityScore() { + return minQualityScore; + } + + public int getMinSignificantQualityScore() { + return new ProbabilityVector(probabilityVector,true).getMinVal(); + } + + public int getMaxSignificantQualityScore() { + return new ProbabilityVector(probabilityVector,true).getMaxVal(); + } + + public int getReferenceDepth() { + return refDepth; + } + public boolean hasData() { + return hasData; + } + + public ProbabilityVector getErrorModelVector() { + return probabilityVector; + } + + public String toString() { + String result = "("; + boolean skipComma = true; + for (double v : probabilityVector.getProbabilityVector()) { + if (skipComma) { + skipComma = false; + } + else { + result += ","; + } + result += String.format("%.4f", v); + } + return result + ")"; + } + + public static int getTotalReferenceDepth(HashMap perLaneErrorModels) { + int n=0; + for (ErrorModel e : perLaneErrorModels.values()) { + n += 
e.getReferenceDepth(); + } + return n; + } + + /* +@Requires({"maxAlleleCount >= 0"}) +//todo -- memoize this function + public boolean hasPowerForMaxAC (int maxAlleleCount) { + int siteQ = (int) Math.ceil(MathUtils.probabilityToPhredScale((double) 1/maxAlleleCount)); + double log10CumSum = getCumulativeSum(siteQ); + return log10CumSum < log10minPower; + } */ +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java new file mode 100644 index 000000000..78ab11eb1 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java @@ -0,0 +1,706 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.*; + +import java.io.PrintStream; +import java.util.*; + +public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel { + static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them + final protected UnifiedArgumentCollection UAC; + + private final int ploidy; + private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 + private final static boolean VERBOSE = false; + + protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { + super(UAC, N, logger, verboseWriter); + ploidy = UAC.samplePloidy; + this.UAC = UAC; + + } + + public List getLog10PNonRef(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final AlleleFrequencyCalculationResult result) { + + GenotypesContext GLs = vc.getGenotypes(); + List alleles = vc.getAlleles(); + + // don't try to genotype too many alternate alleles + if ( vc.getAlternateAlleles().size() > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) { + logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument"); + + alleles = new ArrayList(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1); + alleles.add(vc.getReference()); + alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE, 
ploidy)); + + + GLs = subsetAlleles(vc, alleles, false, ploidy); + } + + combineSinglePools(GLs, alleles.size(), ploidy, log10AlleleFrequencyPriors, result); + + return alleles; + } + + + /** + * Simple wrapper class to hold values of combined pool likelihoods. + * For fast hashing and fast retrieval, there's a hash map that shadows main list. + * + */ + static class CombinedPoolLikelihoods { + private LinkedList alleleCountSetList; + private HashMap conformationMap; + private double maxLikelihood; + + + public CombinedPoolLikelihoods() { + // final int numElements = GenotypeLikelihoods.numLikelihoods(); + alleleCountSetList = new LinkedList(); + conformationMap = new HashMap(); + maxLikelihood = Double.NEGATIVE_INFINITY; + } + + public void add(ExactACset set) { + alleleCountSetList.add(set); + conformationMap.put(set.ACcounts, set); + final double likelihood = set.log10Likelihoods[0]; + + if (likelihood > maxLikelihood ) + maxLikelihood = likelihood; + + } + + public boolean hasConformation(int[] ac) { + return conformationMap.containsKey(new ExactACcounts(ac)); + + } + + public double getLikelihoodOfConformation(int[] ac) { + return conformationMap.get(new ExactACcounts(ac)).log10Likelihoods[0]; + } + + public double getGLOfACZero() { + return alleleCountSetList.get(0).log10Likelihoods[0]; // AC 0 is always at beginning of list + } + + public int getLength() { + return alleleCountSetList.size(); + } + } + + /** + * + * Chooses N most likely alleles in a set of pools (samples) based on GL sum over alt alleles + * @param vc Input variant context + * @param numAllelesToChoose Number of alleles to choose + * @param ploidy Ploidy per pool + * @return list of numAllelesToChoose most likely alleles + */ + + private static List chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose, int ploidy) { + final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); + final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles]; + 
for ( int i = 0; i < numOriginalAltAlleles; i++ ) + likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i)); + + // based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype + final ArrayList GLs = getGLs(vc.getGenotypes()); + for ( final double[] likelihoods : GLs ) { + + final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); + final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL); + // by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele + for (int k=1; k < acCount.length;k++) { + if (acCount[k] > 0) + likelihoodSums[k-1].sum += likelihoods[PLindexOfBestGL]; + + } + } + + // sort them by probability mass and choose the best ones + Collections.sort(Arrays.asList(likelihoodSums)); + final ArrayList bestAlleles = new ArrayList(numAllelesToChoose); + for ( int i = 0; i < numAllelesToChoose; i++ ) + bestAlleles.add(likelihoodSums[i].allele); + + final ArrayList orderedBestAlleles = new ArrayList(numAllelesToChoose); + for ( Allele allele : vc.getAlternateAlleles() ) { + if ( bestAlleles.contains(allele) ) + orderedBestAlleles.add(allele); + } + + return orderedBestAlleles; + } + + + /** + * Simple non-optimized version that combines GLs from several pools and produces global AF distribution. 
+ * @param GLs Inputs genotypes context with per-pool GLs + * @param numAlleles Number of alternate alleles + * @param ploidyPerPool Number of samples per pool + * @param log10AlleleFrequencyPriors Frequency priors + * @param result object to fill with output values + */ + protected static void combineSinglePools(final GenotypesContext GLs, + final int numAlleles, + final int ploidyPerPool, + final double[] log10AlleleFrequencyPriors, + final AlleleFrequencyCalculationResult result) { + + final ArrayList genotypeLikelihoods = getGLs(GLs); + + + int combinedPloidy = 0; + + // Combine each pool incrementally - likelihoods will be renormalized at each step + CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods(); + + // first element: zero ploidy, e.g. trivial degenerate distribution + final int[] zeroCounts = new int[numAlleles]; + final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts)); + set.log10Likelihoods[0] = 0.0; + + combinedPoolLikelihoods.add(set); + for (int p=1; p ACqueue = new LinkedList(); + // mapping of ExactACset indexes to the objects + final HashMap indexesToACset = new HashMap(); + final CombinedPoolLikelihoods newPool = new CombinedPoolLikelihoods(); + + // add AC=0 to the queue + final int[] zeroCounts = new int[numAlleles]; + final int newPloidy = originalPloidy + newGLPloidy; + zeroCounts[0] = newPloidy; + + ExactACset zeroSet = new ExactACset(1, new ExactACcounts(zeroCounts)); + + ACqueue.add(zeroSet); + indexesToACset.put(zeroSet.ACcounts, zeroSet); + + // keep processing while we have AC conformations that need to be calculated + double maxLog10L = Double.NEGATIVE_INFINITY; + while ( !ACqueue.isEmpty() ) { + // compute log10Likelihoods + final ExactACset ACset = ACqueue.remove(); + final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset); + maxLog10L = 
Math.max(maxLog10L, log10LofKs); + // clean up memory + indexesToACset.remove(ACset.ACcounts); + if ( VERBOSE ) + System.out.printf(" *** removing used set=%s%n", ACset.ACcounts); + + } + return newPool; + } + + // todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods + /** + * + * @param set ExactACset holding conformation to be computed + * @param newPool New pool likelihood holder + * @param originalPool Original likelihood holder + * @param newGL New pool GL vector to combine + * @param log10AlleleFrequencyPriors Prior object + * @param originalPloidy Total ploidy of original combined pool + * @param newGLPloidy Ploidy of GL vector + * @param result AFResult object + * @param maxLog10L max likelihood observed so far + * @param ACqueue Queue of conformations to compute + * @param indexesToACset AC indices of objects in queue + * @return max log likelihood + */ + private static double calculateACConformationAndUpdateQueue(final ExactACset set, + final CombinedPoolLikelihoods newPool, + final CombinedPoolLikelihoods originalPool, + final double[] newGL, + final double[] log10AlleleFrequencyPriors, + final int originalPloidy, + final int newGLPloidy, + final AlleleFrequencyCalculationResult result, + final double maxLog10L, + final LinkedList ACqueue, + final HashMap indexesToACset) { + + // compute likeihood in "set" of new set based on original likelihoods + final int numAlleles = set.ACcounts.counts.length; + final int newPloidy = set.getACsum(); + final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, result); + + + // add to new pool + if (!Double.isInfinite(log10LofK)) + newPool.add(set); + + if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( VERBOSE ) + System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); + return log10LofK; + } + + // iterate 
over higher frequencies if possible + // by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count. + // so, if first element is zero, it automatically means we have no wiggle since we're in a corner of the conformation space + final int ACwiggle = set.ACcounts.counts[0]; + if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies + return log10LofK; + + + // add conformations for other cases + for ( int allele = 1; allele < numAlleles; allele++ ) { + final int[] ACcountsClone = set.ACcounts.getCounts().clone(); + ACcountsClone[allele]++; + // is this a valid conformation? + int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0]; + ACcountsClone[0] = newPloidy - altSum; + if (ACcountsClone[0] < 0) + continue; + + + GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset); + } + + + return log10LofK; + } + + + /** + * Naive combiner of two multiallelic pools - number of alt alleles must be the same. + * Math is generalization of biallelic combiner. 
+ * + * For vector K representing an allele count conformation, + * Pr(D | AC = K) = Sum_G Pr(D|AC1 = G) Pr (D|AC2=K-G) * F(G,K) + * where F(G,K) = choose(m1,[g0 g1 ...])*choose(m2,[...]) / choose(m1+m2,[k1 k2 ...]) + * @param originalPool First log-likelihood pool GL vector + * @param yy Second pool GL vector + * @param ploidy1 Ploidy of first pool (# of chromosomes in it) + * @param ploidy2 Ploidy of second pool + * @param numAlleles Number of alleles + * @param log10AlleleFrequencyPriors Array of biallelic priors + * @param result Af calculation result object + */ + public static void combineMultiallelicPoolNaively(CombinedPoolLikelihoods originalPool, double[] yy, int ploidy1, int ploidy2, int numAlleles, + final double[] log10AlleleFrequencyPriors, + final AlleleFrequencyCalculationResult result) { +/* + final int dim1 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy1); + final int dim2 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy2); + + if (dim1 != originalPool.getLength() || dim2 != yy.length) + throw new ReviewedStingException("BUG: Inconsistent vector length"); + + if (ploidy2 == 0) + return; + + final int newPloidy = ploidy1 + ploidy2; + + // Say L1(K) = Pr(D|AC1=K) * choose(m1,K) + // and L2(K) = Pr(D|AC2=K) * choose(m2,K) + GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1); + final double[] x = originalPool.getLikelihoodsAsVector(true); + while(firstIterator.hasNext()) { + x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector()); + firstIterator.next(); + } + + GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2); + final double[] y = yy.clone(); + while(secondIterator.hasNext()) { + y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector()); + 
secondIterator.next(); + } + + // initialize output to -log10(choose(m1+m2,[k1 k2...]) + final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy); + final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy); + + + // Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K + while(outputIterator.hasNext()) { + final ExactACset set = new ExactACset(1, new ExactACcounts(outputIterator.getCurrentAltVector())); + double likelihood = computeLofK(set, x,y, log10AlleleFrequencyPriors, numAlleles, ploidy1, ploidy2, result); + + originalPool.add(likelihood, set, outputIterator.getLinearIndex()); + outputIterator.next(); + } +*/ + } + + /** + * Compute likelihood of a particular AC conformation and update AFresult object + * @param set Set of AC counts to compute + * @param firstGLs Original pool likelihoods before combining + * @param secondGL New GL vector with additional pool + * @param log10AlleleFrequencyPriors Allele frequency priors + * @param numAlleles Number of alleles (including ref) + * @param ploidy1 Ploidy of original pool (combined) + * @param ploidy2 Ploidy of new pool + * @param result AFResult object + * @return log-likehood of requested conformation + */ + private static double computeLofK(final ExactACset set, + final CombinedPoolLikelihoods firstGLs, + final double[] secondGL, + final double[] log10AlleleFrequencyPriors, + final int numAlleles, final int ploidy1, final int ploidy2, + final AlleleFrequencyCalculationResult result) { + + final int newPloidy = ploidy1 + ploidy2; + + // sanity check + int totalAltK = set.getACsum(); + if (newPloidy != totalAltK) + throw new ReviewedStingException("BUG: inconsistent sizes of set.getACsum and passed ploidy values"); + + totalAltK -= set.ACcounts.counts[0]; + // totalAltK has sum of alt alleles of conformation now + + + // special case for k = 0 over all k + if ( totalAltK 
== 0 ) { // all-ref case + final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX]; + set.log10Likelihoods[0] = log10Lof0; + + result.setLog10LikelihoodOfAFzero(log10Lof0); + result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); + + } else { + + // initialize result with denominator + // ExactACset holds by convention the conformation of all alleles, and the sum of all allele count is just the ploidy. + // To compute n!/k1!k2!k3!... we need to compute first n!/(k2!k3!...) and then further divide by k1! where k1=ploidy-sum_k_i + + int[] currentCount = set.ACcounts.getCounts(); + double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount); + + // for current conformation, get all possible ways to break vector K into two components G1 and G2 + final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2); + set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY; + while (innerIterator.hasNext()) { + // check if breaking current conformation into g1 and g2 is feasible. 
+ final int[] acCount2 = innerIterator.getCurrentVector(); + final int[] acCount1 = MathUtils.vectorDiff(currentCount, acCount2); + final int idx2 = innerIterator.getLinearIndex(); + // see if conformation is valid and if original pool had this conformation + // for conformation to be valid, all elements of g2 have to be <= elements of current AC set + if (isValidConformation(acCount1,ploidy1) && firstGLs.hasConformation(acCount1)) { + final double gl2 = secondGL[idx2]; + if (!Double.isInfinite(gl2)) { + final double firstGL = firstGLs.getLikelihoodOfConformation(acCount1); + final double num1 = MathUtils.log10MultinomialCoefficient(ploidy1, acCount1); + final double num2 = MathUtils.log10MultinomialCoefficient(ploidy2, acCount2); + final double sum = firstGL + gl2 + num1 + num2; + + set.log10Likelihoods[0] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[0], sum); + } + } + innerIterator.next(); + } + + set.log10Likelihoods[0] += denom; + } + + double log10LofK = set.log10Likelihoods[0]; + + // update the MLE if necessary + final int altCounts[] = Arrays.copyOfRange(set.ACcounts.counts,1, set.ACcounts.counts.length); + result.updateMLEifNeeded(log10LofK, altCounts); + + // apply the priors over each alternate allele + for (final int ACcount : altCounts ) { + if ( ACcount > 0 ) + log10LofK += log10AlleleFrequencyPriors[ACcount]; + } + result.updateMAPifNeeded(log10LofK, altCounts); + + return log10LofK; + } + + /** + * Small helper routine - is a particular AC conformationv vector valid? ie are all elements non-negative and sum to ploidy? + * @param set AC conformation vector + * @param ploidy Ploidy of set + * @return Valid conformation + */ + private static boolean isValidConformation(final int[] set, final int ploidy) { + int sum=0; + for (final int ac: set) { + if (ac < 0) + return false; + sum += ac; + + } + + return (sum == ploidy); + } + + /** + * Combines naively two biallelic pools (of arbitrary size). 
+ * For two pools of size m1 and m2, we can compute the combined likelihood as: + * Pr(D|AC=k) = Sum_{j=0}^k Pr(D|AC1=j) Pr(D|AC2=k-j) * choose(m1,j)*choose(m2,k-j)/choose(m1+m2,k) + * @param originalPool Pool likelihood vector, x[k] = Pr(AC_i = k) for alt allele i + * @param newPLVector Second GL vector + * @param ploidy1 Ploidy of first pool (# of chromosomes in it) + * @param ploidy2 Ploidy of second pool + * @param log10AlleleFrequencyPriors Array of biallelic priors + * @param result Af calculation result object + * @return Combined likelihood vector + */ + public static ProbabilityVector combineBiallelicPoolsNaively(final ProbabilityVector originalPool, final double[] newPLVector, + final int ploidy1, final int ploidy2, final double[] log10AlleleFrequencyPriors, + final AlleleFrequencyCalculationResult result) { + + final int newPloidy = ploidy1 + ploidy2; + + final double[] combinedLikelihoods = new double[1+newPloidy]; + + /** Pre-fill result array and incorporate weights into input vectors + * Say L1(k) = Pr(D|AC1=k) * choose(m1,k) + * and L2(k) = Pr(D|AC2=k) * choose(m2,k) + * equation reduces to + * Pr(D|AC=k) = 1/choose(m1+m2,k) * Sum_{j=0}^k L1(k) L2(k-j) + * which is just plain convolution of L1 and L2 (with pre-existing vector) + */ + + // intialize result vector to -infinity + Arrays.fill(combinedLikelihoods,Double.NEGATIVE_INFINITY); + + final double[] x = Arrays.copyOf(originalPool.getProbabilityVector(),1+ploidy1); + for (int k=originalPool.getProbabilityVector().length; k< x.length; k++) + x[k] = Double.NEGATIVE_INFINITY; + + final double[] y = newPLVector.clone(); + + + final double log10Lof0 = x[0]+y[0]; + result.setLog10LikelihoodOfAFzero(log10Lof0); + result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); + + double maxElement = log10Lof0; + int maxElementIdx = 0; + int[] alleleCounts = new int[1]; + for (int k= originalPool.getMinVal() ; k <= newPloidy; k++) { + double[] acc = new double[k+1]; + 
Arrays.fill(acc,Double.NEGATIVE_INFINITY); + double innerMax = Double.NEGATIVE_INFINITY; + + for (int j=0; j <=k; j++) { + double x1,y1; + + + if (k-j>=0 && k-j < y.length) + y1 = y[k-j] + MathUtils.log10BinomialCoefficient(ploidy2,k-j); + else + continue; + + if (j < x.length) + x1 = x[j] + MathUtils.log10BinomialCoefficient(ploidy1,j); + else + continue; + + if (Double.isInfinite(x1) || Double.isInfinite(y1)) + continue; + acc[j] = x1 + y1; + if (acc[j] > innerMax) + innerMax = acc[j]; + else if (acc[j] < innerMax - MAX_LOG10_ERROR_TO_STOP_EARLY) + break; + } + combinedLikelihoods[k] = MathUtils.log10sumLog10(acc) - MathUtils.log10BinomialCoefficient(newPloidy,k); + maxElementIdx = k; + double maxDiff = combinedLikelihoods[k] - maxElement; + if (maxDiff > 0) + maxElement = combinedLikelihoods[k]; + else if (maxDiff < maxElement - MAX_LOG10_ERROR_TO_STOP_EARLY) { + break; + } + + alleleCounts[0] = k; + result.updateMLEifNeeded(combinedLikelihoods[k],alleleCounts); + result.updateMAPifNeeded(combinedLikelihoods[k] + log10AlleleFrequencyPriors[k],alleleCounts); + + + } + + + return new ProbabilityVector(MathUtils.normalizeFromLog10(Arrays.copyOf(combinedLikelihoods,maxElementIdx+1),false, true)); + } + + + /** + * From a given variant context, extract a given subset of alleles, and update genotype context accordingly, + * including updating the PL's, and assign genotypes accordingly + * @param vc variant context with alleles and genotype likelihoods + * @param allelesToUse alleles to subset + * @param assignGenotypes true: assign hard genotypes, false: leave as no-call + * @param ploidy number of chromosomes per sample (pool) + * @return GenotypesContext with new PLs + */ + public GenotypesContext subsetAlleles(final VariantContext vc, + final List allelesToUse, + final boolean assignGenotypes, + final int ploidy) { + // the genotypes with PLs + final GenotypesContext oldGTs = vc.getGenotypes(); + List NO_CALL_ALLELES = new ArrayList(ploidy); + + for (int k=0; k < 
ploidy; k++) + NO_CALL_ALLELES.add(Allele.NO_CALL); + + // samples + final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); + + // the new genotypes to create + final GenotypesContext newGTs = GenotypesContext.create(); + + // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward + final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); + final int numNewAltAlleles = allelesToUse.size() - 1; + + + // create the new genotypes + for ( int k = 0; k < oldGTs.size(); k++ ) { + final Genotype g = oldGTs.get(sampleIndices.get(k)); + if ( !g.hasLikelihoods() ) { + newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + continue; + } + + // create the new likelihoods array from the alleles we are allowed to use + final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); + double[] newLikelihoods; + if ( numOriginalAltAlleles == numNewAltAlleles) { + newLikelihoods = originalLikelihoods; + } else { + newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse); + + // might need to re-normalize + newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); + } + + // if there is no mass on the (new) likelihoods, then just no-call the sample + if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) { + newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + } + else { + final GenotypeBuilder gb = new GenotypeBuilder(g); + + if ( numNewAltAlleles == 0 ) + gb.noPL(); + else + gb.PL(newLikelihoods); + + // if we weren't asked to assign a genotype, then just no-call the sample + if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) + gb.alleles(NO_CALL_ALLELES); + else + assignGenotype(gb, newLikelihoods, allelesToUse, ploidy); + newGTs.add(gb.make()); + } + } + + return newGTs; + + } + + /** + * Assign genotypes 
(GTs) to the samples in the Variant Context greedily based on the PLs + * + * @param newLikelihoods the PL array + * @param allelesToUse the list of alleles to choose from (corresponding to the PLs) + * @param numChromosomes Number of chromosomes per pool + * + * @return genotype + */ + private static void assignGenotype(final GenotypeBuilder gb, + final double[] newLikelihoods, + final List allelesToUse, + final int numChromosomes) { + final int numNewAltAlleles = allelesToUse.size() - 1; + + + + // find the genotype with maximum likelihoods + final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); + + final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex); + final ArrayList alleleFreqs = new ArrayList(); + final ArrayList alleleCounts = new ArrayList(); + + + for (int k=1; k < mlAlleleCount.length; k++) { + alleleCounts.add(mlAlleleCount[k]); + final double freq = (double)mlAlleleCount[k] / (double)numChromosomes; + alleleFreqs.add(freq); + + } + + // per-pool logging of AC and AF + gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); + gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs); + + // remove PLs if necessary + if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING) + gb.noPL(); + + ArrayList myAlleles = new ArrayList(); + + // add list of called ML genotypes to alleles list + // TODO - too unwieldy? 
+ int idx = 0; + for (int mlind = 0; mlind < mlAlleleCount.length; mlind++) { + for (int k=0; k < mlAlleleCount[mlind]; k++) + myAlleles.add(idx++,allelesToUse.get(mlind)); + } + gb.alleles(myAlleles); + + if ( numNewAltAlleles > 0 ) + gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); + } + +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java new file mode 100644 index 000000000..6b0831323 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import net.sf.samtools.SAMUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; + +import java.util.*; + +public abstract class GeneralPloidyGenotypeLikelihoods { + protected final int numChromosomes; + private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 + + protected static final boolean VERBOSE = false; + protected static final double qualVec[] = new double[SAMUtils.MAX_PHRED_SCORE+1]; + + // + // The fundamental data arrays associated with a Genotype Likelhoods object + // + protected double[] log10Likelihoods; + protected double[][] logMismatchProbabilityArray; + + protected final int nSamplesPerPool; + protected final HashMap perLaneErrorModels; + protected final int likelihoodDim; + protected final boolean ignoreLaneInformation; + protected final double LOG10_PLOIDY; + protected boolean hasReferenceSampleData; + + protected final int nAlleles; + protected final List alleles; + + private static final double MIN_LIKELIHOOD = Double.NEGATIVE_INFINITY; + + private static final int MAX_NUM_ALLELES_TO_CACHE = 20; + private static final int MAX_NUM_SAMPLES_PER_POOL = 1000; + + private static final boolean FAST_GL_COMPUTATION = true; + // constructor with given logPL elements + public GeneralPloidyGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy, + final HashMap perLaneErrorModels, final boolean ignoreLaneInformation) { + this.alleles = 
alleles; + this.nAlleles = alleles.size(); + numChromosomes = ploidy; + nSamplesPerPool = numChromosomes/2; + this.perLaneErrorModels = perLaneErrorModels; + this.ignoreLaneInformation = ignoreLaneInformation; + + // check if at least one lane has actual data + if (perLaneErrorModels == null || perLaneErrorModels.isEmpty()) + hasReferenceSampleData = false; + else { + for (Map.Entry elt : perLaneErrorModels.entrySet()) { + if (elt.getValue().hasData()) { + hasReferenceSampleData = true; + break; + } + } + } + // check sizes + if (nAlleles > MAX_NUM_ALLELES_TO_CACHE) + throw new UserException("No support for this number of alleles"); + + if (nSamplesPerPool > MAX_NUM_SAMPLES_PER_POOL) + throw new UserException("No support for such large number of samples per pool"); + + likelihoodDim = GenotypeLikelihoods.numLikelihoods(nAlleles, numChromosomes); + + if (logLikelihoods == null){ + log10Likelihoods = new double[likelihoodDim]; + Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD); + } else { + if (logLikelihoods.length != likelihoodDim) + throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object"); + + log10Likelihoods = logLikelihoods; //.clone(); // is clone needed? + } + fillCache(); + LOG10_PLOIDY = Math.log10((double)numChromosomes); + } + + + /** + * Crucial inner class that handles addressing elements of pool likelihoods. We store likelihoods as a map + * of form int[] -> double (to be more precise, IntArrayWrapper -> Double). + * For a given ploidy (chromosome count) and number of alleles, we need a form to iterate deterministically + * across all possible allele conformations. + * Problem equivalent to listing in determistic order all possible ways in which N integers will sum to P, + * where N is number of alleles and P is number of chromosomes. + * There's an option to list all integers so that sum will be UP to P. 
+ * For example, with P=2,N=2, restrictSumTo = 2 iterator will produce + * [2 0 ] [1 1] [ 0 2] + * + * + */ + protected static class SumIterator { + private int[] currentState; + private final int[] finalState; + private final int restrictSumTo; + private final int dim; + private boolean hasNext; + private int linearIndex; + private int currentSum; + + /** + * Default constructor. Typical use case: restrictSumTo = -1 if there's no sum restriction, or will generate int[] + * vectors so that all add to this value. + * + * @param finalState End state - typically we should set value to (P,P,P,...) + * @param restrictSumTo See above + */ + public SumIterator(final int[] finalState,final int restrictSumTo) { + this.finalState = finalState; + this.dim = finalState.length; + this.restrictSumTo = restrictSumTo; + currentState = new int[dim]; + reset(); + + } + + /** + * Shortcut constructor for common use case: iterator will produce + * all vectors of length numAlleles whose sum = numChromosomes + * @param numAlleles Number of alleles + * @param numChromosomes Ploidy + */ + public SumIterator(final int numAlleles, final int numChromosomes) { + this(getInitialStateVector(numAlleles,numChromosomes), numChromosomes); + } + + + private static int[] getInitialStateVector(final int nAlleles, final int numChromosomes) { + int[] initialState = new int[nAlleles]; + Arrays.fill(initialState,numChromosomes); + return initialState; + } + + public void setInitialStateVector(final int[] stateVector) { + if (restrictSumTo > 0) { + // check that desired vector is valid + if (MathUtils.sum(stateVector) != restrictSumTo) + throw new ReviewedStingException("BUG: initial state vector nor compatible with sum iterator"); + + final int numAlleles = currentState.length; + final int ploidy = restrictSumTo; + + linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy); + } + else + throw new ReviewedStingException("BUG: Not supported"); + + } + public void next() 
{ + int initialDim = (restrictSumTo > 0)?1:0; + hasNext = next(finalState, initialDim); + if (hasNext) + linearIndex++; + } + + private boolean next(final int[] finalState, int initialDim) { + boolean hasNextState = false; + for (int currentDim=initialDim; currentDim < finalState.length; currentDim++) { + final int x = currentState[currentDim]+1; + + if (x > finalState[currentDim] || (currentSum >= restrictSumTo && initialDim > 0)) { + // update vector sum, and reset position + currentSum -= currentState[currentDim]; + currentState[currentDim] = 0; + if (currentDim >= dim-1) { + hasNextState = false; + break; + } + } + else { + currentState[currentDim] = x; + hasNextState = true; + currentSum++; + break; + } + } + if (initialDim > 0) { + currentState[0] = restrictSumTo - currentSum; + } + return hasNextState; + } + + public void reset() { + Arrays.fill(currentState, 0); + if (restrictSumTo > 0) + currentState[0] = restrictSumTo; + hasNext = true; + linearIndex = 0; + currentSum = 0; + } + public int[] getCurrentVector() { + return currentState; + } + + public int[] getCurrentAltVector() { + return Arrays.copyOfRange(currentState,1,currentState.length); + } + /* public int getCurrentSum() { + return currentSum; + } + */ + public int getLinearIndex() { + return linearIndex; + } + + public boolean hasNext() { + return hasNext; + } + } + + public List getAlleles() { return alleles;} + + /** + * Returns an array of log10 likelihoods for each genotype conformation, with ordering determined by SumIterator class. 
+ * + * @return likelihoods array + */ + public double[] getLikelihoods() { + return log10Likelihoods; + } + + + + + + /** + * Set particular element of logPL vector + * @param idx index of allele count conformation to modify + * @param pl Likelihood to associate with map + */ + public void setLogPLs(final int idx, final double pl) { + log10Likelihoods[idx] = pl; + } + + public void renormalize() { + log10Likelihoods = MathUtils.normalizeFromLog10(log10Likelihoods,false,true); + } + /** Compute most likely AC conformation based on currently stored PL's - just loop through log PL map and output max value + * + * @return vector with most likely allele count, ordered according to this object's alleles + */ + public Pair getMostLikelyACCount() { + + int[] mlInd = null; + double maxVal = Double.NEGATIVE_INFINITY; + + final SumIterator iterator = new SumIterator(alleles.size(),numChromosomes); + + int idx = 0; + while (iterator.hasNext()) { + double pl = log10Likelihoods[idx++]; + if (pl > maxVal) { + maxVal = pl; + mlInd = iterator.getCurrentVector().clone(); + + } + iterator.next(); + } + if (VERBOSE) { + System.out.println(VCFConstants.MLE_ALLELE_COUNT_KEY + ": " + Arrays.toString(mlInd)); + } + return new Pair(mlInd,maxVal); + } + + /** + * Given set of alleles with corresponding vector of likelihoods, subset to a new set of alleles + * + * @param oldLikelihoods Vector of PL's corresponding to original alleles + * @param numChromosomes Ploidy (number of chromosomes describing PL's) + * @param originalAlleles List of original alleles + * @param allelesToSubset Alleles to subset + * @return Vector of new PL's, ordered accorrding to SumIterator's ordering + */ + public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes, + final List originalAlleles, final List allelesToSubset) { + + int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes); + double[] newPLs = new 
double[newPLSize]; + + + int idx = 0; + // First fill boolean array stating whether each original allele is present in new mapping + final boolean [] allelePresent = new boolean[originalAlleles.size()]; + for ( Allele allele : originalAlleles ) + allelePresent[idx++] = allelesToSubset.contains(allele); + + + // compute mapping from old idx to new idx + // This might be needed in case new allele set is not ordered in the same way as old set + // Example. Original alleles: {T*,C,G,A}. New alleles: {G,C}. Permutation key = [2,1] + + int[] permutationKey = new int[allelesToSubset.size()]; + for (int k=0; k < allelesToSubset.size(); k++) + // for each allele to subset, find corresponding index in original allele list + permutationKey[k] = originalAlleles.indexOf(allelesToSubset.get(k)); + + + if (VERBOSE) { + System.out.println("permutationKey:"+Arrays.toString(permutationKey)); + } + + final SumIterator iterator = new SumIterator(originalAlleles.size(),numChromosomes); + + while (iterator.hasNext()) { + // for each entry in logPL table, associated originally with allele count stored in vec[], + // see if this allele count conformation will be present in new logPL table. 
+ // For entry to be present, elements in dimensions not present in requested allele list have to have count = 0 + int[] pVec = iterator.getCurrentVector(); + double pl = oldLikelihoods[iterator.getLinearIndex()]; + + boolean keyPresent = true; + for (int k=0; k < allelePresent.length; k++) + if ( pVec[k]>0 && !allelePresent[k] ) + keyPresent = false; + + if (keyPresent) {// skip to next entry in logPLs if this conformation is not present in subset + + final int[] newCount = new int[allelesToSubset.size()]; + + // map from old allele mapping count to new allele mapping + // In pseudo-Matlab notation: newCount = vec[permutationKey] for permutationKey vector + for (idx = 0; idx < newCount.length; idx++) + newCount[idx] = pVec[permutationKey[idx]]; + + // get corresponding index from new count + int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes); + newPLs[outputIdx] = pl; + if (VERBOSE) { + System.out.println("Old Key:"+Arrays.toString(pVec)); + System.out.println("New Key:"+Arrays.toString(newCount)); + } + } + iterator.next(); + } + + return newPLs; + } + + public static int getLinearIndex(int[] vectorIdx, int numAlleles, int ploidy) { + + if (ploidy <= 0) + return 0; + + int linearIdx = 0; + int cumSum = ploidy; + for (int k=numAlleles-1;k>=1; k--) { + int idx = vectorIdx[k]; + // how many blocks are before current position + if (idx == 0) + continue; + for (int p=0; p < idx; p++) + linearIdx += getNumLikelihoodElements( k, cumSum-p); + + cumSum -= idx; + } + + return linearIdx; + + } + + /** + * Given a scalar index, what's the alelle count conformation corresponding to it? 
+ * @param nAlleles Number of alleles + * @param numChromosomes Ploidy + * @param PLindex Index to query + * @return Allele count conformation, according to iteration order from SumIterator + */ + public static int[] getAlleleCountFromPLIndex(final int nAlleles, final int numChromosomes, final int PLindex) { + + // todo - another brain-dead inefficient implementation, can do much better by computing in closed form + final SumIterator iterator = new SumIterator(nAlleles,numChromosomes); + while (iterator.hasNext()) { + final int[] plVec = iterator.getCurrentVector(); + if (iterator.getLinearIndex() == PLindex) + return plVec; + + iterator.next(); + } + + return null; + + } + + /* + * a cache of the PL ivector sizes as a function of # of alleles and pool sizes + */ + + public static int getNumLikelihoodElements(int numAlleles, int ploidy) { + return GenotypeLikelihoodVectorSizes[numAlleles][ploidy]; + } + + private final static int[][] GenotypeLikelihoodVectorSizes = fillGLVectorSizeCache(MAX_NUM_ALLELES_TO_CACHE, 2*MAX_NUM_SAMPLES_PER_POOL); + + private static int[][] fillGLVectorSizeCache(int maxAlleles, int maxPloidy) { + + int[][] cache = new int[maxAlleles][maxPloidy]; + for (int numAlleles=1; numAlleles < maxAlleles; numAlleles++) { + for (int ploidy=0; ploidy < maxPloidy; ploidy++) { + + if (numAlleles == 1) + cache[numAlleles][ploidy] = 1; + else if (ploidy == 1) + cache[numAlleles][ploidy] = numAlleles; + else { + int acc =0; + for (int k=0; k <= ploidy; k++ ) + acc += cache[numAlleles-1][ploidy-k]; + + cache[numAlleles][ploidy] = acc; + } + } + } + return cache; + } + + /** + * Return a string representation of this object in a moderately usable form + * + * @return string representation + */ + public String toString() { + StringBuilder s = new StringBuilder(1000); + + s.append("Alleles:"); + for (Allele a: this.alleles){ + s.append(a.getDisplayString()); + s.append(","); + } + s.append("\nGLs:\n"); + SumIterator iterator = new 
SumIterator(nAlleles,numChromosomes); + while (iterator.hasNext()) { + if (!Double.isInfinite(getLikelihoods()[iterator.getLinearIndex()])) { + + s.append("Count ["); + StringBuilder b = new StringBuilder(iterator.getCurrentVector().length*2); + for (int it:iterator.getCurrentVector()) { + b.append(it); + b.append(","); + } + s.append(b.toString()); + s.append(String.format("] GL=%4.3f\n",this.getLikelihoods()[iterator.getLinearIndex()]) ); + } + iterator.next(); + } + return s.toString(); + } + + + public void computeLikelihoods(ErrorModel errorModel, + List alleleList, List numObservations, ReadBackedPileup pileup) { + + if (FAST_GL_COMPUTATION) { + // queue up elements to be computed. Assumptions: + // GLs distributions are unimodal + // GLs are continuous + // Hence, once an AC conformation is computed, we queue up its immediate topological neighbors. + // If neighbors fall below maximum - threshold, we don't queue up THEIR own neighbors + // and we repeat until queue is empty + // queue of AC conformations to process + final LinkedList ACqueue = new LinkedList(); + // mapping of ExactACset indexes to the objects + final HashMap indexesToACset = new HashMap(likelihoodDim); + // add AC=0 to the queue + final int[] zeroCounts = new int[nAlleles]; + zeroCounts[0] = numChromosomes; + + AlleleFrequencyCalculationModel.ExactACset zeroSet = + new AlleleFrequencyCalculationModel.ExactACset(1, new AlleleFrequencyCalculationModel.ExactACcounts(zeroCounts)); + + ACqueue.add(zeroSet); + indexesToACset.put(zeroSet.ACcounts, zeroSet); + + // keep processing while we have AC conformations that need to be calculated + double maxLog10L = Double.NEGATIVE_INFINITY; + while ( !ACqueue.isEmpty() ) { + // compute log10Likelihoods + final AlleleFrequencyCalculationModel.ExactACset ACset = ACqueue.remove(); + final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, errorModel, alleleList, numObservations, maxLog10L, ACqueue, indexesToACset, pileup); + + // adjust max 
likelihood seen if needed + maxLog10L = Math.max(maxLog10L, log10LofKs); + // clean up memory + indexesToACset.remove(ACset.ACcounts); + if ( VERBOSE ) + System.out.printf(" *** removing used set=%s%n", ACset.ACcounts); + + } + + + } else { + int plIdx = 0; + SumIterator iterator = new SumIterator(nAlleles, numChromosomes); + while (iterator.hasNext()) { + AlleleFrequencyCalculationModel.ExactACset ACset = + new AlleleFrequencyCalculationModel.ExactACset(1, new AlleleFrequencyCalculationModel.ExactACcounts(iterator.getCurrentVector())); + // for observed base X, add Q(jX,k) to likelihood vector for all k in error model + //likelihood(jA,jC,jG,jT) = logsum(logPr (errorModel[k],nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k)) + getLikelihoodOfConformation(ACset, errorModel, alleleList, numObservations, pileup); + + setLogPLs(plIdx++, ACset.log10Likelihoods[0]); + iterator.next(); + } + } + // normalize PL's + renormalize(); + + } + + private double calculateACConformationAndUpdateQueue(final ExactAFCalculationModel.ExactACset set, + final ErrorModel errorModel, + final List alleleList, + final List numObservations, + final double maxLog10L, + final LinkedList ACqueue, + final HashMap indexesToACset, + final ReadBackedPileup pileup) { + // compute likelihood of set + getLikelihoodOfConformation(set, errorModel, alleleList, numObservations, pileup); + final double log10LofK = set.log10Likelihoods[0]; + + // log result in PL vector + int idx = getLinearIndex(set.ACcounts.getCounts(), nAlleles, numChromosomes); + setLogPLs(idx, log10LofK); + + // can we abort early because the log10Likelihoods are so small? 
+ if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( VERBOSE ) + System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); + return log10LofK; + } + + // iterate over higher frequencies if possible + // by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count. + final int ACwiggle = numChromosomes - set.getACsum() + set.ACcounts.counts[0]; + if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies + return log10LofK; + + + // add conformations for other cases + for ( int allele = 1; allele < nAlleles; allele++ ) { + final int[] ACcountsClone = set.ACcounts.getCounts().clone(); + ACcountsClone[allele]++; + // is this a valid conformation? + int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0]; + ACcountsClone[0] = numChromosomes - altSum; + if (ACcountsClone[0] < 0) + continue; + + + updateACset(ACcountsClone, ACqueue, indexesToACset); + } + return log10LofK; + + } + + /** + * Abstract methods, must be implemented in subclasses + * + * @param ACset Count to compute + * @param errorModel Site-specific error model object + * @param alleleList List of alleles + * @param numObservations Number of observations for each allele + * @param pileup Read backed pileup in case it's necessary + */ + public abstract void getLikelihoodOfConformation(final AlleleFrequencyCalculationModel.ExactACset ACset, + final ErrorModel errorModel, + final List alleleList, + final List numObservations, + final ReadBackedPileup pileup); + + + public abstract int add(ReadBackedPileup pileup, UnifiedArgumentCollection UAC); + + // Static methods + public static void updateACset(final int[] newSetCounts, + final LinkedList ACqueue, + final HashMap indexesToACset) { + + final AlleleFrequencyCalculationModel.ExactACcounts index = new AlleleFrequencyCalculationModel.ExactACcounts(newSetCounts); + if ( 
!indexesToACset.containsKey(index) ) { + AlleleFrequencyCalculationModel.ExactACset newSet = new AlleleFrequencyCalculationModel.ExactACset(1, index); + indexesToACset.put(index, newSet); + ACqueue.add(newSet); + if (VERBOSE) + System.out.println(" *** Adding set to queue:" + index.toString()); + } + + } + // ----------------------------------------------------------------------------------------------------------------- + // + // + // helper routines + // + // + // ----------------------------------------------------------------------------------------------------------------- + + + // + // Constant static data + // + + static { + // cache 10^(-k/10) + for (int j=0; j <= SAMUtils.MAX_PHRED_SCORE; j++) + qualVec[j] = Math.pow(10.0,-(double)j/10.0); + } + + private void fillCache() { + // cache Q(j,k) = log10(j/2N*(1-ek) + (2N-j)/2N*ek) for j = 0:2N + + logMismatchProbabilityArray = new double[1+numChromosomes][1+SAMUtils.MAX_PHRED_SCORE]; + for (int i=0; i <= numChromosomes; i++) { + for (int j=0; j <= SAMUtils.MAX_PHRED_SCORE; j++) { + double phi = (double)i/numChromosomes; + logMismatchProbabilityArray[i][j] = Math.log10(phi * (1.0-qualVec[j]) + qualVec[j]/3.0 * (1.0-phi)); + } + } + } + +} + diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java new file mode 100644 index 000000000..f6ce818be --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, 
modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.*; + +import java.util.*; + +public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { + + //protected Set laneIDs; + public enum Model { + SNP, + INDEL, + POOLSNP, + POOLINDEL, + BOTH + } + + final protected UnifiedArgumentCollection UAC; + + protected 
GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { + super(UAC,logger); + this.UAC = UAC; + + } + + + /* + Get vc with alleles from reference sample. Can be null if there's no ref sample call or no ref sample coverage at this site. + */ + protected VariantContext getTrueAlleles(final RefMetaDataTracker tracker, + final ReferenceContext ref, + Map contexts) { + // Get reference base from VCF or Reference + if (UAC.referenceSampleName == null) + return null; + + AlignmentContext context = contexts.get(UAC.referenceSampleName); + ArrayList trueReferenceAlleles = new ArrayList(); + + VariantContext referenceSampleVC; + + if (tracker != null && context != null) + referenceSampleVC = tracker.getFirstValue(UAC.referenceSampleRod, context.getLocation()); + else + return null; + + if (referenceSampleVC == null) { + trueReferenceAlleles.add(Allele.create(ref.getBase(),true)); + return new VariantContextBuilder("pc",ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop(),trueReferenceAlleles).make(); + + } + else { + Genotype referenceGenotype = referenceSampleVC.getGenotype(UAC.referenceSampleName); + List referenceAlleles = referenceGenotype.getAlleles(); + + return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(), + referenceSampleVC.getAlleles()) + .genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make()) + .make(); + } + } + + + /** + * GATK Engine creates readgroups of the form XXX.Y.Z + * XXX.Y is the unique lane identifier. + * Z is the id of the sample to make the read group id unique + * This function returns the list of lane identifiers. + * + * @param readGroups readGroups A collection of read group strings (obtained from the alignment context pileup) + * @return a collection of lane ids. 
+ */ + public static Set parseLaneIDs(Collection readGroups) { + HashSet result = new HashSet(); + for (String readGroup : readGroups) { + result.add(getLaneIDFromReadGroupString(readGroup)); + } + return result; + } + + /** + * GATK Engine creates readgroups of the form XXX.Y.Z + * XXX.Y is the unique lane identifier. + * Z is the id of the sample to make the read group id unique + * + * @param readGroupID the read group id string + * @return just the lane id (the XXX.Y string) + */ + public static String getLaneIDFromReadGroupString(String readGroupID) { + // System.out.println(readGroupID); + String [] parsedID = readGroupID.split("\\."); + if (parsedID.length > 1) + return parsedID[0] + "." + parsedID[1]; + else + return parsedID[0] + ".0"; + } + + + /** Wrapper class that encapsulates likelihood object and sample name + * + */ + protected static class PoolGenotypeData { + + public final String name; + public final GeneralPloidyGenotypeLikelihoods GL; + public final int depth; + public final List alleles; + + public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List alleles) { + this.name = name; + this.GL = GL; + this.depth = depth; + this.alleles = alleles; + } + } + + // determines the alleles to use + protected List determineAlternateAlleles(final List sampleDataList) { + + if (sampleDataList.isEmpty()) + return Collections.emptyList(); + + final int REFERENCE_IDX = 0; + final List allAlleles = sampleDataList.get(0).GL.getAlleles(); + double[] likelihoodSums = new double[allAlleles.size()]; + + // based on the GLs, find the alternate alleles with enough probability + for ( PoolGenotypeData sampleData : sampleDataList ) { + final Pair mlACPair = sampleData.GL.getMostLikelyACCount(); + final double topLogGL = mlACPair.second; + + if (sampleData.GL.getAlleles().size() != allAlleles.size()) + throw new ReviewedStingException("BUG: inconsistent size of alleles!"); + + // ref allele is always first in array 
list + if (sampleData.GL.alleles.get(0).isNonReference()) + throw new ReviewedStingException("BUG: first allele in list is not reference!"); + + double refGL = sampleData.GL.getLikelihoods()[REFERENCE_IDX]; + + // check if maximum likelihood AC is all-ref for current pool. If so, skip + if (mlACPair.first[REFERENCE_IDX] == sampleData.GL.numChromosomes) + continue; + + // most likely AC is not all-ref: for all non-ref alleles, add difference of max likelihood and all-ref likelihood + for (int i=0; i < mlACPair.first.length; i++) { + if (i==REFERENCE_IDX) continue; + + if (mlACPair.first[i] > 0) + likelihoodSums[i] += topLogGL - refGL; + + } + } + + final List allelesToUse = new ArrayList(); + for ( int i = 0; i < likelihoodSums.length; i++ ) { + if ( likelihoodSums[i] > 0.0 ) + allelesToUse.add(allAlleles.get(i)); + } + + return allelesToUse; + } + + + public VariantContext getLikelihoods(final RefMetaDataTracker tracker, + final ReferenceContext ref, + Map contexts, + final AlignmentContextUtils.ReadOrientation contextType, + final List allAllelesToUse, + final boolean useBAQedPileup, + final GenomeLocParser locParser) { + + HashMap perLaneErrorModels = getPerLaneErrorModels(tracker, ref, contexts); + if (perLaneErrorModels == null && UAC.referenceSampleName != null) + return null; + + if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) { + AlignmentContext mergedContext = AlignmentContextUtils.joinContexts(contexts.values()); + Map newContext = new HashMap(); + newContext.put(DUMMY_SAMPLE_NAME,mergedContext); + contexts = newContext; + } + + // get initial alleles to genotype + final List allAlleles = new ArrayList(); + if (allAllelesToUse == null || allAllelesToUse.isEmpty()) + allAlleles.addAll(getInitialAllelesToUse(tracker, ref,contexts,contextType,locParser, allAllelesToUse)); + else + allAlleles.addAll(allAllelesToUse); + + if (allAlleles.isEmpty()) + return null; + + final ArrayList GLs = new ArrayList(contexts.size()); + + for ( Map.Entry sample : contexts.entrySet() 
) { + // skip reference sample + if (UAC.referenceSampleName != null && sample.getKey().equals(UAC.referenceSampleName)) + continue; + + ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); + + // create the GenotypeLikelihoods object + final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO); + // actually compute likelihoods + final int nGoodBases = GL.add(pileup, UAC); + if ( nGoodBases > 0 ) + // create wrapper object for likelihoods and add to list + GLs.add(new PoolGenotypeData(sample.getKey(), GL, getFilteredDepth(pileup), allAlleles)); + } + + // find the alternate allele(s) that we should be using + final List alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs); + if (alleles == null || alleles.isEmpty()) + return null; + // start making the VariantContext + final GenomeLoc loc = ref.getLocus(); + final int endLoc = getEndLocation(tracker, ref, alleles); + + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles); + builder.alleles(alleles); + + final HashMap attributes = new HashMap(); + + if (UAC.referenceSampleName != null && perLaneErrorModels != null) + attributes.put(VCFConstants.REFSAMPLE_DEPTH_KEY, ErrorModel.getTotalReferenceDepth(perLaneErrorModels)); + + builder.attributes(attributes); + // create the genotypes; no-call everyone for now + final GenotypesContext genotypes = GenotypesContext.create(); + final List noCall = new ArrayList(); + noCall.add(Allele.NO_CALL); + + for ( PoolGenotypeData sampleData : GLs ) { + // extract from multidimensional array + final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes, + allAlleles, alleles); + + // normalize in log space so that max element is zero. 
+ final GenotypeBuilder gb = new GenotypeBuilder(sampleData.name, noCall); + gb.DP(sampleData.depth); + gb.PL(MathUtils.normalizeFromLog10(myLikelihoods, false, true)); + genotypes.add(gb.make()); + } + + return builder.genotypes(genotypes).make(); + + } + + + protected HashMap getPerLaneErrorModels(final RefMetaDataTracker tracker, + final ReferenceContext ref, + Map contexts) { + VariantContext refVC = getTrueAlleles(tracker, ref, contexts); + + + // Build error model for site based on reference sample, and keep stratified for each lane. + AlignmentContext refContext = null; + if (UAC.referenceSampleName != null) + refContext = contexts.get(UAC.referenceSampleName); + + ReadBackedPileup refPileup = null; + if (refContext != null) { + HashMap perLaneErrorModels = new HashMap(); + refPileup = refContext.getBasePileup(); + + Set laneIDs = new TreeSet(); + if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL || UAC.IGNORE_LANE_INFO) + laneIDs.add(DUMMY_LANE); + else + laneIDs = parseLaneIDs(refPileup.getReadGroups()); + // build per-lane error model for all lanes present in ref sample + for (String laneID : laneIDs) { + // get reference pileup for this lane + ReadBackedPileup refLanePileup = refPileup; + // subset for this lane + if (refPileup != null && !(UAC.TREAT_ALL_READS_AS_SINGLE_POOL || UAC.IGNORE_LANE_INFO)) + refLanePileup = refPileup.getPileupForLane(laneID); + + //ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles); + perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref)); + } + return perLaneErrorModels; + + } + else + return null; + + } + + /* + Abstract methods - must be implemented in derived classes + */ + + protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, + final double[] logLikelihoods, + final int ploidy, + final HashMap perLaneErrorModels, + final boolean useBQAedPileup, + final ReferenceContext ref, + final boolean 
ignoreLaneInformation); + + protected abstract List getInitialAllelesToUse(final RefMetaDataTracker tracker, + final ReferenceContext ref, + Map contexts, + final AlignmentContextUtils.ReadOrientation contextType, + final GenomeLocParser locParser, + final List allAllelesToUse); + + protected abstract List getFinalAllelesToUse(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final List allAllelesToUse, + final ArrayList GLs); + + protected abstract int getEndLocation(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final List alternateAllelesToUse); +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java new file mode 100644 index 000000000..4f42f820e --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java @@ -0,0 +1,221 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.Haplotype; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: delangel + * Date: 5/18/12 + * Time: 10:06 AM + * To change this template use File | Settings | File Templates. 
+ */ +public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods { + final PairHMMIndelErrorModel pairModel; + final LinkedHashMap haplotypeMap; + final ReferenceContext refContext; + final int eventLength; + double[][] readHaplotypeLikelihoods; + + final byte refBase; + + public GeneralPloidyIndelGenotypeLikelihoods(final List alleles, + final double[] logLikelihoods, + final int ploidy, + final HashMap perLaneErrorModels, + final boolean ignoreLaneInformation, + final PairHMMIndelErrorModel pairModel, + final LinkedHashMap haplotypeMap, + final ReferenceContext referenceContext) { + super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation); + this.pairModel = pairModel; + this.haplotypeMap = haplotypeMap; + this.refContext = referenceContext; + this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles); + // todo - not needed if indel alleles have base at current position + this.refBase = referenceContext.getBase(); + } + + // ------------------------------------------------------------------------------------- + // + // add() routines. These are the workhorse routines for calculating the overall genotype + // likelihoods given observed bases and reads. Includes high-level operators all the + // way down to single base and qual functions. 
+ // + // ------------------------------------------------------------------------------------- + + /** + * Updates likelihoods and posteriors to reflect the additional observations contained within the + * read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the + * pileup + * + * @param pileup read pileup + * @param UAC the minimum base quality at which to consider a base valid + * @return the number of good bases found in the pileup + */ + public int add(ReadBackedPileup pileup, UnifiedArgumentCollection UAC) { + int n = 0; + + if (!hasReferenceSampleData) { + // no error models + return add(pileup, (ErrorModel)null); + } + for (String laneID : perLaneErrorModels.keySet() ) { + // get pileup for this lane + ReadBackedPileup perLanePileup; + if (ignoreLaneInformation) + perLanePileup = pileup; + else + perLanePileup = pileup.getPileupForLane(laneID); + + if (perLanePileup == null || perLanePileup.isEmpty()) + continue; + + ErrorModel errorModel = perLaneErrorModels.get(laneID); + n += add(perLanePileup, errorModel); + if (ignoreLaneInformation) + break; + + } + + return n; + } + + /** + * Calculates the pool's probability for all possible allele counts for all indel alleles observed. + * Calculation is based on the error model + * generated by the reference sample on the same lane. The probability is given by : + * + * Pr(ac = j1,j2,.. | pool, errorModel) = sum_over_all_Qs ( Pr(j1,j2,.. * Pr(errorModel_q) * + * Pr(ac=j1,j2,..| pool, errorModel) = sum_over_all_Qs ( Pr(ac=j1,j2,..) 
* Pr(errorModel_q) * + * [j1 * (1-eq)/2n + eq/3*(2*N-j1) + * [jA*(1-eq)/2n + eq/3*(jc+jg+jt)/2N)^nA * jC*(1-eq)/2n + eq/3*(ja+jg+jt)/2N)^nC * + * jG*(1-eq)/2n + eq/3*(jc+ja+jt)/2N)^nG * jT*(1-eq)/2n + eq/3*(jc+jg+ja)/2N)^nT + * + * log Pr(ac=jA,jC,jG,jT| pool, errorModel) = logsum( Pr(ac=jA,jC,jG,jT) * Pr(errorModel_q) * + * [jA*(1-eq)/2n + eq/3*(jc+jg+jt)/2N)^nA * jC*(1-eq)/2n + eq/3*(ja+jg+jt)/2N)^nC * + * jG*(1-eq)/2n + eq/3*(jc+ja+jt)/2N)^nG * jT*(1-eq)/2n + eq/3*(jc+jg+ja)/2N)^nT) + * = logsum(logPr(ac=jA,jC,jG,jT) + log(Pr(error_Model(q) + * )) + nA*log(jA/2N(1-eq)+eq/3*(2N-jA)/2N) + nC*log(jC/2N(1-eq)+eq/3*(2N-jC)/2N) + * + log(jG/2N(1-eq)+eq/3*(2N-jG)/2N) + log(jT/2N(1-eq)+eq/3*(2N-jT)/2N) + * + * Let Q(j,k) = log(j/2N*(1-e[k]) + (2N-j)/2N*e[k]/3) + * + * Then logPr(ac=jA,jC,jG,jT|D,errorModel) = logPR(ac=Ja,jC,jG,jT) + logsum_k( logPr (errorModel[k], + * nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k)) + * + * If pileup data comes from several error models (because lanes can have different error models), + * Pr(Ac=j|D,E1,E2) = sum(Pr(AC1=j1|D,E1,E2) * Pr(AC2=j-j2|D,E1,E2)) + * = sum(Pr(AC1=j1|D,E1)*Pr(AC2=j-j1|D,E2)) from j=0..2N + * + * So, for each lane, build error model and combine lanes. + * To store model, can do + * for jA=0:2N + * for jC = 0:2N-jA + * for jG = 0:2N-jA-jC + * for jT = 0:2N-jA-jC-jG + * Q(jA,jC,jG,jT) + * for k = minSiteQual:maxSiteQual + * likelihood(jA,jC,jG,jT) = logsum(logPr (errorModel[k],nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k)) + * + * + * + * where: nA,nC,nG,nT = counts of bases observed in pileup. 
+ * + * + * @param pileup Base pileup + * @param errorModel Site error model + * @return Number of bases added + */ + private int add(ReadBackedPileup pileup, ErrorModel errorModel) { + int n=0; + + // Number of alleless in pileup, in that order + List numSeenBases = new ArrayList(this.alleles.size()); + + if (!hasReferenceSampleData) { + final int numHaplotypes = haplotypeMap.size(); + + final int readCounts[] = new int[pileup.getNumberOfElements()]; + readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts); + n = readHaplotypeLikelihoods.length; + } else { + Allele refAllele = null; + for (Allele a:alleles) { + numSeenBases.add(0); + if (a.isReference()) + refAllele = a; + } + + if (refAllele == null) + throw new ReviewedStingException("BUG: no ref alleles in passed in allele list!"); + + // count number of elements in pileup + for (PileupElement elt : pileup) { + if (VERBOSE) + System.out.format("base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d\n",elt.getBase(), elt.isBeforeDeletionStart(),elt.isBeforeInsertion(),elt.getEventBases(),elt.getEventLength()); + int idx =0; + for (Allele allele : alleles) { + int cnt = numSeenBases.get(idx); + numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0)); + } + + n++; + + } + } + computeLikelihoods(errorModel, alleles, numSeenBases, pileup); + return n; + } + + + + /** + * Compute likelihood of current conformation + * + * @param ACset Count to compute + * @param errorModel Site-specific error model object + * @param alleleList List of alleles + * @param numObservations Number of observations for each allele in alleleList + */ + public void getLikelihoodOfConformation(final AlleleFrequencyCalculationModel.ExactACset ACset, + final ErrorModel errorModel, + final List alleleList, + final List numObservations, + final 
ReadBackedPileup pileup) { + final int[] currentCnt = Arrays.copyOf(ACset.ACcounts.counts, alleleList.size()); + double p1 = 0.0; + + if (!hasReferenceSampleData) { + // no error model: use pair HMM likelihoods + for (int i=0; i < readHaplotypeLikelihoods.length; i++) { + double acc[] = new double[alleleList.size()]; + for (int k=0; k < acc.length; k++ ) + acc[k] = readHaplotypeLikelihoods[i][k] + MathUtils.log10Cache[currentCnt[k]]-LOG10_PLOIDY; + p1 += MathUtils.log10sumLog10(acc); + } + + } else { + final int minQ = errorModel.getMinSignificantQualityScore(); + final int maxQ = errorModel.getMaxSignificantQualityScore(); + final double[] acVec = new double[maxQ - minQ + 1]; + + + for (int k=minQ; k<=maxQ; k++) { + int idx=0; + for (int n : numObservations) + acVec[k-minQ] += n*logMismatchProbabilityArray[currentCnt[idx++]][k]; + } + p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ, maxQ), acVec); + } + ACset.log10Likelihoods[0] = p1; + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java new file mode 100644 index 000000000..f6559f666 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2010. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.variantcontext.*; + +import java.util.*; + +public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel { + private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4; + + private PairHMMIndelErrorModel pairModel; + /* + private static ThreadLocal>> indelLikelihoodMap = + new ThreadLocal>>() { + protected synchronized HashMap> initialValue() { + return new HashMap>(); + } + }; + */ + + private LinkedHashMap haplotypeMap; + + /* + static { + indelLikelihoodMap.set(new HashMap>()); + } + */ + + protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) { + super(UAC, logger); + + + pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY, + UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION); + haplotypeMap = new LinkedHashMap(); + } + + + protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, + final double[] logLikelihoods, + final int ploidy, + final HashMap perLaneErrorModels, + final boolean useBQAedPileup, + final ReferenceContext ref, + final boolean ignoreLaneInformation){ + return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref); + } + + protected List getInitialAllelesToUse(final RefMetaDataTracker tracker, + final 
ReferenceContext ref, + final Map contexts, + final AlignmentContextUtils.ReadOrientation contextType, + final GenomeLocParser locParser, + final List allAllelesToUse){ + + + List alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true); + + if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE) + alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE); + if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) { + IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear(); + haplotypeMap.clear(); + } + IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap); + + // sanity check: if haplotype map couldn't be created, clear allele list + if (haplotypeMap.isEmpty()) + alleles.clear(); + return alleles; + + } + + protected List getFinalAllelesToUse(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final List allAllelesToUse, + final ArrayList GLs) { + + // find the alternate allele(s) that we should be using + final List alleles = new ArrayList(); + if ( allAllelesToUse != null ) + alleles.addAll(allAllelesToUse); + else if (!GLs.isEmpty()) + alleles.addAll(GLs.get(0).alleles); + return alleles; + + } + + protected int getEndLocation(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final List allelesToUse) { + return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1; + } +} \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java new file mode 100644 index 000000000..944372907 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java @@ -0,0 +1,356 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + + 
+import net.sf.samtools.SAMUtils; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.variantcontext.Allele; + +import java.util.*; + +import static java.lang.Math.log10; +import static java.lang.Math.pow; + + +/** + * Stable, error checking version of the pool genotyper. Useful for calculating the likelihoods, priors, + * and posteriors given a pile of bases and quality scores + * +*/ +public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ { + + final List myAlleles; + final int[] alleleIndices; + final boolean useBAQedPileup; + final byte refByte; + int mbq; + //final double[] PofDGivenBase; + + protected static final double[][][] qualLikelihoodCache; + /** + * Create a new GenotypeLikelhoods object with given priors and PCR error rate for each pool genotype + * @param alleles Alleles associated with this likelihood object + * @param logLikelihoods Likelihoods (can be null if no likelihoods known) + * @param ploidy Ploidy of sample (# of chromosomes) + * @param perLaneErrorModels error model objects for each lane + * @param useBQAedPileup Use BAQed pileup + * @param ignoreLaneInformation If true, lane info is ignored + */ + public GeneralPloidySNPGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy, + final HashMap perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) { + super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation); + this.useBAQedPileup = 
useBQAedPileup; + + myAlleles = new ArrayList(alleles); + + Allele refAllele = alleles.get(0); + //sanity check: by construction, first allele should ALWAYS be the reference alleles + if (!refAllele.isReference()) + throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!"); + + refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref! + + if (myAlleles.size() < BaseUtils.BASES.length) { + // likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones, + for (byte b : BaseUtils.BASES) { + // if base is not included in myAlleles, add new allele + boolean isRef = (b==refByte); + if (!myAlleles.contains(Allele.create(b,isRef))) + myAlleles.add(Allele.create(b,isRef)); + + } + + } + + + // compute permutation vector to figure out mapping from indices to bases + int idx = 0; + alleleIndices = new int[myAlleles.size()]; + for (byte b : BaseUtils.BASES) { + boolean isRef = (b==refByte); + alleleIndices[idx++] = myAlleles.indexOf(Allele.create(b,isRef)); + } + + } + + // ------------------------------------------------------------------------------------- + // + // add() routines. These are the workhorse routines for calculating the overall genotype + // likelihoods given observed bases and reads. Includes high-level operators all the + // way down to single base and qual functions. 
+ // + // ------------------------------------------------------------------------------------- + + public int add(ReadBackedPileup pileup, UnifiedArgumentCollection UAC) { + mbq = UAC.MIN_BASE_QUALTY_SCORE; // record for later use + return add(pileup, true, true, mbq); + } + + /** + * Updates likelihoods and posteriors to reflect the additional observations contained within the + * read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the + * pileup + * + * @param pileup read pileup + * @param ignoreBadBases should we ignore bad bases? + * @param capBaseQualsAtMappingQual should we cap a base's quality by its read's mapping quality? + * @param minBaseQual the minimum base quality at which to consider a base valid + * @return the number of good bases found in the pileup + */ + public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) { + int n = 0; + + if ( useBAQedPileup ) + pileup = createBAQedPileup( pileup ); + + if (!hasReferenceSampleData) { + return add(pileup, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual, null); + } + + for (String laneID : perLaneErrorModels.keySet() ) { + // get pileup for this lane + ReadBackedPileup perLanePileup; + if (ignoreLaneInformation) + perLanePileup = pileup; + else + perLanePileup = pileup.getPileupForLane(laneID); + + if (perLanePileup == null || perLanePileup.isEmpty()) + continue; + + ErrorModel errorModel = perLaneErrorModels.get(laneID); + n += add(perLanePileup, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual, errorModel); + if (ignoreLaneInformation) + break; + + } + + return n; + } + + /** + * Calculates the pool's probability for all possible allele counts for all bases. Calculation is based on the error model + * generated by the reference sample on the same lane. 
The probability is given by : + * + * Pr(ac=jA,jC,jG,jT| pool, errorModel) = sum_over_all_Qs ( Pr(ac=jA,jC,jG,jT) * Pr(errorModel_q) * + * [jA*(1-eq)/2n + eq/3*(jc+jg+jt)/2N)^nA * jC*(1-eq)/2n + eq/3*(ja+jg+jt)/2N)^nC * + * jG*(1-eq)/2n + eq/3*(jc+ja+jt)/2N)^nG * jT*(1-eq)/2n + eq/3*(jc+jg+ja)/2N)^nT + * + * log Pr(ac=jA,jC,jG,jT| pool, errorModel) = logsum( Pr(ac=jA,jC,jG,jT) * Pr(errorModel_q) * + * [jA*(1-eq)/2n + eq/3*(jc+jg+jt)/2N)^nA * jC*(1-eq)/2n + eq/3*(ja+jg+jt)/2N)^nC * + * jG*(1-eq)/2n + eq/3*(jc+ja+jt)/2N)^nG * jT*(1-eq)/2n + eq/3*(jc+jg+ja)/2N)^nT) + * = logsum(logPr(ac=jA,jC,jG,jT) + log(Pr(error_Model(q) + * )) + nA*log(jA/2N(1-eq)+eq/3*(2N-jA)/2N) + nC*log(jC/2N(1-eq)+eq/3*(2N-jC)/2N) + * + log(jG/2N(1-eq)+eq/3*(2N-jG)/2N) + log(jT/2N(1-eq)+eq/3*(2N-jT)/2N) + * + * Let Q(j,k) = log(j/2N*(1-e[k]) + (2N-j)/2N*e[k]/3) + * + * Then logPr(ac=jA,jC,jG,jT|D,errorModel) = logPR(ac=Ja,jC,jG,jT) + logsum_k( logPr (errorModel[k], + * nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k)) + * + * If pileup data comes from several error models (because lanes can have different error models), + * Pr(Ac=j|D,E1,E2) = sum(Pr(AC1=j1|D,E1,E2) * Pr(AC2=j-j2|D,E1,E2)) + * = sum(Pr(AC1=j1|D,E1)*Pr(AC2=j-j1|D,E2)) from j=0..2N + * + * So, for each lane, build error model and combine lanes. + * To store model, can do + * for jA=0:2N + * for jC = 0:2N-jA + * for jG = 0:2N-jA-jC + * for jT = 0:2N-jA-jC-jG + * Q(jA,jC,jG,jT) + * for k = minSiteQual:maxSiteQual + * likelihood(jA,jC,jG,jT) = logsum(logPr (errorModel[k],nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k)) + * + * + * + * where: nA,nC,nG,nT = counts of bases observed in pileup. 
+ * + * + * @param pileup Base pileup + * @param ignoreBadBases Whether to ignore bad bases + * @param capBaseQualsAtMappingQual Cap base at mapping qual + * @param minBaseQual Minimum base quality to consider + * @param errorModel Site error model + * @return Number of bases added + */ + private int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual, ErrorModel errorModel) { + // Number of [A C G T]'s in pileup, in that order + List numSeenBases = new ArrayList(BaseUtils.BASES.length); + for (byte b: BaseUtils.BASES) + numSeenBases.add(0); + + if (hasReferenceSampleData) { + // count number of elements in pileup + for (PileupElement elt : pileup) { + byte obsBase = elt.getBase(); + byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); + if ( qual == 0 ) + continue; + + int idx = 0; + + for (byte base:BaseUtils.BASES) { + int cnt = numSeenBases.get(idx); + numSeenBases.set(idx++,cnt + (base == obsBase?1:0)); + + } + + } + if (VERBOSE) + System.out.format("numSeenBases: %d %d %d %d\n",numSeenBases.get(0),numSeenBases.get(1),numSeenBases.get(2),numSeenBases.get(3)); + } + computeLikelihoods(errorModel, myAlleles, numSeenBases, pileup); + return pileup.getNumberOfElements(); + } + + /** + * Compute likelihood of current conformation + * + * @param ACset Count to compute + * @param errorModel Site-specific error model object + * @param alleleList List of alleles + * @param numObservations Number of observations for each allele in alleleList + */ + public void getLikelihoodOfConformation(final AlleleFrequencyCalculationModel.ExactACset ACset, + final ErrorModel errorModel, + final List alleleList, + final List numObservations, + final ReadBackedPileup pileup) { + final int[] currentCnt = Arrays.copyOf(ACset.ACcounts.counts, BaseUtils.BASES.length); + final int[] ac = new int[BaseUtils.BASES.length]; + + for (int k=0; k < BaseUtils.BASES.length; k++ ) + ac[k] = 
currentCnt[alleleIndices[k]]; + + double p1 = 0.0; + + if (!hasReferenceSampleData) { + // no error model: loop throught pileup to compute likalihoods just on base qualities + for (final PileupElement elt : pileup) { + final byte obsBase = elt.getBase(); + final byte qual = qualToUse(elt, true, true, mbq); + if ( qual == 0 ) + continue; + final double acc[] = new double[ACset.ACcounts.counts.length]; + for (int k=0; k < acc.length; k++ ) + acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +MathUtils.log10Cache[ACset.ACcounts.counts[k]] + - LOG10_PLOIDY; + p1 += MathUtils.log10sumLog10(acc); + } + } + else { + final int minQ = errorModel.getMinSignificantQualityScore(); + final int maxQ = errorModel.getMaxSignificantQualityScore(); + final double[] acVec = new double[maxQ - minQ + 1]; + + final int nA = numObservations.get(0); + final int nC = numObservations.get(1); + final int nG = numObservations.get(2); + final int nT = numObservations.get(3); + + + for (int k=minQ; k<=maxQ; k++) + acVec[k-minQ] = nA*logMismatchProbabilityArray[ac[0]][k] + + nC*logMismatchProbabilityArray[ac[1]][k] + + nG*logMismatchProbabilityArray[ac[2]][k] + + nT*logMismatchProbabilityArray[ac[3]][k]; + + p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ), acVec); + } + ACset.log10Likelihoods[0] = p1; + /* System.out.println(Arrays.toString(ACset.ACcounts.getCounts())+" "+String.valueOf(p1)); + System.out.println(Arrays.toString(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ))); + */ + } + + public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) { + final List BAQedElements = new ArrayList(); + for( final PileupElement PE : pileup ) { + final PileupElement newPE = new BAQedPileupElement( PE ); + BAQedElements.add( newPE ); + } + return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements ); + } + + public class 
BAQedPileupElement extends PileupElement { + public BAQedPileupElement( final PileupElement PE ) { + super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip()); + } + + @Override + public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); } + } + + + /** + * Helper function that returns the phred-scaled base quality score we should use for calculating + * likelihoods for a pileup element. May return 0 to indicate that the observation is bad, and may + * cap the quality score by the mapping quality of the read itself. + * + * @param p Pileup element + * @param ignoreBadBases Flag to ignore bad bases + * @param capBaseQualsAtMappingQual Whether to cap base Q at mapping quality + * @param minBaseQual Min qual to use + * @return New phred-scaled base quality + */ + private static byte qualToUse(PileupElement p, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) { + if ( ignoreBadBases && !BaseUtils.isRegularBase( p.getBase() ) ) + return 0; + + byte qual = p.getQual(); + + if ( qual > SAMUtils.MAX_PHRED_SCORE ) + throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. 
Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName())); + if ( capBaseQualsAtMappingQual ) + qual = (byte)Math.min((int)qual, p.getMappingQual()); + if ( (int)qual < minBaseQual ) + qual = (byte)0; + + return qual; + } + + static { + qualLikelihoodCache = new double[BaseUtils.BASES.length][BaseUtils.BASES.length][1+SAMUtils.MAX_PHRED_SCORE]; + for (byte j=0; j <= SAMUtils.MAX_PHRED_SCORE; j++) { + for (byte b1:BaseUtils.BASES) { + for (byte b2:BaseUtils.BASES) { + qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(b1)][BaseUtils.simpleBaseToBaseIndex(b2)][j] = log10PofObservingBaseGivenChromosome(b1,b2,j); + } + } + } + + } + + /** + * + * @param observedBase observed base + * @param chromBase target base + * @param qual base quality + * @return log10 likelihood + */ + private static double log10PofObservingBaseGivenChromosome(byte observedBase, byte chromBase, byte qual) { + final double log10_3 = log10(3.0); + double logP; + + if ( observedBase == chromBase ) { + // the base is consistent with the chromosome -- it's 1 - e + //logP = oneMinusData[qual]; + double e = pow(10, (qual / -10.0)); + logP = log10(1.0 - e); + } else { + // the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error) + logP = qual / -10.0 + (-log10_3); + } + + //System.out.printf("%c %c %d => %f%n", observedBase, chromBase, qual, logP); + return logP; + } + +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java new file mode 100644 index 000000000..30d614455 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java @@ 
-0,0 +1,128 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.variantcontext.*; + +import java.util.*; + +public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel { + + + protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { + super(UAC, logger); + + } + + protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, + final double[] logLikelihoods, + final int ploidy, + final HashMap perLaneErrorModels, + final boolean useBQAedPileup, + final ReferenceContext ref, + final boolean ignoreLaneInformation) { + return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO); + } + + protected List getInitialAllelesToUse(final RefMetaDataTracker tracker, + final ReferenceContext ref, + Map contexts, + final AlignmentContextUtils.ReadOrientation contextType, + final GenomeLocParser locParser, + final List allAllelesToUse) { + + if (allAllelesToUse != null) + return allAllelesToUse; + + + final byte refBase = ref.getBase(); + final List allAlleles = new ArrayList(); + // first add ref allele + allAlleles.add(Allele.create(refBase, true)); + // add all possible alt alleles + for (byte b: BaseUtils.BASES) { + if (refBase != b) + allAlleles.add(Allele.create(b)); + } + + return allAlleles; + } + + protected List getFinalAllelesToUse(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final List allAllelesToUse, + final ArrayList GLs) { + // find the alternate allele(s) that we should be using + final List alleles = 
new ArrayList(); + if ( allAllelesToUse != null ) { + alleles.addAll(allAllelesToUse); + } else if ( UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles); + + // ignore places where we don't have a SNP + if ( vc == null || !vc.isSNP() ) + return null; + + alleles.addAll(vc.getAlleles()); + } else { + + alleles.add(Allele.create(ref.getBase(),true)); + alleles.addAll(determineAlternateAlleles( GLs)); + + // if there are no non-ref alleles... + if ( alleles.size() == 1 ) { + final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(ref.getBase()); + // if we only want variants, then we don't need to calculate genotype likelihoods + if ( UAC.OutputMode != UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) + // otherwise, choose any alternate allele (it doesn't really matter) + alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 
1 : 0))); + } + } + return alleles; + } + + /** + * @param tracker dummy parameter here + * @param ref Reference context + * @param alternateAllelesToUse alt allele list + * @return end location for vc to be created + */ + protected int getEndLocation(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final List alternateAllelesToUse) { + // for SNPs, end loc is is the same as start loc + return ref.getLocus().getStart(); + + } + + +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypePriors.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypePriors.java new file mode 100644 index 000000000..df5f6002b --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypePriors.java @@ -0,0 +1,58 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; +import org.broadinstitute.sting.utils.MathUtils; + +public class PoolGenotypePriors implements GenotypePriors { + private final double[] flatPriors; + private final double heterozygosity; + private final int samplesPerPool; + private double[] priors = null; + + /** + * Create a new DiploidGenotypePriors object with flat priors for each diploid genotype + */ + public PoolGenotypePriors(double heterozygosity, int samplesPerPool) { + flatPriors = new double[2*samplesPerPool+1]; + for (int k=0; k 0.0) + throw new ReviewedStingException("BUG: Attempting to create a log-probability vector with positive elements"); + + if (compressRange) { + minVal = getMinIdx(vec, maxValIdx); + maxVal = getMaxIdx(vec, maxValIdx); + probabilityArray = Arrays.copyOfRange(vec, minVal, maxVal+1); + + } else { + probabilityArray = vec; + minVal = 0; + maxVal = vec.length-1; + + } + } + + public ProbabilityVector(double[] vec) { + this(vec,true); + } + + public ProbabilityVector(ProbabilityVector other, boolean compressRange) { + // create new 
probability vector from other. + this(other.getUncompressedProbabilityVector(), compressRange); + + } + public int getMinVal() { return minVal;} + public int getMaxVal() { return maxVal;} + public double[] getProbabilityVector() { return probabilityArray;} + + public double[] getProbabilityVector(int minVal, int maxVal) { + // get vector in specified range. If range is outside of current vector, fill with negative infinities + double[] x = new double[maxVal - minVal + 1]; + + for (int k=minVal; k <= maxVal; k++) + x[k-minVal] = getLogProbabilityForIndex(k); + + + return x; + } + + public double[] getUncompressedProbabilityVector() { + double x[] = new double[maxVal+1]; + + for (int i=0; i < minVal; i++) + x[i] = Double.NEGATIVE_INFINITY; + for (int i=minVal; i <=maxVal; i++) + x[i] = probabilityArray[i-minVal]; + + return x; + } + /** + * Return log Probability for original index i + * @param idx Index to probe + * @return log10(Pr X = i) ) + */ + public double getLogProbabilityForIndex(int idx) { + if (idx < minVal || idx > maxVal) + return Double.NEGATIVE_INFINITY; + else + return probabilityArray[idx-minVal]; + } + + //public ProbabilityVector + public static ProbabilityVector compressVector(double[] vec ) { + return new ProbabilityVector(vec, true); + } + + /** + * Determine left-most index where a vector exceeds (max Value - DELTA) + * @param vec Input vector + * @param maxValIdx Index to stop - usually index with max value in vector + * @return Min index where vector > vec[maxValIdx]-LOG_DYNAMIC_RANGE + */ + private static int getMinIdx(double[] vec, int maxValIdx) { + int edgeIdx; + for (edgeIdx=0; edgeIdx<=maxValIdx; edgeIdx++ ) { + if (vec[edgeIdx] > vec[maxValIdx]-LOG_DYNAMIC_RANGE) + break; + } + + return edgeIdx; + + + } + + /** + * Determine right-most index where a vector exceeds (max Value - DELTA) + * @param vec Input vector + * @param maxValIdx Index to stop - usually index with max value in vector + * @return Max index where vector > 
vec[maxValIdx]-LOG_DYNAMIC_RANGE + */ + private static int getMaxIdx(double[] vec, int maxValIdx) { + int edgeIdx; + for (edgeIdx=vec.length-1; edgeIdx>=maxValIdx; edgeIdx-- ) { + if (vec[edgeIdx] > vec[maxValIdx]-LOG_DYNAMIC_RANGE) + break; + } + + return edgeIdx; + + + } + + /** + * + * @param other + * @return + */ + public double logDotProduct(ProbabilityVector other) { + // find overlap in range + int minRange = Math.max(this.minVal, other.getMinVal()); + int maxRange = Math.min(this.maxVal, other.getMaxVal()); + if (minRange > maxRange) + return Double.NEGATIVE_INFINITY; + + // x = 0,1,2, y = 2,3,4. minRange = 2, maxRange = 2 + double[] result = new double[maxRange - minRange+1]; + for (int k=0; k <= maxRange-minRange; k++) { + int startI = minRange - this.minVal; + int startJ = minRange - other.getMinVal(); + result[k] = this.probabilityArray[k+startI] + other.probabilityArray[k+startJ]; + + + + } + return MathUtils.approximateLog10SumLog10(result); + } + +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java new file mode 100755 index 000000000..0890ac20c --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import org.jgrapht.graph.DefaultDirectedGraph; + +/** + * Created by IntelliJ IDEA. 
+ * User: ebanks + * Date: Mar 23, 2011 + */ + +// simple edge class for connecting nodes in the graph +public class DeBruijnEdge implements Comparable { + + private int multiplicity; + private boolean isRef; + + public DeBruijnEdge() { + multiplicity = 1; + isRef = false; + } + + public DeBruijnEdge( final boolean isRef ) { + multiplicity = 1; + this.isRef = isRef; + } + + public DeBruijnEdge( final boolean isRef, final int multiplicity ) { + this.multiplicity = multiplicity; + this.isRef = isRef; + } + + public int getMultiplicity() { + return multiplicity; + } + + public void setMultiplicity( final int value ) { + multiplicity = value; + } + + public boolean getIsRef() { + return isRef; + } + + public void setIsRef( final boolean isRef ) { + this.isRef = isRef; + } + + public boolean equals( final DefaultDirectedGraph graph, final DeBruijnEdge edge ) { + return (graph.getEdgeSource(this).equals(graph.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph.getEdgeTarget(edge))); + } + + public boolean equals( final DefaultDirectedGraph graph, final DeBruijnEdge edge, final DefaultDirectedGraph graph2 ) { + return (graph.getEdgeSource(this).equals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph2.getEdgeTarget(edge))); + } + + @Override + public int compareTo( final DeBruijnEdge that ) { + return this.multiplicity - that.multiplicity; + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertex.java new file mode 100755 index 000000000..39833613d --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertex.java @@ -0,0 +1,46 @@ +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. 
+ * User: ebanks + * Date: Mar 23, 2011 + */ +// simple node class for storing kmer sequences +public class DeBruijnVertex { + + protected final byte[] sequence; + public final int kmer; + + public DeBruijnVertex( final byte[] sequence, final int kmer ) { + this.sequence = sequence; + this.kmer = kmer; + } + + @Override + public boolean equals( Object v ) { + return v instanceof DeBruijnVertex && Arrays.equals(sequence, ((DeBruijnVertex) v).sequence); + } + + @Override + public int hashCode() { // necessary to override here so that graph.containsVertex() works the same way as vertex.equals() as one might expect + return Arrays.hashCode(sequence); + } + + public String toString() { + return new String(sequence); + } + + public String getSuffixString() { + return new String( getSuffix() ); + } + + public byte[] getSequence() { + return sequence; + } + + public byte[] getSuffix() { + return Arrays.copyOfRange( sequence, kmer - 1, sequence.length ); + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java new file mode 100644 index 000000000..52c13d124 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -0,0 +1,616 @@ +/* + * Copyright (c) 2011 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; +import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.*; + +import java.util.*; + +public class GenotypingEngine { + + private final boolean DEBUG; + private final int MNP_LOOK_AHEAD; + private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE; + private final static List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied + private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("", false); + + public GenotypingEngine( final boolean DEBUG, final int MNP_LOOK_AHEAD, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) { + this.DEBUG = DEBUG; + this.MNP_LOOK_AHEAD = MNP_LOOK_AHEAD; + this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE; + noCall.add(Allele.NO_CALL); + } + + // This function is the streamlined approach, currently not being used + @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 
0"}) + public List>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList haplotypes, final byte[] ref, final GenomeLoc refLoc, + final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser ) { + // Prepare the list of haplotype indices to genotype + final ArrayList allelesToGenotype = new ArrayList(); + + for( final Haplotype h : haplotypes ) { + allelesToGenotype.add( Allele.create(h.getBases(), h.isReference()) ); + } + final int numHaplotypes = haplotypes.size(); + + // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample + final GenotypesContext genotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size()); + for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples + final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2]; + final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, sample); + int glIndex = 0; + for( int iii = 0; iii < numHaplotypes; iii++ ) { + for( int jjj = 0; jjj <= iii; jjj++ ) { + genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC + } + } + genotypes.add(new GenotypeBuilder(sample, noCall).PL(genotypeLikelihoods).make()); + } + final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder().loc(activeRegionWindow).alleles(allelesToGenotype).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel); + if( call == null ) { return Collections.emptyList(); } // exact model says that the call confidence is below the specified confidence threshold so nothing to do here + + // Prepare the list of haplotypes that need to be run through Smith-Waterman for output to VCF + final ArrayList haplotypesToRemove = new ArrayList(); + for( final Haplotype 
h : haplotypes ) { + if( call.getAllele(h.getBases()) == null ) { // exact model removed this allele from the list so no need to run SW and output to VCF + haplotypesToRemove.add(h); + } + } + haplotypes.removeAll(haplotypesToRemove); + + if( OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) { + final List>>> returnVCs = new ArrayList>>>(); + // set up the default 1-to-1 haplotype mapping object + final HashMap> haplotypeMapping = new HashMap>(); + for( final Haplotype h : haplotypes ) { + final ArrayList list = new ArrayList(); + list.add(h); + haplotypeMapping.put(call.getAllele(h.getBases()), list); + } + returnVCs.add( new Pair>>(call,haplotypeMapping) ); + return returnVCs; + } + + final ArrayList>>> returnCalls = new ArrayList>>>(); + + // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file + final TreeSet startPosKeySet = new TreeSet(); + int count = 0; + if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); } + for( final Haplotype h : haplotypes ) { + if( DEBUG ) { + System.out.println( h.toString() ); + System.out.println( "> Cigar = " + h.getCigar() ); + } + // Walk along the alignment and turn any difference from the reference into an event + h.setEventMap( generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) ); + startPosKeySet.addAll(h.getEventMap().keySet()); + } + + // Create the VC merge priority list + final ArrayList priorityList = new ArrayList(); + for( int iii = 0; iii < haplotypes.size(); iii++ ) { + priorityList.add("HC" + iii); + } + + // Walk along each position in the key set and create each event to be outputted + for( final int loc : startPosKeySet ) { + if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { + final ArrayList eventsAtThisLoc = new ArrayList(); + for( final Haplotype h : haplotypes ) { + final HashMap eventMap = h.getEventMap(); + final VariantContext vc = eventMap.get(loc); 
+ if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) { + eventsAtThisLoc.add(vc); + } + } + + // Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event + final ArrayList> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes ); + + // Merge the event to find a common reference representation + final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); + + final HashMap> alleleHashMap = new HashMap>(); + int aCount = 0; + for( final Allele a : mergedVC.getAlleles() ) { + alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper + } + + if( DEBUG ) { + System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles()); + //System.out.println("Event/haplotype allele mapping = " + alleleMapper); + } + + // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample + final GenotypesContext myGenotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size()); + for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples + final int myNumHaplotypes = alleleMapper.size(); + final double[] genotypeLikelihoods = new double[myNumHaplotypes * (myNumHaplotypes+1) / 2]; + final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper); + int glIndex = 0; + for( int iii = 0; iii < myNumHaplotypes; iii++ ) { + for( int jjj = 0; jjj <= iii; jjj++ ) { + genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC + } + } + + // using the 
allele mapping object translate the haplotype allele into the event allele + final Genotype g = new GenotypeBuilder(sample) + .alleles(findEventAllelesInSample(mergedVC.getAlleles(), call.getAlleles(), call.getGenotype(sample).getAlleles(), alleleMapper, haplotypes)) + .phased(loc != startPosKeySet.first()) + .PL(genotypeLikelihoods).make(); + myGenotypes.add(g); + } + returnCalls.add( new Pair>>( + new VariantContextBuilder(mergedVC).log10PError(call.getLog10PError()).genotypes(myGenotypes).make(), alleleHashMap) ); + } + } + return returnCalls; + } + + @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) + public List>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, + final ArrayList haplotypes, + final byte[] ref, + final GenomeLoc refLoc, + final GenomeLoc activeRegionWindow, + final GenomeLocParser genomeLocParser, + final ArrayList activeAllelesToGenotype ) { + + final ArrayList>>> returnCalls = new ArrayList>>>(); + + // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file + final TreeSet startPosKeySet = new TreeSet(); + int count = 0; + if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); } + for( final Haplotype h : haplotypes ) { + // Walk along the alignment and turn any difference from the reference into an event + h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) ); + if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); } + if( DEBUG ) { + System.out.println( h.toString() ); + System.out.println( "> Cigar = " + h.getCigar() ); + System.out.println( "> Left and right breaks = (" + h.leftBreakPoint + " , " + h.rightBreakPoint + ")"); + System.out.println( ">> Events = " + h.getEventMap()); + } + } + // Create the VC merge priority list + final ArrayList priorityList = new ArrayList(); + for( 
int iii = 0; iii < haplotypes.size(); iii++ ) { + priorityList.add("HC" + iii); + } + + cleanUpSymbolicUnassembledEvents( haplotypes, priorityList ); + if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 3 ) { // if not in GGA mode and have at least 3 samples try to create MNP and complex events by looking at LD structure + mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc ); + } + if( !activeAllelesToGenotype.isEmpty() ) { // we are in GGA mode! + for( final VariantContext compVC : activeAllelesToGenotype ) { + startPosKeySet.add( compVC.getStart() ); + } + } + + + // Walk along each position in the key set and create each event to be outputted + for( final int loc : startPosKeySet ) { + if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { + final ArrayList eventsAtThisLoc = new ArrayList(); + if( activeAllelesToGenotype.isEmpty() ) { + for( final Haplotype h : haplotypes ) { + final HashMap eventMap = h.getEventMap(); + final VariantContext vc = eventMap.get(loc); + if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) { + eventsAtThisLoc.add(vc); + } + } + } else { // we are in GGA mode! 
+ for( final VariantContext compVC : activeAllelesToGenotype ) { + if( compVC.getStart() == loc ) { + priorityList.clear(); + int alleleCount = 0; + for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { + HashSet alleleSet = new HashSet(2); + alleleSet.add(compVC.getReference()); + alleleSet.add(compAltAllele); + priorityList.add("Allele" + alleleCount); + eventsAtThisLoc.add(new VariantContextBuilder(compVC).alleles(alleleSet).source("Allele"+alleleCount).make()); + alleleCount++; + } + } + } + } + + if( eventsAtThisLoc.isEmpty() ) { continue; } + + // Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event + final ArrayList> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes ); + + // Merge the event to find a common reference representation + final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); + if( mergedVC == null ) { continue; } + + final HashMap> alleleHashMap = new HashMap>(); + int aCount = 0; + for( final Allele a : mergedVC.getAlleles() ) { + alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper + } + + if( DEBUG ) { + System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles()); + //System.out.println("Event/haplotype allele mapping = " + alleleMapper); + } + + // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample + final GenotypesContext genotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size()); + for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples + final int numHaplotypes = 
alleleMapper.size(); + final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2]; + final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper); + int glIndex = 0; + for( int iii = 0; iii < numHaplotypes; iii++ ) { + for( int jjj = 0; jjj <= iii; jjj++ ) { + genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC + } + } + genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() ); + } + final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel); + + if( call != null ) { + returnCalls.add( new Pair>>(call, alleleHashMap) ); + } + } + } + return returnCalls; + } + + protected static void cleanUpSymbolicUnassembledEvents( final ArrayList haplotypes, final ArrayList priorityList ) { + final ArrayList haplotypesToRemove = new ArrayList(); + final ArrayList stringsToRemove = new ArrayList(); + for( final Haplotype h : haplotypes ) { + for( final VariantContext vc : h.getEventMap().values() ) { + if( vc.isSymbolic() ) { + for( final Haplotype h2 : haplotypes ) { + for( final VariantContext vc2 : h2.getEventMap().values() ) { + if( vc.getStart() == vc2.getStart() && vc2.isIndel() ) { + haplotypesToRemove.add(h); + stringsToRemove.add(vc.getSource()); + break; + } + } + } + } + } + } + haplotypes.removeAll(haplotypesToRemove); + priorityList.removeAll(stringsToRemove); + } + + protected void mergeConsecutiveEventsBasedOnLD( final ArrayList haplotypes, final TreeSet startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) { + final int MAX_SIZE_TO_COMBINE = 15; + final double MERGE_EVENTS_R2_THRESHOLD = 0.95; + if( startPosKeySet.size() <= 1 ) { return; } + + boolean mapWasUpdated = true; + while( mapWasUpdated ) { + mapWasUpdated = false; + + // loop over the set of start locations and consider pairs 
that start near each other + final Iterator iter = startPosKeySet.iterator(); + int thisStart = iter.next(); + while( iter.hasNext() ) { + final int nextStart = iter.next(); + if( nextStart - thisStart < MAX_SIZE_TO_COMBINE) { + boolean isBiallelic = true; + VariantContext thisVC = null; + VariantContext nextVC = null; + double x11 = Double.NEGATIVE_INFINITY; + double x12 = Double.NEGATIVE_INFINITY; + double x21 = Double.NEGATIVE_INFINITY; + double x22 = Double.NEGATIVE_INFINITY; + + for( final Haplotype h : haplotypes ) { + // only make complex substitutions out of consecutive biallelic sites + final VariantContext thisHapVC = h.getEventMap().get(thisStart); + if( thisHapVC != null && !thisHapVC.isSymbolic() ) { // something was found at this location on this haplotype + if( thisVC == null ) { + thisVC = thisHapVC; + } else if( !thisHapVC.hasSameAllelesAs( thisVC ) ) { + isBiallelic = false; + break; + } + } + final VariantContext nextHapVC = h.getEventMap().get(nextStart); + if( nextHapVC != null && !nextHapVC.isSymbolic() ) { // something was found at the next location on this haplotype + if( nextVC == null ) { + nextVC = nextHapVC; + } else if( !nextHapVC.hasSameAllelesAs( nextVC ) ) { + isBiallelic = false; + break; + } + } + // count up the co-occurrences of the events for the R^2 calculation + final ArrayList haplotypeList = new ArrayList(); + haplotypeList.add(h); + for( final String sample : haplotypes.get(0).getSampleKeySet() ) { + final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0]; + if( thisHapVC == null ) { + if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); } + else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); } + } else { + if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); } + else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); } + } + } + 
} + if( thisVC == null || nextVC == null ) { + continue; + } + if( isBiallelic ) { + final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) ); + if( DEBUG ) { + System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2)); + System.out.println("-- " + thisVC); + System.out.println("-- " + nextVC); + } + if( R2 > MERGE_EVENTS_R2_THRESHOLD ) { + + final VariantContext mergedVC = createMergedVariantContext(thisVC, nextVC, ref, refLoc); + + // remove the old event from the eventMap on every haplotype and the start pos key set, replace with merged event + for( final Haplotype h : haplotypes ) { + final HashMap eventMap = h.getEventMap(); + if( eventMap.containsKey(thisStart) && eventMap.containsKey(nextStart) ) { + eventMap.remove(thisStart); + eventMap.remove(nextStart); + eventMap.put(mergedVC.getStart(), mergedVC); + } + } + startPosKeySet.add(mergedVC.getStart()); + boolean containsStart = false; + boolean containsNext = false; + for( final Haplotype h : haplotypes ) { + final HashMap eventMap = h.getEventMap(); + if( eventMap.containsKey(thisStart) ) { containsStart = true; } + if( eventMap.containsKey(nextStart) ) { containsNext = true; } + } + if(!containsStart) { startPosKeySet.remove(thisStart); } + if(!containsNext) { startPosKeySet.remove(nextStart); } + + if( DEBUG ) { System.out.println("====> " + mergedVC); } + mapWasUpdated = true; + break; // break out of tree set iteration since it was just updated, start over from the beginning and keep merging events + } + } + } + thisStart = nextStart; + } + } + } + + // BUGBUG: make this merge function more general + protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) { + final int thisStart = thisVC.getStart(); + final int nextStart = nextVC.getStart(); + byte[] refBases = new byte[]{}; + byte[] altBases = new 
byte[]{}; + refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases()); + altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases()); + int locus; + for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) { + final byte refByte = ref[locus - refLoc.getStart()]; + refBases = ArrayUtils.add(refBases, refByte); + altBases = ArrayUtils.add(altBases, refByte); + } + refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel + altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases()); + + int iii = 0; + if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele + while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; } + } + final ArrayList mergedAlleles = new ArrayList(); + mergedAlleles.add( Allele.create( ArrayUtils.subarray(refBases, iii, refBases.length), true ) ); + mergedAlleles.add( Allele.create( ArrayUtils.subarray(altBases, iii, altBases.length), false ) ); + return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make(); + } + + protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) { + final double total = x11 + x12 + x21 + x22; + final double pa1b1 = x11 / total; + final double pa1b2 = x12 / total; + final double pa2b1 = x21 / total; + final double pa1 = pa1b1 + pa1b2; + final double pb1 = pa1b1 + pa2b1; + return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) ); + } + + @Requires({"haplotypes.size() >= eventsAtThisLoc.size() + 1"}) + @Ensures({"result.size() == eventsAtThisLoc.size() + 1"}) + protected static ArrayList> createAlleleMapper( 
final int loc, final ArrayList eventsAtThisLoc, final ArrayList haplotypes ) { + final ArrayList> alleleMapper = new ArrayList>(); + final ArrayList refList = new ArrayList(); + for( final Haplotype h : haplotypes ) { + if( h.getEventMap().get(loc) == null ) { // no event at this location so this is a reference-supporting haplotype + refList.add(h); + } else { + boolean foundInEventList = false; + for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) { + if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) { + foundInEventList = true; + } + } + if( !foundInEventList ) { // event at this location isn't one of the genotype-able options (during GGA) so this is a reference-supporting haplotype + refList.add(h); + } + } + } + alleleMapper.add(refList); + for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) { + final ArrayList list = new ArrayList(); + for( final Haplotype h : haplotypes ) { + if( h.getEventMap().get(loc) != null && h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) { + list.add(h); + } + } + alleleMapper.add(list); + } + return alleleMapper; + } + + @Ensures({"result.size() == haplotypeAllelesForSample.size()"}) + protected static List findEventAllelesInSample( final List eventAlleles, final List haplotypeAlleles, final List haplotypeAllelesForSample, final ArrayList> alleleMapper, final ArrayList haplotypes ) { + if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; } + final ArrayList eventAllelesForSample = new ArrayList(); + for( final Allele a : haplotypeAllelesForSample ) { + final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a)); + for( int iii = 0; iii < alleleMapper.size(); iii++ ) { + final ArrayList mappedHaplotypes = alleleMapper.get(iii); + if( mappedHaplotypes.contains(haplotype) ) { + eventAllelesForSample.add(eventAlleles.get(iii)); + break; + } + } + } + return eventAllelesForSample; + } + + protected static boolean containsVCWithMatchingAlleles( final List list, final 
VariantContext vcToTest ) { + for( final VariantContext vc : list ) { + if( vc.hasSameAllelesAs(vcToTest) ) { + return true; + } + } + return false; + } + + protected static HashMap generateVCsFromAlignment( final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) { + return generateVCsFromAlignment(null, alignmentStartHapwrtRef, cigar, ref, alignment, refLoc, sourceNameToAdd, MNP_LOOK_AHEAD); // BUGBUG: needed for compatibility with HaplotypeResolver code + } + + protected static HashMap generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) { + final HashMap vcs = new HashMap(); + + int refPos = alignmentStartHapwrtRef; + if( refPos < 0 ) { return null; } // Protection against SW failures + int alignmentPos = 0; + + for( final CigarElement ce : cigar.getCigarElements() ) { + final int elementLength = ce.getLength(); + switch( ce.getOperator() ) { + case I: + final ArrayList insertionAlleles = new ArrayList(); + final int insertionStart = refLoc.getStart() + refPos - 1; + insertionAlleles.add( Allele.create(ref[refPos-1], true) ); + if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { + insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); + } else { + byte[] insertionBases = new byte[]{}; + insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base + insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength )); + insertionAlleles.add( Allele.create(insertionBases, false) 
); + } + vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); + alignmentPos += elementLength; + break; + case S: + alignmentPos += elementLength; + break; + case D: + final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base + final ArrayList deletionAlleles = new ArrayList(); + final int deletionStart = refLoc.getStart() + refPos - 1; + // BUGBUG: how often does this symbolic deletion allele case happen? + //if( haplotype != null && ( (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength) + // || (haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength) ) ) { + // deletionAlleles.add( Allele.create(ref[refPos-1], true) ); + // deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); + // vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make()); + //} else { + deletionAlleles.add( Allele.create(deletionBases, true) ); + deletionAlleles.add( Allele.create(ref[refPos-1], false) ); + vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make()); + //} + refPos += elementLength; + break; + case M: + int numSinceMismatch = -1; + int stopOfMismatch = -1; + int startOfMismatch = -1; + int refPosStartOfMismatch = -1; + for( int iii = 0; iii < elementLength; iii++ ) { + if( ref[refPos] != alignment[alignmentPos] && alignment[alignmentPos] != ((byte) 'N') ) { + // SNP or start of possible MNP + if( 
stopOfMismatch == -1 ) { + startOfMismatch = alignmentPos; + stopOfMismatch = alignmentPos; + numSinceMismatch = 0; + refPosStartOfMismatch = refPos; + } else { + stopOfMismatch = alignmentPos; + } + } + if( stopOfMismatch != -1) { + numSinceMismatch++; + } + if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) { + final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 ); + final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 ); + final ArrayList snpAlleles = new ArrayList(); + snpAlleles.add( Allele.create( refBases, true ) ); + snpAlleles.add( Allele.create( mismatchBases, false ) ); + final int snpStart = refLoc.getStart() + refPosStartOfMismatch; + vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make()); + numSinceMismatch = -1; + stopOfMismatch = -1; + startOfMismatch = -1; + refPosStartOfMismatch = -1; + } + refPos++; + alignmentPos++; + } + break; + case N: + case H: + case P: + default: + throw new ReviewedStingException( "Unsupported cigar operator created during SW alignment: " + ce.getOperator() ); + } + } + return vcs; + } +} \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java new file mode 100755 index 000000000..559347843 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -0,0 +1,567 @@ +/* + * Copyright (c) 2011 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including 
without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import com.google.java.contract.Ensures; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; +import org.broadinstitute.sting.gatk.walkers.PartitionBy; +import 
org.broadinstitute.sting.gatk.walkers.PartitionType; +import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; +import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.clipping.ReadClipper; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.fragments.FragmentCollection; +import org.broadinstitute.sting.utils.fragments.FragmentUtils; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; + +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.*; + +/** + * Call SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region. Haplotypes are evaluated using an affine gap penalty Pair HMM. + * + *

Input

+ *

+ * Input bam file(s) from which to make calls + *

+ * + *

Output

+ *

+ * VCF file with raw, unrecalibrated SNP and indel calls. + *

+ * + *

Examples

+ *
+ *   java
+ *     -jar GenomeAnalysisTK.jar
+ *     -T HaplotypeCaller
+ *     -R reference/human_g1k_v37.fasta
+ *     -I sample1.bam [-I sample2.bam ...] \
+ *     --dbsnp dbSNP.vcf \
+ *     -stand_call_conf [50.0] \
+ *     -stand_emit_conf 10.0 \
+ *     [-L targets.interval_list]
+ *     -o output.raw.snps.indels.vcf
+ * 
+ * + *

Caveats

+ *
    + *
  • The system is under active and continuous development. All outputs, the underlying likelihood model, and command line arguments are likely to change often.
  • + *
+ * + * @author rpoplin + * @since 8/22/11 + */ + +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) +@PartitionBy(PartitionType.LOCUS) +@ActiveRegionExtension(extension=65, maxRegion=300) +public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { + + /** + * A raw, unfiltered, highly sensitive callset in VCF format. + */ + @Output(doc="File to which variants should be written", required = true) + protected VariantContextWriter vcfWriter = null; + + @Output(fullName="graphOutput", shortName="graph", doc="File to which debug assembly graph information should be written", required = false) + protected PrintStream graphWriter = null; + + @Hidden + @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false) + protected String keepRG = null; + + @Hidden + @Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false) + protected int MNP_LOOK_AHEAD = 0; + + @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false) + protected int MIN_PRUNE_FACTOR = 1; + + @Advanced + @Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false) + protected boolean GENOTYPE_FULL_ACTIVE_REGION = false; + + @Advanced + @Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. 
the reference", required = false) + protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false; + + @Advanced + @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false) + protected int gcpHMM = 10; + + @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false) + protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000; + + @Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false) + protected boolean USE_ALLELES_TRIGGER = false; + + /** + * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. + * dbSNP is not used in any way for the calculations themselves. + */ + @ArgumentCollection + protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + public RodBinding getDbsnpRodBinding() { return dbsnp.dbsnp; } + + /** + * If a call overlaps with a record from the provided comp track, the INFO field will be annotated + * as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field). + * Records that are filtered in the comp track will be ignored. + * Note that 'dbSNP' has been special-cased (see the --dbsnp argument). + */ + @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false) + public List> comps = Collections.emptyList(); + public List> getCompRodBindings() { return comps; } + + // The following are not used by the Unified Genotyper + public RodBinding getSnpEffRodBinding() { return null; } + public List> getResourceRodBindings() { return Collections.emptyList(); } + public boolean alwaysAppendDbsnpId() { return false; } + + /** + * Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations. 
+ */ + @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false) + protected List annotationsToUse = new ArrayList(Arrays.asList(new String[]{"ClippingRankSumTest"})); + + /** + * Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments, + * so annotations will be excluded even if they are explicitly included with the other options. + */ + @Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false) + protected List annotationsToExclude = new ArrayList(Arrays.asList(new String[]{"HaplotypeScore", "MappingQualityZero", "SpanningDeletions", "TandemRepeatAnnotator"})); + + /** + * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups. + */ + @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false) + protected String[] annotationClassesToUse = { "Standard" }; + + @ArgumentCollection + private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); + + // the calculation arguments + private UnifiedGenotyperEngine UG_engine = null; + private UnifiedGenotyperEngine UG_engine_simple_genotyper = null; + + @Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false) + protected boolean DEBUG; + + // the assembly engine + LocalAssemblyEngine assemblyEngine = null; + + // the likelihoods engine + LikelihoodCalculationEngine likelihoodCalculationEngine = null; + + // the genotyping engine + GenotypingEngine genotypingEngine = null; + + // the annotation engine + private VariantAnnotatorEngine annotationEngine; + + // fasta reference reader to supplement the edges of the reference sequence + private 
IndexedFastaSequenceFile referenceReader; + + // reference base padding size + private static final int REFERENCE_PADDING = 900; + + // bases with quality less than or equal to this value are trimmed off the tails of the reads + private static final byte MIN_TAIL_QUALITY = 20; + + private ArrayList samplesList = new ArrayList(); + private final static double LOG_ONE_HALF = -Math.log10(2.0); + private final static double LOG_ONE_THIRD = -Math.log10(3.0); + private final ArrayList allelesToGenotype = new ArrayList(); + + private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file + private final static Allele FAKE_ALT_ALLELE = Allele.create("", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file + + //--------------------------------------------------------------------------------------------------------------- + // + // initialize + // + //--------------------------------------------------------------------------------------------------------------- + + public void initialize() { + super.initialize(); + + // get all of the unique sample names + Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + samplesList.addAll( samples ); + // initialize the UnifiedGenotyper Engine which is used to call into the exact model + UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument + UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling + UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low 
values used for isActive determination only, default/user-specified values used for actual calling + UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING); + UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); + UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + + // initialize the output VCF header + annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); + + Set headerInfo = new HashSet(); + + // all annotation fields from VariantAnnotatorEngine + headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); + // all callers need to add these standard annotation header lines + VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, + VCFConstants.DOWNSAMPLED_KEY, + VCFConstants.MLE_ALLELE_COUNT_KEY, + VCFConstants.MLE_ALLELE_FREQUENCY_KEY); + // all callers need to add these standard FORMAT field header lines + VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true, + VCFConstants.GENOTYPE_KEY, + VCFConstants.GENOTYPE_QUALITY_KEY, + VCFConstants.DEPTH_KEY, + VCFConstants.GENOTYPE_PL_KEY); + // header lines for the experimental HaplotypeCaller-specific annotations + headerInfo.add(new VCFInfoHeaderLine("NVH", 1, VCFHeaderLineType.Integer, "Number of variants found on the haplotype that contained this variant")); + headerInfo.add(new VCFInfoHeaderLine("NumHapEval", 1, VCFHeaderLineType.Integer, "Number of haplotypes that were chosen for evaluation in this active region")); + headerInfo.add(new VCFInfoHeaderLine("NumHapAssembly", 1, VCFHeaderLineType.Integer, "Number of haplotypes created during the assembly of this active region")); + headerInfo.add(new VCFInfoHeaderLine("ActiveRegionSize", 1, VCFHeaderLineType.Integer, "Number of base pairs that comprise this active region")); + 
headerInfo.add(new VCFInfoHeaderLine("EVENTLENGTH", 1, VCFHeaderLineType.Integer, "Max length of all the alternate alleles")); + headerInfo.add(new VCFInfoHeaderLine("TYPE", 1, VCFHeaderLineType.String, "Type of event: SNP or INDEL")); + headerInfo.add(new VCFInfoHeaderLine("extType", 1, VCFHeaderLineType.String, "Extended type of event: SNP, MNP, INDEL, or COMPLEX")); + headerInfo.add(new VCFInfoHeaderLine("QDE", 1, VCFHeaderLineType.Float, "QD value divided by the number of variants found on the haplotype that contained this variant")); + + vcfWriter.writeHeader(new VCFHeader(headerInfo, samples)); + + try { + // fasta reference reader to supplement the edges of the reference sequence + referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile); + } catch( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e); + } + + assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter ); + likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false ); + genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE ); + } + + //--------------------------------------------------------------------------------------------------------------- + // + // isActive + // + //--------------------------------------------------------------------------------------------------------------- + + // enable deletions in the pileup + @Override + public boolean includeReadsWithDeletionAtLoci() { return true; } + + // enable non primary reads in the active region + @Override + public boolean wantsNonPrimaryReads() { return true; } + + @Override + @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"}) + public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { + + if( UG_engine.getUAC().GenotypingMode == 
GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) { + if( !allelesToGenotype.contains(vc) ) { + allelesToGenotype.add(vc); // save for later for processing during the ActiveRegion's map call. Should be folded into a ReadMetaDataTracker object + } + } + if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) { + return new ActivityProfileResult(1.0); + } + } + + if( USE_ALLELES_TRIGGER ) { + return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); + } + + if( context == null ) { return new ActivityProfileResult(0.0); } + + final List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied + noCall.add(Allele.NO_CALL); + + final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); + final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); + final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage(); + for( final Map.Entry sample : splitContexts.entrySet() ) { + final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event) + Arrays.fill(genotypeLikelihoods, 0.0); + + for( final PileupElement p : sample.getValue().getBasePileup() ) { + final byte qual = p.getQual(); + if( p.isDeletion() || qual > (byte) 18) { + int AA = 0; final int AB = 1; int BB = 2; + if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) { + AA = 2; + BB = 0; + if( p.isNextToSoftClip() ) { + averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28)); + } + } + genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual); + genotypeLikelihoods[AB] += 
p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF ); + genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD; + } + } + genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() ); + } + + final ArrayList alleles = new ArrayList(); + alleles.add( FAKE_REF_ALLELE ); + alleles.add( FAKE_ALT_ALLELE ); + final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); + final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); + + return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? 
ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); + } + + //--------------------------------------------------------------------------------------------------------------- + // + // map + // + //--------------------------------------------------------------------------------------------------------------- + + @Override + public Integer map( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker ) { + + final ArrayList activeAllelesToGenotype = new ArrayList(); + + if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + for( final VariantContext vc : allelesToGenotype ) { + if( activeRegion.getLocation().overlapsP( getToolkit().getGenomeLocParser().createGenomeLoc(vc) ) ) { + activeAllelesToGenotype.add(vc); // do something with these VCs during GGA mode + } + } + allelesToGenotype.removeAll( activeAllelesToGenotype ); + } + + if( !activeRegion.isActive ) { return 0; } // Not active so nothing to do! + if( activeRegion.size() == 0 && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { return 0; } // No reads here so nothing to do! + if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && activeAllelesToGenotype.isEmpty() ) { return 0; } // No alleles found in this region so nothing to do! 
+ + finalizeActiveRegion( activeRegion ); // merge overlapping fragments, clip adapter and low qual tails + final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader)); // Create the reference haplotype which is the bases from the reference that make up the active region + referenceHaplotype.setIsReference(true); + final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING); + //int PRUNE_FACTOR = Math.max(MIN_PRUNE_FACTOR, determinePruneFactorFromCoverage( activeRegion )); + final ArrayList haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, getPaddedLoc(activeRegion), MIN_PRUNE_FACTOR, activeAllelesToGenotype ); + if( haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do! + + activeRegion.hardClipToActiveRegion(); // only evaluate the parts of reads that are overlapping the active region + final List filteredReads = filterNonPassingReads( activeRegion ); // filter out reads from genotyping which fail mapping quality based criteria + if( activeRegion.size() == 0 ) { return 1; } // no reads remain after filtering so nothing else to do! + + // evaluate each sample's reads against all haplotypes + final HashMap> perSampleReadList = splitReadsBySample( activeRegion.getReads() ); + final HashMap> perSampleFilteredReadList = splitReadsBySample( filteredReads ); + likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList ); + + // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) + final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? 
likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes ); + + for( final Pair>> callResult : + ( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES + ? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser() ) + : genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) { + if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); } + + final Map>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult ); + final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst()); + + // add some custom annotations to the calls + final Map myAttributes = new LinkedHashMap(annotatedCall.getAttributes()); + // Calculate the number of variants on the haplotype + int maxNumVar = 0; + for( final Allele allele : callResult.getFirst().getAlleles() ) { + if( !allele.isReference() ) { + for( final Haplotype haplotype : callResult.getSecond().get(allele) ) { + final int numVar = haplotype.getEventMap().size(); + if( numVar > maxNumVar ) { maxNumVar = numVar; } + } + } + } + // Calculate the event length + int maxLength = 0; + for ( final Allele a : annotatedCall.getAlternateAlleles() ) { + final int length = a.length() - annotatedCall.getReference().length(); + if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; } + } + + myAttributes.put("NVH", maxNumVar); + myAttributes.put("NumHapEval", bestHaplotypes.size()); + 
myAttributes.put("NumHapAssembly", haplotypes.size()); + myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size()); + myAttributes.put("EVENTLENGTH", maxLength); + myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") ); + myAttributes.put("extType", annotatedCall.getType().toString() ); + + //if( likelihoodCalculationEngine.haplotypeScore != null ) { + // myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore)); + //} + if( annotatedCall.hasAttribute("QD") ) { + myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) ); + } + + vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() ); + } + + if( DEBUG ) { System.out.println("----------------------------------------------------------------------------------"); } + + return 1; // One active region was processed during this map call + } + + //--------------------------------------------------------------------------------------------------------------- + // + // reduce + // + //--------------------------------------------------------------------------------------------------------------- + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer cur, Integer sum) { + return cur + sum; + } + + @Override + public void onTraversalDone(Integer result) { + logger.info("Ran local assembly on " + result + " active regions"); + } + + //--------------------------------------------------------------------------------------------------------------- + // + // private helper functions + // + //--------------------------------------------------------------------------------------------------------------- + + private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { + if( DEBUG ) { System.out.println("\nAssembling " + 
activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } + final ArrayList finalizedReadList = new ArrayList(); + final FragmentCollection fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) ); + activeRegion.clearReads(); + + // Join overlapping paired reads to create a single longer read + finalizedReadList.addAll( fragmentCollection.getSingletonReads() ); + for( final List overlappingPair : fragmentCollection.getOverlappingPairs() ) { + finalizedReadList.addAll( FragmentUtils.mergeOverlappingPairedFragments(overlappingPair) ); + } + + Collections.shuffle(finalizedReadList, GenomeAnalysisEngine.getRandomGenerator()); + + // Loop through the reads hard clipping the adaptor and low quality tails + for( final GATKSAMRecord myRead : finalizedReadList ) { + final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) ); + if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) { + final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY ); + // protect against INTERVALS with abnormally high coverage + if( clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) { + activeRegion.add(clippedRead); + } + } + } + } + + private List filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { + final ArrayList readsToRemove = new ArrayList(); + for( final GATKSAMRecord rec : activeRegion.getReads() ) { + if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { + readsToRemove.add(rec); + } + } + activeRegion.removeAll( readsToRemove ); + return readsToRemove; + } + + private GenomeLoc 
getPaddedLoc( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { + final int padLeft = Math.max(activeRegion.getReferenceLoc().getStart()-REFERENCE_PADDING, 1); + final int padRight = Math.min(activeRegion.getReferenceLoc().getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(activeRegion.getReferenceLoc().getContig()).getSequenceLength()); + return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getReferenceLoc().getContig(), padLeft, padRight); + } + + private HashMap> splitReadsBySample( final List reads ) { + final HashMap> returnMap = new HashMap>(); + for( final String sample : samplesList) { + ArrayList readList = returnMap.get( sample ); + if( readList == null ) { + readList = new ArrayList(); + returnMap.put(sample, readList); + } + } + for( final GATKSAMRecord read : reads ) { + returnMap.get(read.getReadGroup().getSample()).add(read); + } + + return returnMap; + } + + /* + private int determinePruneFactorFromCoverage( final ActiveRegion activeRegion ) { + final ArrayList readLengthDistribution = new ArrayList(); + for( final GATKSAMRecord read : activeRegion.getReads() ) { + readLengthDistribution.add(read.getReadLength()); + } + final double meanReadLength = MathUtils.average(readLengthDistribution); + final double meanCoveragePerSample = (double) activeRegion.getReads().size() / ((double) activeRegion.getExtendedLoc().size() / meanReadLength) / (double) samplesList.size(); + int PRUNE_FACTOR = 0; + if( meanCoveragePerSample > 8.5 ) { + PRUNE_FACTOR = (int) Math.floor( Math.sqrt( meanCoveragePerSample - 5.0 ) ); + } else if( meanCoveragePerSample > 3.0 ) { + PRUNE_FACTOR = 1; + } + + if( DEBUG ) { System.out.println(String.format("Mean coverage per sample = %.1f --> prune factor = %d", meanCoveragePerSample, PRUNE_FACTOR)); } + return PRUNE_FACTOR; + } + */ +} \ No newline at end of file diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java new file mode 100755 index 000000000..38f11cc5d --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java @@ -0,0 +1,444 @@ +/* + * Copyright (c) 2011 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.SWPairwiseAlignment; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; + +import java.util.*; + +/** + * Haplotype-based resolution of variants in 2 different eval files. + * + *

+ * HaplotypeResolver is a tool that takes 2 VCF files and constructs haplotypes based on the variants inside them. + * From that, it can resolve potential differences in variant calls that are inherently the same (or similar) variants. + * Records are annotated with the set and status attributes. + * + *

Input

+ *

+ * 2 variant files to resolve. + *

+ * + *

Output

+ *

+ * A single consensus VCF. + *

+ * + *

Examples

+ *
+ * java -Xmx1g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T HaplotypeResolver \
+ *   -V:v1 input1.vcf \
+ *   -V:v2 input2.vcf \
+ *   -o output.vcf
+ * 
+ * + */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) +@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW)) +public class HaplotypeResolver extends RodWalker { + + protected static final String INTERSECTION_SET = "intersection"; + protected static final String SAME_STATUS = "same"; + protected static final String SOME_ALLELES_MATCH_STATUS = "someAllelesMatch"; + protected static final String SAME_START_DIFFERENT_ALLELES_STATUS = "sameStartDifferentAlleles"; + protected static final String SAME_BY_HAPLOTYPE_STATUS = "sameByHaplotype"; + protected static final String ONE_ALLELE_SUBSET_OF_OTHER_STATUS = "OneAlleleSubsetOfOther"; + protected static final String OVERLAPPING_EVENTS_STATUS = "overlappingEvents"; + + protected final static int MAX_DISTANCE_BETWEEN_MERGED_RECORDS = 50; + protected final static int MAX_HAPLOTYPE_TO_CONSIDER = 1000; + protected final static int MAX_VARIANT_SIZE_TO_CONSIDER = 100; + protected final static int ACTIVE_WINDOW = MAX_HAPLOTYPE_TO_CONSIDER + MAX_VARIANT_SIZE_TO_CONSIDER; + + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public List> variants; + + @Output(doc="File to which variants should be written", required=true) + protected VariantContextWriter baseWriter = null; + private VariantContextWriter writer; + + /** + * Set to 'null' if you don't want the set field emitted. + */ + @Argument(fullName="setKey", shortName="setKey", doc="Key used in the INFO key=value tag emitted describing which set the combined VCF record came from", required=false) + protected String SET_KEY = "set"; + + /** + * Set to 'null' if you don't want the status field emitted. 
+ */ + @Argument(fullName="statusKey", shortName="statusKey", doc="Key used in the INFO key=value tag emitted describing the extent to which records match", required=false) + protected String STATUS_KEY = "status"; + + private final LinkedList queue = new LinkedList(); + private String source1, source2; + private final List sourceVCs1 = new ArrayList(); + private final List sourceVCs2 = new ArrayList(); + + + private class VCcontext { + public final Collection vcs; + public final GenomeLoc loc; + public final ReferenceContext ref; + + public VCcontext(final Collection vcs, final ReferenceContext ref) { + this.vcs = vcs; + this.loc = getToolkit().getGenomeLocParser().createGenomeLoc(vcs.iterator().next()); + this.ref = ref; + } + } + + public void initialize() { + + if ( variants.size() != 2 ) { + throw new UserException.BadArgumentValue("variant", "this tool requires exactly 2 input variant files"); + } + source1 = variants.get(0).getName(); + source2 = variants.get(1).getName(); + + if ( SET_KEY.toLowerCase().equals("null") ) + SET_KEY = null; + if ( STATUS_KEY.toLowerCase().equals("null") ) + STATUS_KEY = null; + + // for now, INFO and FORMAT fields are not propagated to the output VCF (so they aren't put into the header) + Set headerLines = new HashSet(); + if ( SET_KEY != null ) + headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record")); + if ( STATUS_KEY != null ) + headerLines.add(new VCFInfoHeaderLine(STATUS_KEY, 1, VCFHeaderLineType.String, "Extent to which records match")); + final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.emptySet()); + baseWriter.writeHeader(vcfHeader); + writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, ACTIVE_WINDOW); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) + return 0; + + final Collection VCs = tracker.getValues(variants, context.getLocation()); + if ( 
VCs.size() == 0 ) + return 0; + + final VCcontext vc = new VCcontext(VariantContextUtils.sitesOnlyVariantContexts(VCs), ref); + + // TODO -- what should we do about filtered records? + + if ( !queue.isEmpty() ) { + + final VCcontext previous = queue.getLast(); + if ( !previous.loc.onSameContig(vc.loc) || + previous.loc.distance(vc.loc) > MAX_DISTANCE_BETWEEN_MERGED_RECORDS || + queue.getFirst().loc.distance(vc.loc) > MAX_HAPLOTYPE_TO_CONSIDER ) { + purgeQueue(); + } + } + + queue.addLast(vc); + return 0; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { + return sum + value; + } + + public void onTraversalDone(Integer result) { + if ( !queue.isEmpty() ) + purgeQueue(); + writer.close(); + } + + private void purgeQueue() { + + final ReferenceContext refContext = queue.getFirst().ref; + + // divide them up by source + while ( !queue.isEmpty() ) { + VCcontext context = queue.removeFirst(); + for ( final VariantContext vc: context.vcs ) { + if ( vc.getSource().equals(source1) ) + sourceVCs1.add(vc); + else + sourceVCs2.add(vc); + } + } + + writeAndPurgeAllEqualVariants(sourceVCs1, sourceVCs2, SAME_STATUS); + + if ( sourceVCs1.isEmpty() ) { + writeAll(sourceVCs2, source2, null); + } else if ( sourceVCs2.isEmpty() ) { + writeAll(sourceVCs1, source1, null); + } else { + resolveByHaplotype(refContext); + } + + // allow for GC of the data + sourceVCs1.clear(); + sourceVCs2.clear(); + } + + private void writeAll(final List sourceVCs, final String set, final String status) { + for ( final VariantContext vc : sourceVCs ) { + writeOne(vc, set, status); + } + } + + private void writeOne(final VariantContext vc, final String set, final String status) { + final Map attrs = new HashMap(vc.getAttributes()); + if ( SET_KEY != null && set != null ) + attrs.put(SET_KEY, set); + if ( STATUS_KEY != null && status != null ) + attrs.put(STATUS_KEY, status); + writer.add(new VariantContextBuilder(vc).attributes(attrs).make()); + } + + 
private void writeAndPurgeAllEqualVariants(final List sourceVCs1, final List sourceVCs2, final String status) { + + int currentIndex1 = 0, currentIndex2 = 0; + int size1 = sourceVCs1.size(), size2 = sourceVCs2.size(); + VariantContext current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1): null); + VariantContext current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2): null); + + while ( current1 != null && current2 != null ) { + + final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1); + final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2); + + if ( loc1.equals(loc2) || + (loc1.getStart() == loc2.getStart() && (current1.getAlternateAlleles().size() > 1 || current2.getAlternateAlleles().size() > 1)) ) { + // test the alleles + if ( determineAndWriteOverlap(current1, current2, status) ) { + sourceVCs1.remove(currentIndex1); + sourceVCs2.remove(currentIndex2); + size1--; + size2--; + } else { + currentIndex1++; + currentIndex2++; + } + current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1): null); + current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2): null); + } else if ( loc1.isBefore(loc2) ) { + currentIndex1++; + current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1): null); + } else { + currentIndex2++; + current2 = (currentIndex2 < size2 ? 
sourceVCs2.get(currentIndex2): null); + } + } + } + + private boolean determineAndWriteOverlap(final VariantContext vc1, final VariantContext vc2, final String status) { + final int allelesFrom1In2 = findOverlap(vc1, vc2); + final int allelesFrom2In1 = findOverlap(vc2, vc1); + final int totalAllelesIn1 = vc1.getAlternateAlleles().size(); + final int totalAllelesIn2 = vc2.getAlternateAlleles().size(); + + final boolean allAllelesFrom1Overlap = allelesFrom1In2 == totalAllelesIn1; + final boolean allAllelesFrom2Overlap = allelesFrom2In1 == totalAllelesIn2; + + boolean thereIsOverlap = true; + + if ( allAllelesFrom1Overlap && allAllelesFrom2Overlap ) { + writeOne(vc1, INTERSECTION_SET, status); + } else if ( allAllelesFrom1Overlap ) { + writeOne(vc2, INTERSECTION_SET, source1 + "IsSubsetOf" + source2); + } else if ( allAllelesFrom2Overlap ) { + writeOne(vc1, INTERSECTION_SET, source2 + "IsSubsetOf" + source1); + } else if ( allelesFrom1In2 > 0 ) { + writeOne(vc1, INTERSECTION_SET, SOME_ALLELES_MATCH_STATUS); + } else if ( totalAllelesIn1 > 1 || totalAllelesIn2 > 1 ) { // we don't handle multi-allelics in the haplotype-based reconstruction + writeOne(vc1, INTERSECTION_SET, SAME_START_DIFFERENT_ALLELES_STATUS); + } else { + thereIsOverlap = false; + } + + return thereIsOverlap; + } + + private static int findOverlap(final VariantContext target, final VariantContext comparison) { + int overlap = 0; + for ( final Allele allele : target.getAlternateAlleles() ) { + if ( comparison.hasAlternateAllele(allele) ) + overlap++; + } + return overlap; + } + + private static final double SW_MATCH = 4.0; + private static final double SW_MISMATCH = -10.0; + private static final double SW_GAP = -25.0; + private static final double SW_GAP_EXTEND = -1.3; + private void resolveByHaplotype(final ReferenceContext refContext) { + + final byte[] source1Haplotype = generateHaplotype(sourceVCs1, refContext); + final byte[] source2Haplotype = generateHaplotype(sourceVCs2, refContext); + + final 
SWPairwiseAlignment swConsensus1 = new SWPairwiseAlignment( refContext.getBases(), source1Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); + final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( refContext.getBases(), source2Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); + + // protect against SW failures + if( swConsensus1.getCigar().toString().contains("S") || swConsensus1.getCigar().getReferenceLength() < 20 || + swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() < 20 ) { + // TODO -- handle errors appropriately + logger.debug("Bad SW alignment; aborting at " + refContext.getLocus()); + return; + } + + // order results by start position + final TreeMap source1Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1, 0)); + final TreeMap source2Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2, 0)); + if ( source1Map.size() == 0 || source2Map.size() == 0 ) { + // TODO -- handle errors appropriately + logger.debug("No source alleles; aborting at " + refContext.getLocus()); + return; + } + + // create lists and test for equality + final List source1Alleles = new ArrayList(source1Map.values()); + final List source2Alleles = new ArrayList(source2Map.values()); + + writeAndPurgeAllEqualVariants(source1Alleles, source2Alleles, SAME_BY_HAPLOTYPE_STATUS); + if ( source1Alleles.isEmpty() ) { + writeAll(source2Alleles, source2, null); + } else if ( source2Alleles.isEmpty() ) { + writeAll(source1Alleles, source1, null); + } else { + writeDifferences(source1Alleles, source2Alleles); + } + } + + private byte[] generateHaplotype(final List sourceVCs, final ReferenceContext refContext) { + + final StringBuilder sb = new StringBuilder(); + + final int startPos = 
refContext.getWindow().getStart(); + int currentPos = startPos; + final byte[] reference = refContext.getBases(); + + for ( final VariantContext vc : sourceVCs ) { + // add any missing reference context + int vcStart = vc.getStart(); + final int refAlleleLength = vc.getReference().length(); + if ( refAlleleLength == vc.getEnd() - vc.getStart() ) // this is a deletion (whereas for other events the padding base isn't part of the position) + vcStart++; + + while ( currentPos < vcStart ) + sb.append((char)reference[currentPos++ - startPos]); + + // add the alt allele + sb.append(vc.getAlternateAllele(0).getBaseString()); + + // skip the reference allele + currentPos += refAlleleLength; + } + // add any missing reference context + final int stopPos = refContext.getWindow().getStop(); + while ( currentPos < stopPos ) + sb.append((char)reference[currentPos++ - startPos]); + + return sb.toString().getBytes(); + } + + private void writeDifferences(final List source1Alleles, final List source2Alleles) { + int currentIndex1 = 0, currentIndex2 = 0; + final int size1 = source1Alleles.size(), size2 = source2Alleles.size(); + VariantContext current1 = source1Alleles.get(0); + VariantContext current2 = source2Alleles.get(0); + + while ( currentIndex1 < size1 || currentIndex2 < size2 ) { + if ( current1 == null ) { + writeOne(current2, source2, null); + currentIndex2++; + current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2): null); + } else if ( current2 == null ) { + writeOne(current1, source1, null); + currentIndex1++; + current1 = (currentIndex1 < size1 ? 
source1Alleles.get(currentIndex1): null); + } else { + + final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1); + final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2); + + if ( loc1.getStart() == loc2.getStart() || loc1.overlapsP(loc2) ) { + String status; + if ( loc1.getStart() == loc2.getStart() ) { + final String allele1 = current1.getAlternateAllele(0).getBaseString(); + final String allele2 = current2.getAlternateAllele(0).getBaseString(); + if ( allele1.indexOf(allele2) != -1 || allele2.indexOf(allele1) != -1 ) + status = ONE_ALLELE_SUBSET_OF_OTHER_STATUS; + else + status = SAME_START_DIFFERENT_ALLELES_STATUS; + } else { + status = OVERLAPPING_EVENTS_STATUS; + } + + writeOne(current1, INTERSECTION_SET, status); + currentIndex1++; + currentIndex2++; + current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1): null); + current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2): null); + } else if ( loc1.isBefore(loc2) ) { + writeOne(current1, source1, null); + currentIndex1++; + current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1): null); + } else { + writeOne(current2, source2, null); + currentIndex2++; + current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2): null); + } + } + } + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java new file mode 100755 index 000000000..0ef1a13a4 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java @@ -0,0 +1,149 @@ +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.jgrapht.graph.DefaultDirectedGraph; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. 
+ * User: ebanks + * Date: Mar 23, 2011 + */ +// Class for finding the K best paths (as determined by the sum of multiplicities of the edges) in a graph. +// This is different from most graph traversals because we want to test paths from any source node to any sink node. +public class KBestPaths { + + // static access only + protected KBestPaths() { } + private static int MAX_PATHS_TO_HOLD = 100; + + protected static class MyInt { public int val = 0; } + + // class to keep track of paths + protected static class Path { + + // the last vertex seen in the path + private DeBruijnVertex lastVertex; + + // the list of edges comprising the path + private ArrayList edges; + + // the scores for the path + private int totalScore = 0, lowestEdge = -1; + + public Path( final DeBruijnVertex initialVertex ) { + lastVertex = initialVertex; + edges = new ArrayList(0); + } + + public Path( final Path p, final DefaultDirectedGraph graph, final DeBruijnEdge edge ) { + lastVertex = graph.getEdgeTarget(edge); + edges = new ArrayList(p.edges); + edges.add(edge); + totalScore = p.totalScore + edge.getMultiplicity(); + lowestEdge = ( p.lowestEdge == -1 ) ? 
edge.getMultiplicity() : Math.min(p.lowestEdge, edge.getMultiplicity()); + } + + public boolean containsEdge( final DefaultDirectedGraph graph, final DeBruijnEdge edge ) { + final DeBruijnVertex targetVertex = graph.getEdgeTarget(edge); + for( final DeBruijnEdge e : edges ) { + if( e.equals(graph, edge) || graph.getEdgeTarget(e).equals(targetVertex) ) { + return true; + } + } + + return false; + } + + public ArrayList getEdges() { return edges; } + + public int getScore() { return totalScore; } + + public int getLowestEdge() { return lowestEdge; } + + public DeBruijnVertex getLastVertexInPath() { return lastVertex; } + + public byte[] getBases( final DefaultDirectedGraph graph ) { + if( edges.size() == 0 ) { return lastVertex.getSequence(); } + + byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence(); + for( final DeBruijnEdge e : edges ) { + bases = ArrayUtils.addAll(bases, graph.getEdgeTarget( e ).getSuffix()); + } + return bases; + } + } + + protected static class PathComparatorTotalScore implements Comparator { + public int compare(final Path path1, final Path path2) { + return path1.totalScore - path2.totalScore; + } + } + + //protected static class PathComparatorLowestEdge implements Comparator { + // public int compare(final Path path1, final Path path2) { + // return path2.lowestEdge - path1.lowestEdge; + // } + //} + + public static List getKBestPaths( final DefaultDirectedGraph graph, final int k ) { + if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); } + final ArrayList bestPaths = new ArrayList(); + + // run a DFS for best paths + for( final DeBruijnVertex v : graph.vertexSet() ) { + if( graph.inDegreeOf(v) == 0 ) { + findBestPaths(graph, new Path(v), bestPaths); + } + } + + Collections.sort(bestPaths, new PathComparatorTotalScore() ); + Collections.reverse(bestPaths); + return bestPaths.subList(0, Math.min(k, bestPaths.size())); + } + + private static void findBestPaths( final 
DefaultDirectedGraph graph, final Path path, final List bestPaths ) { + findBestPaths(graph, path, bestPaths, new MyInt()); + } + + private static void findBestPaths( final DefaultDirectedGraph graph, final Path path, final List bestPaths, MyInt n ) { + + // did we hit the end of a path? + if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) { + if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) { + // clean out some low scoring paths + Collections.sort(bestPaths, new PathComparatorTotalScore() ); + for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20 + } + bestPaths.add(path); + } else if( n.val > 10000) { + // do nothing, just return + } else { + // recursively run DFS + final ArrayList edgeArrayList = new ArrayList(); + edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex)); + Collections.sort(edgeArrayList); + Collections.reverse(edgeArrayList); + for ( final DeBruijnEdge edge : edgeArrayList ) { + // make sure the edge is not already in the path + if ( path.containsEdge(graph, edge) ) + continue; + + final Path newPath = new Path(path, graph, edge); + n.val++; + findBestPaths(graph, newPath, bestPaths, n); + } + } + } + + private static boolean allOutgoingEdgesHaveBeenVisited( final DefaultDirectedGraph graph, final Path path ) { + for( final DeBruijnEdge edge : graph.outgoingEdgesOf(path.lastVertex) ) { + if( !path.containsEdge(graph, edge) ) { + return false; + } + } + return true; + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java new file mode 100644 index 000000000..fabf5633f --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2011 The Broad Institute + * + * Permission is hereby granted, free of charge, to any 
person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;

import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;

import java.util.*;

/**
 * Computes per-read, per-haplotype likelihoods with a PairHMM and combines them into
 * diploid haplotype-pair likelihoods used to pick the best candidate haplotypes.
 *
 * All likelihoods are in log10 space. Results are stored back onto the {@link Haplotype}
 * objects themselves (via addReadLikelihoods), keyed by sample name.
 */
public class LikelihoodCalculationEngine {

    // log10(1/2): each read is a priori equally likely to come from either chromosome of a diploid pair
    private static final double LOG_ONE_HALF = -Math.log10(2.0);
    // a read is "informative" only if its best allele beats another allele by more than this (log10)
    private static final double BEST_LIKELIHOOD_THRESHOLD = 0.1;
    private final byte constantGCP;   // flat gap-continuation penalty applied to every read position
    private final boolean DEBUG;
    private final PairHMM pairHMM;

    /**
     * @param constantGCP flat gap continuation penalty (Phred-scaled byte) used for all bases
     * @param debug       enable stdout debugging output
     * @param noBanded    passed through to the PairHMM to disable the banded approximation
     */
    public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final boolean noBanded ) {
        pairHMM = new PairHMM( noBanded );
        this.constantGCP = constantGCP;
        DEBUG = debug;
    }

    /**
     * Evaluates every read of every sample against every haplotype.
     * Scratch matrices are sized once to the largest read x largest haplotype and reused
     * across all samples to avoid reallocation.
     */
    public void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {

        // X dimension = longest read length across all samples
        int X_METRIC_LENGTH = 0;
        for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
            for( final GATKSAMRecord read : sample.getValue() ) {
                final int readLength = read.getReadLength();
                if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
            }
        }
        // Y dimension = longest haplotype length
        int Y_METRIC_LENGTH = 0;
        for( final Haplotype h : haplotypes ) {
            final int haplotypeLength = h.getBases().length;
            if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; }
        }

        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
        X_METRIC_LENGTH += 2;
        Y_METRIC_LENGTH += 2;

        // initial arrays to hold the probabilities of being in the match, insertion and deletion cases
        final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
        final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
        final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];

        PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);

        // for each sample's reads
        for( final String sample : perSampleReadList.keySet() ) {
            //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
            // evaluate the likelihood of the reads given those haplotypes
            computeReadLikelihoods( haplotypes, perSampleReadList.get(sample), sample, matchMetricArray, XMetricArray, YMetricArray );
        }
    }

    /**
     * Per-sample worker: fills a [haplotype][read] likelihood matrix via the PairHMM and
     * attaches each row to its haplotype under this sample's name.
     */
    private void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads, final String sample,
                                         final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {

        final int numHaplotypes = haplotypes.size();
        final int numReads = reads.size();
        final double[][] readLikelihoods = new double[numHaplotypes][numReads];
        final int[][] readCounts = new int[numHaplotypes][numReads];
        for( int iii = 0; iii < numReads; iii++ ) {
            final GATKSAMRecord read = reads.get(iii);
            // reduced reads represent many original reads; weight likelihoods by the representative count
            final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);

            final byte[] overallGCP = new byte[read.getReadLength()];
            Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
            Haplotype previousHaplotypeSeen = null;
            // NOTE(review): the quality array is modified in place below — assumes getBaseQualities()
            // returns a copy (or that in-place capping is intended); TODO confirm against GATKSAMRecord
            final byte[] readQuals = read.getBaseQualities();
            final byte[] readInsQuals = read.getBaseInsertionQualities();
            final byte[] readDelQuals = read.getBaseDeletionQualities();
            for( int kkk = 0; kkk < readQuals.length; kkk++ ) {
                readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
                //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
                //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
                // quals below 18 are floored to the minimum usable score; NOTE(review): magic 18 —
                // presumably related to QualityUtils.MIN_USABLE_Q_SCORE, consider naming the constant
                readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
            }

            for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
                final Haplotype haplotype = haplotypes.get(jjj);
                // optimization: restart the HMM only from the first base where this haplotype
                // differs from the previous one (earlier columns are identical)
                final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) );
                previousHaplotypeSeen = haplotype;

                readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(),
                        readQuals, readInsQuals, readDelQuals, overallGCP,
                        haplotypeStart, matchMetricArray, XMetricArray, YMetricArray);
                readCounts[jjj][iii] = readCount;
            }
        }
        for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
            haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj], readCounts[jjj] );
        }
    }

    /** Index of the first mismatching base between two arrays; b1.length if b2 is a prefix-match. */
    private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
        for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){
            if( b1[iii] != b2[iii] ) {
                return iii;
            }
        }
        return b1.length;
    }

    /** Convenience overload: single sample, 1-to-1 haplotype mapping. */
    @Requires({"haplotypes.size() > 0"})
    @Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
    public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final String sample ) {
        // set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
        final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
        for( final Haplotype h : haplotypes ) {
            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
            list.add(h);
            haplotypeMapping.add(list);
        }
        return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping );
    }

    // This function takes just a single sample and a haplotypeMapping
    @Requires({"haplotypeMapping.size() > 0"})
    @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
    public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
        final TreeSet<String> sampleSet = new TreeSet<String>();
        sampleSet.add(sample);
        return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping);
    }

    // This function takes a set of samples to pool over and a haplotypeMapping
    /**
     * Builds the lower-triangular matrix of log10 likelihoods for every diploid pair of
     * mapped-haplotype groups, pooling reads across the given samples, then normalizes it.
     * Entry [i][j] (j <= i) is the best likelihood over all (hap in group i) x (hap in group j) pairs.
     */
    @Requires({"haplotypeMapping.size() > 0"})
    @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
    public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {

        final int numHaplotypes = haplotypeMapping.size();
        final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
        for( int iii = 0; iii < numHaplotypes; iii++ ) {
            Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
        }

        // compute the diploid haplotype likelihoods
        // todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
        for( int iii = 0; iii < numHaplotypes; iii++ ) {
            for( int jjj = 0; jjj <= iii; jjj++ ) {
                for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
                    for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
                        double haplotypeLikelihood = 0.0;
                        for( final String sample : samples ) {
                            final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
                            final int[] readCounts_iii = iii_mapped.getReadCounts(sample);
                            final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
                            for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
                                // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
                                // log10(10^(a*x1) + 10^(b*x2)) ???
                                // First term is approximated by Jacobian log with table lookup.
                                haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
                            }
                        }
                        haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
                    }
                }
            }
        }

        // normalize the diploid likelihoods matrix
        return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
    }

    /**
     * Normalizes the lower triangle of a log10 likelihood matrix so the genotype likelihoods
     * sum to 1 (still returned in log10 space); upper triangle is left untouched.
     * The matrix is modified in place and also returned.
     */
    @Requires({"likelihoodMatrix.length == likelihoodMatrix[0].length"})
    @Ensures({"result.length == result[0].length", "result.length == likelihoodMatrix.length"})
    protected static double[][] normalizeDiploidLikelihoodMatrixFromLog10( final double[][] likelihoodMatrix ) {
        final int numHaplotypes = likelihoodMatrix.length;
        // flatten the lower triangle into the canonical diploid genotype ordering
        double[] genotypeLikelihoods = new double[numHaplotypes*(numHaplotypes+1)/2];
        int index = 0;
        for( int iii = 0; iii < numHaplotypes; iii++ ) {
            for( int jjj = 0; jjj <= iii; jjj++ ){
                genotypeLikelihoods[index++] = likelihoodMatrix[iii][jjj];
            }
        }
        genotypeLikelihoods = MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
        // write the normalized values back into the triangle
        index = 0;
        for( int iii = 0; iii < numHaplotypes; iii++ ) {
            for( int jjj = 0; jjj <= iii; jjj++ ){
                likelihoodMatrix[iii][jjj] = genotypeLikelihoods[index++];
            }
        }
        return likelihoodMatrix;
    }

    /*
    @Requires({"haplotypes.size() > 0"})
    @Ensures({"result.size() <= haplotypes.size()"})
    public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes ) {

        // BUGBUG: This function needs a lot of work. Need to use 4-gamete test or Tajima's D to decide to break up events into separate pieces for genotyping

        final int numHaplotypes = haplotypes.size();
        final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
        final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
        bestHaplotypesIndexList.add(0); // always start with the reference haplotype
        final double[][][] haplotypeLikelihoodMatrix = new double[sampleKeySet.size()][numHaplotypes][numHaplotypes];

        int sampleCount = 0;
        for( final String sample : sampleKeySet ) {
            haplotypeLikelihoodMatrix[sampleCount++] = computeDiploidHaplotypeLikelihoods( haplotypes, sample );
        }

        int hap1 = 0;
        int hap2 = 0;
        int chosenSample = 0;
        //double bestElement = Double.NEGATIVE_INFINITY;
        final int maxChosenHaplotypes = Math.min( 15, sampleKeySet.size() * 2 + 1 );
        while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
            double maxElement = Double.NEGATIVE_INFINITY;
            for( int kkk = 0; kkk < sampleCount; kkk++ ) {
                for( int iii = 0; iii < numHaplotypes; iii++ ) {
                    for( int jjj = 0; jjj <= iii; jjj++ ) {
                        if( haplotypeLikelihoodMatrix[kkk][iii][jjj] > maxElement ) {
                            maxElement = haplotypeLikelihoodMatrix[kkk][iii][jjj];
                            hap1 = iii;
                            hap2 = jjj;
                            chosenSample = kkk;
                        }
                    }
                }
            }
            if( maxElement == Double.NEGATIVE_INFINITY ) { break; }

            if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); }
            if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); }

            for( int iii = 0; iii < numHaplotypes; iii++ ) {
                for( int jjj = 0; jjj <= iii; jjj++ ) {
                    haplotypeLikelihoodMatrix[chosenSample][iii][jjj] = Double.NEGATIVE_INFINITY;
                }
            }
        }

        if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }

        final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
        for( final int hIndex : bestHaplotypesIndexList ) {
            bestHaplotypes.add( haplotypes.get(hIndex) );
        }
        return bestHaplotypes;
    }
    */

    /**
     * Greedily selects up to min(13, 2*numSamples+1) haplotypes by repeatedly taking the
     * diploid pair with the highest pooled likelihood. The reference haplotype (index 0)
     * is always included. Returns the selected haplotypes in selection order.
     */
    @Requires({"haplotypes.size() > 0"})
    @Ensures({"result.size() <= haplotypes.size()"})
    public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes ) {

        final int numHaplotypes = haplotypes.size();
        final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
        final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
        bestHaplotypesIndexList.add(0); // always start with the reference haplotype
        // set up the default 1-to-1 haplotype mapping object
        final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
        for( final Haplotype h : haplotypes ) {
            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
            list.add(h);
            haplotypeMapping.add(list);
        }
        final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypeMapping ); // all samples pooled together

        int hap1 = 0;
        int hap2 = 0;
        //double bestElement = Double.NEGATIVE_INFINITY;
        final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 );
        while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
            // find the current best diploid pair in the lower triangle
            double maxElement = Double.NEGATIVE_INFINITY;
            for( int iii = 0; iii < numHaplotypes; iii++ ) {
                for( int jjj = 0; jjj <= iii; jjj++ ) {
                    if( haplotypeLikelihoodMatrix[iii][jjj] > maxElement ) {
                        maxElement = haplotypeLikelihoodMatrix[iii][jjj];
                        hap1 = iii;
                        hap2 = jjj;
                    }
                }
            }
            if( maxElement == Double.NEGATIVE_INFINITY ) { break; }
            if( DEBUG ) { System.out.println("Chose haplotypes " + hap1 + " and " + hap2 + " with diploid likelihood = " + haplotypeLikelihoodMatrix[hap1][hap2]); }
            // knock out the chosen cell so the next iteration picks a different pair
            haplotypeLikelihoodMatrix[hap1][hap2] = Double.NEGATIVE_INFINITY;

            if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); }
            if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); }
        }

        if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }

        final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
        for( final int hIndex : bestHaplotypesIndexList ) {
            bestHaplotypes.add( haplotypes.get(hIndex) );
        }
        return bestHaplotypes;
    }

    /**
     * Assigns each read (per sample) to the called allele whose mapped haplotypes give it the
     * highest likelihood; reads whose best allele does not beat some other allele by more than
     * BEST_LIKELIHOOD_THRESHOLD, and all filtered reads, go to Allele.NO_CALL.
     * Only reads overlapping the call locus are partitioned at all.
     *
     * NOTE(review): read order in perSampleReadList is assumed to match the per-haplotype
     * likelihood arrays (see BUGBUG below) — TODO confirm callers guarantee this.
     */
    public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> call) {
        final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>();
        final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
        for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
            final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
            final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
            for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
                final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
                    final double likelihoods[] = new double[call.getFirst().getAlleles().size()];
                    int count = 0;
                    for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood
                        double maxLikelihood = Double.NEGATIVE_INFINITY;
                        for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
                            final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
                            if( likelihood > maxLikelihood ) {
                                maxLikelihood = likelihood;
                            }
                        }
                        likelihoods[count++] = maxLikelihood;
                    }
                    final int bestAllele = MathUtils.maxElementIndex(likelihoods);
                    final double bestLikelihood = likelihoods[bestAllele];
                    Allele allele = Allele.NO_CALL;
                    boolean isInformativeRead = false;
                    // informative == the best allele clearly beats at least one other allele
                    for( final double likelihood : likelihoods ) {
                        if( bestLikelihood - likelihood > BEST_LIKELIHOOD_THRESHOLD ) {
                            isInformativeRead = true;
                            break;
                        }
                    }
                    // uninformative reads get the no call Allele
                    if( isInformativeRead ) {
                        allele = call.getFirst().getAlleles().get(bestAllele);
                    }
                    List<GATKSAMRecord> readList = alleleReadMap.get(allele);
                    if( readList == null ) {
                        readList = new ArrayList<GATKSAMRecord>();
                        alleleReadMap.put(allele, readList);
                    }
                    readList.add(read);
                }
            }
            // add all filtered reads to the NO_CALL list because they weren't given any likelihoods
            List<GATKSAMRecord> readList = alleleReadMap.get(Allele.NO_CALL);
            if( readList == null ) {
                readList = new ArrayList<GATKSAMRecord>();
                alleleReadMap.put(Allele.NO_CALL, readList);
            }
            for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
                    readList.add(read);
                }
            }
            returnMap.put(sample.getKey(), alleleReadMap);
        }
        return returnMap;
    }
}
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;

import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;

import java.util.ArrayList;

/**
 * Base class for local assembly engines: implementations take the reads of an active
 * region and produce the set of candidate haplotypes to be evaluated for genotyping.
 *
 * Created by IntelliJ IDEA.
 * User: ebanks
 * Date: Mar 14, 2011
 */
public abstract class LocalAssemblyEngine {

    /** Available assembler implementations (used for command-line selection). */
    public enum ASSEMBLER {
        SIMPLE_DE_BRUIJN
    }

    protected LocalAssemblyEngine() {
    }

    /**
     * Assembles candidate haplotypes for the given active region.
     *
     * @param activeRegion             region whose reads are assembled
     * @param refHaplotype             the reference haplotype over the region
     * @param fullReferenceWithPadding reference bases including padding around the region
     * @param refLoc                   genomic location of the padded reference
     * @param PRUNE_FACTOR             graph-pruning threshold (implementation-defined meaning)
     * @param activeAllelesToGenotype  alleles that must be represented (GGA mode); may be empty
     * @return the candidate haplotypes, including the reference haplotype
     */
    public abstract ArrayList<Haplotype> runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, ArrayList<VariantContext> activeAllelesToGenotype);
}
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;

import com.google.java.contract.Ensures;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.jgrapht.graph.DefaultDirectedGraph;

import java.io.PrintStream;
import java.util.*;

/**
 * De Bruijn graph based local assembler: builds k-mer graphs (for several k values) from
 * the region's reads plus the reference, prunes low-weight non-reference edges, merges
 * linear chains, extracts the K best paths per graph, and turns them into candidate
 * haplotypes anchored to the reference via Smith-Waterman.
 *
 * Created by IntelliJ IDEA.
 * User: ebanks, rpoplin
 * Date: Mar 14, 2011
 */
public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {

    private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
    private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
    private static final byte MIN_QUALITY = (byte) 17; // bases below this quality invalidate a k-mer

    // Smith-Waterman parameters originally copied from IndelRealigner
    private static final double SW_MATCH = 5.0;       // 1.0;
    private static final double SW_MISMATCH = -10.0;  //-1.0/3.0;
    private static final double SW_GAP = -22.0;       //-1.0-1.0/3.0;
    private static final double SW_GAP_EXTEND = -1.2; //-1.0/.0;

    private final boolean DEBUG;
    private final PrintStream GRAPH_WRITER;             // optional DOT-format graph dump; null disables
    private final ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();

    private int PRUNE_FACTOR = 1;                       // set per-assembly in runLocalAssembly

    public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter ) {
        super();
        DEBUG = debug;
        GRAPH_WRITER = graphWriter;
    }

    /** See {@link LocalAssemblyEngine#runLocalAssembly}: full assembly pipeline for one active region. */
    public ArrayList<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList<VariantContext> activeAllelesToGenotype ) {
        this.PRUNE_FACTOR = PRUNE_FACTOR;

        // create the graphs
        createDeBruijnGraphs( activeRegion.getReads(), refHaplotype );

        // clean up the graphs by pruning and merging
        for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
            pruneGraph( graph, PRUNE_FACTOR );
            //eliminateNonRefPaths( graph );
            mergeNodes( graph );
        }

        if( GRAPH_WRITER != null ) {
            printGraphs();
        }

        // find the best paths in the graphs
        return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
    }

    /** Builds one graph per k-mer size (31..75 step 6); graphs where the reference itself cycles are skipped. */
    protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
        graphs.clear();

        // create the graph
        for( int kmer = 31; kmer <= 75; kmer += 6 ) {
            final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
            if( createGraphFromSequences( graph, reads, kmer, refHaplotype, DEBUG ) ) {
                graphs.add(graph);
            }
        }
    }

    /**
     * Repeatedly collapses single-in/single-out vertex pairs into one merged vertex,
     * redistributing edge multiplicities, until no more merges are possible.
     * Restarts the edge scan after every merge because the graph was mutated.
     */
    protected static void mergeNodes( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
        boolean foundNodesToMerge = true;
        while( foundNodesToMerge ) {
            foundNodesToMerge = false;
            for( final DeBruijnEdge e : graph.edgeSet() ) {
                final DeBruijnVertex outgoingVertex = graph.getEdgeTarget(e);
                final DeBruijnVertex incomingVertex = graph.getEdgeSource(e);
                if( !outgoingVertex.equals(incomingVertex) && graph.inDegreeOf(outgoingVertex) == 1 && graph.outDegreeOf(incomingVertex) == 1) {
                    final Set<DeBruijnEdge> outEdges = graph.outgoingEdgesOf(outgoingVertex);
                    final Set<DeBruijnEdge> inEdges = graph.incomingEdgesOf(incomingVertex);
                    // fold the merged edge's multiplicity into the neighboring edges
                    if( inEdges.size() == 1 && outEdges.size() == 1 ) {
                        inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
                        outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
                    } else if( inEdges.size() == 1 ) {
                        inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
                    } else if( outEdges.size() == 1 ) {
                        outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
                    }

                    // merged vertex = incoming sequence + outgoing suffix
                    final DeBruijnVertex addedVertex = new DeBruijnVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSuffix()), outgoingVertex.kmer );
                    graph.addVertex(addedVertex);
                    for( final DeBruijnEdge edge : outEdges ) {
                        graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
                    }
                    for( final DeBruijnEdge edge : inEdges ) {
                        graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
                    }

                    graph.removeVertex( incomingVertex );
                    graph.removeVertex( outgoingVertex );
                    foundNodesToMerge = true;
                    break; // edgeSet iterator is now invalid; rescan from scratch
                }
            }
        }
    }

    /** Removes non-reference edges with multiplicity <= pruneFactor, then drops fully orphaned vertices. */
    protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
        final ArrayList<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
        for( final DeBruijnEdge e : graph.edgeSet() ) {
            if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
                edgesToRemove.add(e);
            }
        }
        graph.removeAllEdges(edgesToRemove);

        // Run through the graph and clean up singular orphaned nodes
        final ArrayList<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
        for( final DeBruijnVertex v : graph.vertexSet() ) {
            if( graph.inDegreeOf(v) == 0 && graph.outDegreeOf(v) == 0 ) {
                verticesToRemove.add(v);
            }
        }
        graph.removeAllVertices(verticesToRemove);
    }

    /**
     * Iteratively removes source/sink vertices that touch no reference edge, so only paths
     * anchored to the reference at both ends survive. (Currently unused — the call site in
     * runLocalAssembly is commented out.)
     */
    protected static void eliminateNonRefPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
        final ArrayList<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
        boolean done = false;
        while( !done ) {
            done = true;
            for( final DeBruijnVertex v : graph.vertexSet() ) {
                if( graph.inDegreeOf(v) == 0 || graph.outDegreeOf(v) == 0 ) {
                    boolean isRefNode = false;
                    for( final DeBruijnEdge e : graph.edgesOf(v) ) {
                        if( e.getIsRef() ) {
                            isRefNode = true;
                            break;
                        }
                    }
                    if( !isRefNode ) {
                        done = false;
                        verticesToRemove.add(v);
                    }
                }
            }
            graph.removeAllVertices(verticesToRemove);
            verticesToRemove.clear();
        }
    }

    /**
     * Adds reference k-mers (marked isRef) and read k-mers to the graph.
     * Read k-mers require every base to meet MIN_QUALITY; reduced reads add each k-mer
     * a number of times equal to the mean consensus count over the k-mer span.
     *
     * @return false if adding the reference sequence would create a cycle (graph unusable for this k)
     */
    private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Collection<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
        final byte[] refSequence = refHaplotype.getBases();
        if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) {
            final int kmersInSequence = refSequence.length - KMER_LENGTH + 1;
            for (int i = 0; i < kmersInSequence - 1; i++) {
                // get the kmers
                final byte[] kmer1 = new byte[KMER_LENGTH];
                System.arraycopy(refSequence, i, kmer1, 0, KMER_LENGTH);
                final byte[] kmer2 = new byte[KMER_LENGTH];
                System.arraycopy(refSequence, i+1, kmer2, 0, KMER_LENGTH);
                if( !addKmersToGraph(graph, kmer1, kmer2, true) ) {
                    if( DEBUG ) {
                        System.out.println("Cycle detected in reference graph for kmer = " + KMER_LENGTH + " ...skipping");
                    }
                    return false;
                }
            }
        }

        for( final GATKSAMRecord read : reads ) {
            final byte[] sequence = read.getReadBases();
            final byte[] qualities = read.getBaseQualities();
            final byte[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not readuced
            if( sequence.length > KMER_LENGTH + KMER_OVERLAP ) {
                final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
                for( int iii = 0; iii < kmersInSequence - 1; iii++ ) {
                    // if the qualities of all the bases in the kmers are high enough
                    boolean badKmer = false;
                    for( int jjj = iii; jjj < iii + KMER_LENGTH + 1; jjj++) {
                        if( qualities[jjj] < MIN_QUALITY ) {
                            badKmer = true;
                            break;
                        }
                    }
                    int countNumber = 1;
                    if (read.isReducedRead()) {
                        // compute mean number of reduced read counts in current kmer span
                        final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
                        // precise rounding can make a difference with low consensus counts
                        countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
                    }

                    if( !badKmer ) {
                        // get the kmers
                        final byte[] kmer1 = new byte[KMER_LENGTH];
                        System.arraycopy(sequence, iii, kmer1, 0, KMER_LENGTH);
                        final byte[] kmer2 = new byte[KMER_LENGTH];
                        System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH);

                        for (int k=0; k < countNumber; k++)
                            addKmersToGraph(graph, kmer1, kmer2, false);
                    }
                }
            }
        }
        return true;
    }

    /**
     * Adds the edge kmer1 -> kmer2, creating vertices as needed; repeated edges just bump
     * the multiplicity (and set the ref flag if isRef).
     *
     * @return false only when isRef and both vertices already existed — i.e. the reference
     *         revisits existing k-mers, indicating a cycle in the reference path
     */
    protected static boolean addKmersToGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final byte[] kmer1, final byte[] kmer2, final boolean isRef ) {

        final int numVertexBefore = graph.vertexSet().size();
        final DeBruijnVertex v1 = new DeBruijnVertex( kmer1, kmer1.length );
        graph.addVertex(v1);
        final DeBruijnVertex v2 = new DeBruijnVertex( kmer2, kmer2.length );
        graph.addVertex(v2);
        if( isRef && graph.vertexSet().size() == numVertexBefore ) { return false; }

        final DeBruijnEdge targetEdge = graph.getEdge(v1, v2);
        if ( targetEdge == null ) {
            graph.addEdge(v1, v2, new DeBruijnEdge( isRef ));
        } else {
            if( isRef ) {
                targetEdge.setIsRef( true );
            }
            targetEdge.setMultiplicity(targetEdge.getMultiplicity() + 1);
        }
        return true;
    }

    /** Dumps every graph in Graphviz DOT format to GRAPH_WRITER (reference edges in red). */
    protected void printGraphs() {
        int count = 0;
        for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
            GRAPH_WRITER.println("digraph kmer" + count++ +" {");
            for( final DeBruijnEdge edge : graph.edgeSet() ) {
                if( edge.getMultiplicity() > PRUNE_FACTOR ) {
                    GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];");
                }
                if( edge.getIsRef() ) {
                    GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
                }
                if( !edge.getIsRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
            }
            for( final DeBruijnVertex v : graph.vertexSet() ) {
                final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
                GRAPH_WRITER.println("\t" + v.toString() + " [label=\"" + label + "\"]");
            }
            GRAPH_WRITER.println("}");
        }
    }

    /**
     * Extracts the K best paths from each graph, converts them to haplotypes anchored via
     * Smith-Waterman to the padded reference, and (in GGA mode) force-inserts requested
     * alleles that the assembly did not produce. The reference haplotype is always first.
     */
    @Ensures({"result.contains(refHaplotype)"})
    private ArrayList<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final ArrayList<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
        final ArrayList<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();

        // add the reference haplotype separately from all the others
        final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( fullReferenceWithPadding, refHaplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
        refHaplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
        refHaplotype.setCigar( swConsensus.getCigar() );
        if( !returnHaplotypes.add( refHaplotype ) ) {
            throw new ReviewedStingException("Unable to add reference haplotype during assembly: " + refHaplotype);
        }

        final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
        final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength();

        for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype
            for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
                final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart());
                if( !addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
                    return returnHaplotypes;
                    //throw new ReviewedStingException("Unable to add reference+allele haplotype during GGA-enabled assembly: " + insertedRefHaplotype);
                }
            }
        }

        for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
            for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
                final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
                if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
                    if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
                        final HashMap<Integer, VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
                        for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
                            final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
                            if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
                                for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
                                    addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
                                }
                            }
                        }
                    }
                }
            }
        }

        if( DEBUG ) {
            if( returnHaplotypes.size() > 1 ) {
                System.out.println("Found " + returnHaplotypes.size() + " candidate haplotypes to evaluate every read against.");
            } else {
                System.out.println("Found only the reference haplotype in the assembly graph.");
            }
            for( final Haplotype h : returnHaplotypes ) {
                System.out.println( h.toString() );
                System.out.println( "> Cigar = " + h.getCigar() );
            }
        }

        return returnHaplotypes;
    }

    /**
     * Aligns a candidate haplotype to the padded reference, clips/extends it so it spans
     * exactly the active region, realigns the result, and appends it to haplotypeList if
     * it is novel and both alignments look sane (no soft-clips, full region covered).
     *
     * @return true iff the (possibly extended) haplotype was added to the list
     */
    private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
        if( haplotype == null ) { return false; }

        final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
        haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
        haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );

        if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 ) { // protect against SW failures
            return false;
        }

        final int hapStart = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true );
        int hapStop = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStop, ReadUtils.ClippingTail.RIGHT_TAIL, true );
        if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED && activeRegionStop == haplotype.getAlignmentStartHapwrtRef() + haplotype.getCigar().getReferenceLength() ) {
            hapStop = activeRegionStop; // contract for getReadCoordinateForReferenceCoordinate function says that if read ends at boundary then it is outside of the clipping goal
        }
        byte[] newHaplotypeBases;
        // extend partial haplotypes to contain the full active region sequence
        int leftBreakPoint = 0;
        int rightBreakPoint = 0;
        if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED && hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
            // haplotype lies entirely inside the region: pad with reference on both sides
            newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()),
                    haplotype.getBases()),
                    ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
            leftBreakPoint = swConsensus.getAlignmentStart2wrt1() - activeRegionStart;
            rightBreakPoint = leftBreakPoint + haplotype.getBases().length;
            //newHaplotypeBases = haplotype.getBases();
            //return false; // piece of haplotype isn't anchored within the active region so don't build a haplotype out of it
        } else if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
            //return false;
            // missing the left edge: pad left with reference, clip right at hapStop
            newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), ArrayUtils.subarray(haplotype.getBases(), 0, hapStop) );
            //newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), 0, hapStop);
            leftBreakPoint = swConsensus.getAlignmentStart2wrt1() - activeRegionStart;
        } else if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
            //return false;
            // missing the right edge: clip left at hapStart, pad right with reference
            newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length), ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
            //newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length);
            rightBreakPoint = haplotype.getBases().length - hapStart;
        } else {
            // spans the whole region: just clip to it
            newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, hapStop);
        }

        final Haplotype h = new Haplotype( newHaplotypeBases );
        final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );

        h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
        h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) );
        h.leftBreakPoint = leftBreakPoint;
        h.rightBreakPoint = rightBreakPoint;
        if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures
            return false;
        }

        if( !haplotypeList.contains(h) ) {
            haplotypeList.add(h);
            return true;
        } else {
            return false;
        }
    }
}
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java new file mode 100644 index 000000000..cf6d1cd77 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -0,0 +1,134 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * @author ebanks + * @since 7/16/12 + */ +public class BQSRIntegrationTest extends WalkerTest { + + private static class BQSRTest { + final String reference; + final String interval; + final String bam; + final String args; + final String md5; + + private BQSRTest(String reference, String bam, String interval, String args, String md5) { + this.reference = reference; + this.bam = bam; + this.interval = interval; + this.args = args; + this.md5 = md5; + } + + public String getCommandLine() { + return " -T BaseRecalibrator" + + " -R " + reference + + " -I " + bam + + " -L " + interval + + args + + " --no_plots" + + " -knownSites " + (reference.equals(b36KGReference) ? 
b36dbSNP129 : hg18dbSNP132) + + " -o %s"; + } + + @Override + public String toString() { + return String.format("BQSR(bam='%s', args='%s')", bam, args); + } + } + + @DataProvider(name = "BQSRTest") + public Object[][] createBQSRTestData() { + String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; + String HiSeqInterval = "chr1:10,000,000-10,100,000"; + return new Object[][]{ + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")}, + {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")}, + {new BQSRTest(b36KGReference, validationDataLocation + 
"NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")}, + }; + } + + @Test(dataProvider = "BQSRTest") + public void testBQSR(BQSRTest params) { + WalkerTestSpec spec = new WalkerTestSpec( + params.getCommandLine(), + Arrays.asList(params.md5)); + executeTest("testBQSR-"+params.args, spec).getFirst(); + + WalkerTestSpec specNT2 = new WalkerTestSpec( + params.getCommandLine() + " -nt 2", + Arrays.asList(params.md5)); + executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst(); + } + + @Test + public void testBQSRFailWithoutDBSNP() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + " -T BaseRecalibrator" + + " -R " + b36KGReference + + " -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" + + " -L 1:10,000,000-10,200,000" + + " --no_plots" + + " -o %s", + 1, // just one output file + UserException.CommandLineException.class); + executeTest("testBQSRFailWithoutDBSNP", spec); + } + + private static class PRTest { + final String args; + final String md5; + + private PRTest(String args, String md5) { + this.args = args; + this.md5 = md5; + } + + @Override + public String toString() { + return String.format("PrintReads(args='%s')", args); + } + } + + @DataProvider(name = "PRTest") + public Object[][] createPRTestData() { + return new Object[][]{ + {new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")}, + {new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")}, + 
{new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")}, + {new PRTest(" -DIQ", "66aa65223f192ee39c1773aa187fd493")} + }; + } + + @Test(dataProvider = "PRTest") + public void testPR(PRTest params) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T PrintReads" + + " -R " + hg18Reference + + " -I " + privateTestDir + "HiSeq.1mb.1RG.bam" + + " -BQSR " + privateTestDir + "HiSeq.1mb.1RG.table" + + params.args + + " -o %s", + Arrays.asList(params.md5)); + executeTest("testPrintReads-"+params.args, spec).getFirst(); + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 08f7ddd37..5d16ba019 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -11,6 +11,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest { final String DELETION_BAM = validationDataLocation + "filtered_deletion_for_reduce_reads.bam"; final String STASH_BAM = validationDataLocation + "ReduceReadsStashBug.bam"; final String STASH_L = " -L 14:73718184-73718284 -L 14:73718294-73718330 -L 14:73718360-73718556"; + final String DIVIDEBYZERO_BAM = validationDataLocation + "ReduceReadsDivideByZeroBug.bam"; + final String DIVIDEBYZERO_L = " -L " + validationDataLocation + "ReduceReadsDivideByZeroBug.intervals"; final String L = " -L 20:10,100,000-10,120,000 "; private void RRTest(String testName, String args, String md5) { @@ -21,28 +23,28 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testDefaultCompression() { - RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f"); + RRTest("testDefaultCompression ", L, "72eb6db9d7a09a0cc25eaac1aafa97b7"); 
} @Test(enabled = true) public void testMultipleIntervals() { String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; - RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76"); + RRTest("testMultipleIntervals ", intervals, "104b1a1d9fa5394c6fea95cd32967b78"); } @Test(enabled = true) public void testHighCompression() { - RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047"); + RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "c55140cec60fa8c35161680289d74d47"); } @Test(enabled = true) public void testLowCompression() { - RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "afd39459c841b68a442abdd5ef5f8f27"); + RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "0f2e57b7f6de03cc4da1ffcc8cf8f1a7"); } @Test(enabled = true) public void testIndelCompression() { - RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973"); + RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "dda0c95f56f90e5f633c2437c2b21031"); } @Test(enabled = true) @@ -64,5 +66,16 @@ public class ReduceReadsIntegrationTest extends WalkerTest { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s "; executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6"))); } + + /** + * Divide by zero bug reported by GdA and users in the forum. Happens when the downsampler goes over a region where all reads get + * filtered out. 
+ */ + @Test(enabled = true) + public void testDivideByZero() { + String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; + executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("137505c3efd1e9f8d9209dbdf8419ff9"))); + } + } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java new file mode 100644 index 000000000..983f562d2 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java @@ -0,0 +1,156 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; +import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: delangel + * Date: 3/28/12 + * Time: 7:44 AM + * To change this template use File | Settings | File Templates. 
+ */ +public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest { + + static double[] AA1, AB1, BB1; + static double[] AA2, AB2, AC2, BB2, BC2, CC2; + static double[] A4_1, B4_1, C4_1, D4_1, E4_1,F4_1; + static double[] A4_400, B4_310, C4_220, D4_130, E4_121, F4_013; + static final int numSamples = 4; + static final int samplePloidy = 4; // = 2*samplesPerPool + + @BeforeSuite + public void before() { + // legacy diploid cases + AA1 = new double[]{-5.0, -20.0, -20.0}; + AB1 = new double[]{-20.0, 0.0, -20.0}; + BB1 = new double[]{-20.0, -20.0, 0.0}; + + // diploid, nAlleles = 3. Ordering is [2 0 0] [1 1 0] [0 2 0] [1 0 1] [0 1 1] [0 0 2], ie AA AB BB AC BC CC + AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0}; + AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0}; + AC2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0}; + BB2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0}; + BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0}; + CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0}; + + // pool (i.e. 
polyploid cases) + // NAlleles = 2, ploidy=4 + // ordering is [4 0] [3 1] [2 2 ] [1 3] [0 4] + + A4_1 = new double[]{-3.0, -20.0, -20.0, -20.0, -20.0}; + B4_1 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0}; + C4_1 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0}; + D4_1 = new double[]{-20.0, -20.0, 0.0, 0.0, -20.0}; + E4_1 = new double[]{-20.0, -20.0, 0.0, 0.0, -20.0}; + F4_1 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0}; + + // NAlleles = 3, ploidy = 4 + // ordering is [4 0 0] [3 1 0] [2 2 0] [1 3 0] [0 4 0] [3 0 1] [2 1 1] [1 2 1] [0 3 1] [2 0 2] [1 1 2] [0 2 2] [1 0 3] [0 1 3] [0 0 4] + A4_400 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; + B4_310 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; + C4_220 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; + D4_130 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; + E4_121 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; + F4_013 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, 0.0, -20.0}; + + } + + private class GetGLsTest extends TestDataProvider { + GenotypesContext GLs; + int numAltAlleles; + String name; + int ploidy; + private GetGLsTest(String name, int numAltAlleles, int ploidy, Genotype... 
arg) { + super(GetGLsTest.class, name); + GLs = GenotypesContext.create(arg); + this.name = name; + this.numAltAlleles = numAltAlleles; + this.ploidy = ploidy; + } + + public String toString() { + return String.format("%s input=%s", super.toString(), GLs); + } + } + + private static Genotype createGenotype(String name, double[] gls, int ploidy) { + Allele[] alleles = new Allele[ploidy]; + + for (int i=0; i < ploidy; i++) + alleles[i] = Allele.NO_CALL; + + return new GenotypeBuilder(name, Arrays.asList(alleles)).PL(gls).make(); + } + + @DataProvider(name = "getGLs") + public Object[][] createGLsData() { + + // bi-allelic diploid case + new GetGLsTest("B0", 1, 2, createGenotype("AA1", AA1,2), createGenotype("AA2", AA1,2), createGenotype("AA3", AA1,2)); + new GetGLsTest("B1", 1, 2, createGenotype("AA1", AA1,2), createGenotype("AA2", AA1,2), createGenotype("AB", AB1,2)); + new GetGLsTest("B2", 1, 2, createGenotype("AA1", AA1,2), createGenotype("BB", BB1,2), createGenotype("AA2", AA1,2)); + new GetGLsTest("B3a", 1, 2, createGenotype("AB", AB1,2), createGenotype("AA", AA1,2), createGenotype("BB", BB1,2)); + new GetGLsTest("B3b", 1, 2, createGenotype("AB1", AB1,2), createGenotype("AB2", AB1,2), createGenotype("AB3", AB1,2)); + new GetGLsTest("B4", 1, 2, createGenotype("BB1", BB1,2), createGenotype("BB2", BB1,2), createGenotype("AA", AA1,2)); + new GetGLsTest("B5", 1, 2, createGenotype("BB1", BB1,2), createGenotype("AB", AB1,2), createGenotype("BB2", BB1,2)); + new GetGLsTest("B6", 1, 2, createGenotype("BB1", BB1,2), createGenotype("BB2", BB1,2), createGenotype("BB3", BB1,2)); + + // tri-allelic diploid case + new GetGLsTest("B1C0", 2, 2, createGenotype("AA1", AA2,2), createGenotype("AA2", AA2,2), createGenotype("AB", AB2,2)); + new GetGLsTest("B0C1", 2, 2, createGenotype("AA1", AA2,2), createGenotype("AA2", AA2,2), createGenotype("AC", AC2,2)); + new GetGLsTest("B1C1a", 2,2, createGenotype("AA", AA2,2), createGenotype("AB", AB2,2), createGenotype("AC", AC2,2)); + new 
GetGLsTest("B1C1b", 2,2, createGenotype("AA1", AA2,2), createGenotype("AA2", AA2,2), createGenotype("BC", BC2,2)); + new GetGLsTest("B2C1", 2, 2, createGenotype("AB1", AB2,2), createGenotype("AB2", AB2,2), createGenotype("AC", AC2,2)); + new GetGLsTest("B3C2a", 2, 2, createGenotype("AB", AB2,2), createGenotype("BC1", BC2,2), createGenotype("BC2", BC2,2)); + new GetGLsTest("B3C2b", 2, 2, createGenotype("AB", AB2,2), createGenotype("BB", BB2,2), createGenotype("CC", CC2,2)); + + // bi-allelic pool case + new GetGLsTest("P0", 1, samplePloidy, createGenotype("A4_1", A4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy)); + new GetGLsTest("P1", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("B4_1", B4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy)); + new GetGLsTest("P2a", 1,samplePloidy, createGenotype("A4_1", A4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy)); + new GetGLsTest("P2b", 1, samplePloidy,createGenotype("B4_1", B4_1,samplePloidy), createGenotype("B4_1", B4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy)); + new GetGLsTest("P4", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy)); + new GetGLsTest("P6", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("F4_1", F4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy)); + new GetGLsTest("P8", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("F4_1", F4_1,samplePloidy), createGenotype("F4_1", F4_1,samplePloidy)); + + // multi-allelic pool case + new GetGLsTest("B1C3", 2, samplePloidy,createGenotype("A4_400", A4_400,samplePloidy), createGenotype("A4_400", A4_400,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy)); + new GetGLsTest("B3C9", 2, samplePloidy,createGenotype("F4_013", F4_013,samplePloidy), 
createGenotype("F4_013", F4_013,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy)); + new GetGLsTest("B6C0", 2, samplePloidy,createGenotype("B4_310", B4_310,samplePloidy), createGenotype("C4_220", C4_220,samplePloidy), createGenotype("D4_130", D4_130,samplePloidy)); + new GetGLsTest("B6C4", 2, samplePloidy,createGenotype("D4_130", D4_130,samplePloidy), createGenotype("E4_121", E4_121,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy)); + new GetGLsTest("B4C7", 2, samplePloidy,createGenotype("F4_013", F4_013,samplePloidy), createGenotype("E4_121", E4_121,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy)); + new GetGLsTest("B2C3", 2, samplePloidy,createGenotype("A4_400", A4_400,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy), createGenotype("B4_310", B4_310,samplePloidy)); + + return GetGLsTest.getTests(GetGLsTest.class); + } + + @Test(dataProvider = "getGLs") + public void testGLs(GetGLsTest cfg) { + + final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles); + final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size()); + double[] priors = new double[len]; // flat priors + + GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result); + int nameIndex = 1; + for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { + int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); + int calculatedAlleleCount = result.getAlleleCountsOfMAP()[allele]; + +// System.out.format( "%s Expected:%d Calc:%d\n",cfg.toString(),expectedAlleleCount, calculatedAlleleCount); + Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); + } + } + + +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java new file mode 100644 index 000000000..f95ba66b2 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java @@ -0,0 +1,514 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import net.sf.samtools.SAMUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.*; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.PrintStream; +import java.util.*; + + +public class GeneralPloidyGenotypeLikelihoodsUnitTest { + + final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); + final Logger logger = Logger.getLogger(Walker.class); + private static final boolean VERBOSE = false; + private static final boolean SIMULATE_NOISY_PILEUP = false; + private static final int NUM_SIMULATED_OBS = 10; + + void PoolGenotypeLikelihoodsUnitTest() { + UAC.minQualityScore = 5; + UAC.maxQualityScore = 40; + UAC.phredScaledPrior = (byte)20; + UAC.minPower = 0.0; + + } + @Test + public void testStoringLikelihoodElements() { + + + // basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back + + int ploidy = 20; + int numAlleles = 4; + int res = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy); + // System.out.format("Alt Alleles: %d, Ploidy: %d, #Likelihoods: %d\n", numAltAlleles, ploidy, res); + + List alleles = new ArrayList(); + alleles.add(Allele.create("T",true)); + alleles.add(Allele.create("C",false)); + alleles.add(Allele.create("A",false)); + alleles.add(Allele.create("G",false)); + + double[] gls = new double[res]; + + for (int k=0; k < gls.length; k++) + gls[k]= (double)k; + + GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true); + double[] 
glnew = gl.getLikelihoods(); + + Assert.assertEquals(gls, glnew); + } + + @Test + public void testElementStorageCache() { + // compare cached element storage with compuationally hard-coded iterative computation + + for (int ploidy = 2; ploidy < 10; ploidy++) { + for (int nAlleles = 2; nAlleles < 10; nAlleles++) + Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy), + GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy)); + } + + } + + @Test + public void testVectorToLinearIndex() { + + // create iterator, compare linear index given by iterator with closed form function + int numAlleles = 4; + int ploidy = 2; + GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy); + + while(iterator.hasNext()) { + System.out.format("\n%d:",iterator.getLinearIndex()); + int[] a = iterator.getCurrentVector(); + for (int aa: a) + System.out.format("%d ",aa); + + + int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy); + System.out.format("Computed idx = %d\n",computedIdx); + iterator.next(); + } + + } + @Test + public void testSubsetToAlleles() { + + int ploidy = 2; + int numAlleles = 4; + int res = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy); + // System.out.format("Alt Alleles: %d, Ploidy: %d, #Likelihoods: %d\n", numAltAlleles, ploidy, res); + + List originalAlleles = new ArrayList(); + originalAlleles.add(Allele.create("T",true)); + originalAlleles.add(Allele.create("C",false)); + originalAlleles.add(Allele.create("A",false)); + originalAlleles.add(Allele.create("G",false)); + + double[] oldLikelihoods = new double[res]; + + for (int k=0; k < oldLikelihoods.length; k++) + oldLikelihoods[k]= (double)k; + + List allelesToSubset = new ArrayList(); + allelesToSubset.add(Allele.create("A",false)); + allelesToSubset.add(Allele.create("C",false)); + + double[] newGLs = 
GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy, + originalAlleles, allelesToSubset); + + + /* + For P=2, N=4, default iteration order: + 0:2 0 0 0 + 1:1 1 0 0 + 2:0 2 0 0 + 3:1 0 1 0 + 4:0 1 1 0 + 5:0 0 2 0 + 6:1 0 0 1 + 7:0 1 0 1 + 8:0 0 1 1 + 9:0 0 0 2 + + For P=2,N=2, iteration order is: + 0:2 0 + 1:1 1 + 2:0 2 + + From first list, if we're extracting alleles 2 and 1, we need all elements that have zero at positions 0 and 3. + These are only elements {2,4,5}. Since test is flipping alleles 2 and 1, order is reversed. + */ + Assert.assertEquals(newGLs,new double[]{5.0,4.0,2.0}); + } + @Test + public void testIndexIterator() { + int[] seed = new int[]{1,2,3,4}; + GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1); + + seed = new int[]{1,0,1,1}; + iterator = runIterator(seed,-1); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1); + + seed = new int[]{5}; + iterator = runIterator(seed,-1); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1); + + // Diploid, # alleles = 4 + seed = new int[]{2,2,2,2}; + iterator = runIterator(seed,2); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),9); + + // Diploid, # alleles = 2 + seed = new int[]{2,2}; + iterator = runIterator(seed,2); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),2); + + // Diploid, # alleles = 3 + seed = new int[]{2,2,2}; + iterator = runIterator(seed,2); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),5); + + // Triploid, 
# alleles = 2 + seed = new int[]{3,3}; + iterator = runIterator(seed,3); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),3); + // Triploid, # alleles = 3 + seed = new int[]{3,3,3}; + iterator = runIterator(seed,3); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),9); + + // Triploid, # alleles = 4 + seed = new int[]{3,3,3,3}; + iterator = runIterator(seed,3); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),19); + + // 8-ploid, # alleles = 6 + seed = new int[]{8,8,8,8,8,8}; + iterator = runIterator(seed,8); + // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); + Assert.assertEquals(iterator.getLinearIndex(),1286); + + + } + + private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) { + GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo); + + while(iterator.hasNext()) { + int[] a = iterator.getCurrentVector(); + int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo); + if (VERBOSE) { + System.out.format("%d:",iterator.getLinearIndex()); + for (int i=0; i < seed.length; i++) + System.out.format("%d ",a[i]); + System.out.format(" LI:%d\n", idx); + } + iterator.next(); + } + + return iterator; + + } + + private static int prod(int[] x) { + int prod = 1; + for (int xx : x) { + prod *= (1+xx); + } + return prod; + } + + @Test + public void testErrorModel() { + final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); + final byte refByte = refPileupTestProvider.getRefByte(); + final byte altByte = refByte == (byte)'T'? 
(byte) 'C': (byte)'T'; + final String refSampleName = refPileupTestProvider.getSampleNames().get(0); + final List trueAlleles = new ArrayList(); + trueAlleles.add(Allele.create(refByte, true)); + + final VariantContext refVC = new VariantContextBuilder("test","chr1",5, 5, + trueAlleles).genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make(); + final int[] matchArray = {95, 995, 9995, 10000}; + final int[] mismatchArray = {1,5,10,20}; + if (VERBOSE) System.out.println("Running SNP error model test"); + + for (int matches: matchArray) { + for (int mismatches: mismatchArray) { + // get artificial alignment context for ref sample - no noise + Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30); + final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); + final ErrorModel emodel = new ErrorModel(UAC, refPileup, refVC, refPileupTestProvider.getReferenceContext()); + final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); + + final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); + final int peakIdx = (int)Math.round(mlEst); + if (VERBOSE) System.out.format("Matches:%d Mismatches:%d maxV:%d peakIdx:%d\n",matches, mismatches, MathUtils.maxElementIndex(errorVec),peakIdx); + Assert.assertEquals(MathUtils.maxElementIndex(errorVec),peakIdx); + + } + } + + + } + + @Test + public void testIndelErrorModel() { + final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); + final byte refByte = refPileupTestProvider.getRefByte(); + final String altBases = "TCA"; + final String refSampleName = refPileupTestProvider.getSampleNames().get(0); + final List trueAlleles = new ArrayList(); + trueAlleles.add(Allele.create(refByte, true)); + trueAlleles.add(Allele.create((char)refByte + "TC", false)); + + final String fw = new 
String(refPileupTestProvider.getReferenceContext().getForwardBases()); + final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), + refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles). + genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make(); + + + final int[] matchArray = {95, 995, 9995, 10000}; + final int[] mismatchArray = {1,5,10,20}; + + if (VERBOSE) System.out.println("Running indel error model test"); + for (int matches: matchArray) { + for (int mismatches: mismatchArray) { + // get artificial alignment context for ref sample - no noise + // CASE 1: Test HET insertion + // Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches + Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30); + final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); + final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext()); + final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); + + final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); + final int peakIdx = (int)Math.round(mlEst); + if (VERBOSE) System.out.format("Matches:%d Mismatches:%d peakIdx:%d\n",matches, mismatches, peakIdx); + Assert.assertEquals(MathUtils.maxElementIndex(errorVec),peakIdx); + + // CASE 2: Test HET deletion + + } + } + + // create deletion VC + final int delLength = 4; + final List delAlleles = new ArrayList(); + delAlleles.add(Allele.create(fw.substring(0,delLength+1), true)); + delAlleles.add(Allele.create(refByte, false)); + + final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), + 
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles). + genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make(); + + for (int matches: matchArray) { + for (int mismatches: mismatchArray) { + // get artificial alignment context for ref sample - no noise + // CASE 1: Test HET deletion + // Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches + Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30); + final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); + final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext()); + final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); + + final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); + final int peakIdx = (int)Math.round(mlEst); + if (VERBOSE) System.out.format("Matches:%d Mismatches:%d peakIdx:%d\n",matches, mismatches, peakIdx); + Assert.assertEquals(MathUtils.maxElementIndex(errorVec),peakIdx); + + // CASE 2: Test HET deletion + + } + } + + } + + @Test + public void testAddPileupToPoolGL() { + + // dummy error model - Q=infinity FAPP so that there's no source of uncertainty + final double[] emv = new double[SAMUtils.MAX_PHRED_SCORE+1]; + + // error rate for noisy tests + final int PHRED_SITE_ERROR_RATE = 20; + + Arrays.fill(emv, Double.NEGATIVE_INFINITY); + emv[SAMUtils.MAX_PHRED_SCORE] = 0; + + final int numSamples = 1; + + // have a high quality site say Q40 site, and create artificial pileups for one single sample, at coverage N, with given + // true pool AC = x. 
+ + final ArtificialReadPileupTestProvider readPileupTestProvider = new ArtificialReadPileupTestProvider(numSamples,"sample", (byte)SAMUtils.MAX_PHRED_SCORE); + final ErrorModel noiselessErrorModel = new ErrorModel(emv); + + final double[] emverr = new double[SAMUtils.MAX_PHRED_SCORE+1]; + Arrays.fill(emverr, Double.NEGATIVE_INFINITY); + emverr[PHRED_SITE_ERROR_RATE] = 0; + final ErrorModel Q30ErrorModel = new ErrorModel(emverr); + + + final int eventLength = 0; // test snp only + final byte refByte = readPileupTestProvider.getRefByte(); + final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T'; + + + final List allAlleles = new ArrayList(); // this contains only ref Allele up to now + final Set laneIDs = new TreeSet(); + laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE); + + final HashMap noiselessErrorModels = new HashMap(); + + // build per-lane error model for all lanes present in ref sample + for (String laneID : laneIDs) + noiselessErrorModels.put(laneID, noiselessErrorModel); + + final HashMap noisyErrorModels = new HashMap(); + + // build per-lane error model for all lanes present in ref sample + for (String laneID : laneIDs) + noisyErrorModels.put(laneID, Q30ErrorModel); + + // all first ref allele + allAlleles.add(Allele.create(refByte,true)); + for (byte b: BaseUtils.BASES) { + if (refByte != b) + allAlleles.add(Allele.create(b, false)); + } + + final int refIdx = 0; + int altIdx = -1; + + for (int k=0; k < allAlleles.size(); k++) + if (altByte == allAlleles.get(k).getBases()[0]) { + altIdx = k; + break; + } + + + + PrintStream out = null; + if (SIMULATE_NOISY_PILEUP) { + try { + out = new PrintStream(new File("GLUnitTest.table")); + // out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table")); + } + catch (Exception e) {} + // write header + out.format("Depth\tPoolPloidy\tACTrue\tACEst\tREF\tALTTrue\tALTEst\n"); + } + final int[] depthVector = {1000,10000}; + //final double[] alleleFrequencyVector = 
{0.01,0.1,0.5,1.0}; + final int[] spVector = {10,100}; + //final int[] spVector = {1}; + for (int depth : depthVector) { + for (int nSamplesPerPool : spVector) { + final int ploidy = 2*nSamplesPerPool; + for (int ac =2; ac <=ploidy; ac++) { + + // simulate pileup with given AC and depth + int altDepth = (int)Math.round( (double)ac/(double)ploidy * (double)depth); + final int[] numReadsPerAllele = {depth-altDepth,altDepth}; + final Map alignmentContextMap = + readPileupTestProvider.getAlignmentContextFromAlleles(eventLength, new String(new byte[]{altByte}), numReadsPerAllele); + + // get now likelihoods for this + + final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true); + final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE); + if (VERBOSE) { + System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool); + System.out.println(GL.toString()); + } + Assert.assertEquals(nGoodBases, depth); + Pair mlPair = GL.getMostLikelyACCount(); + + // Most likely element has to be conformation REF = nSamples-AC,ALT = AC + if (ac == 0) { + Assert.assertEquals(mlPair.first[refIdx],ploidy); + } else { + Assert.assertEquals(mlPair.first[altIdx],ac); + Assert.assertEquals(mlPair.first[refIdx],ploidy-ac); + } + + + // simulate now pileup with base error rate + if (SIMULATE_NOISY_PILEUP) { + System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\n", depth,ac,altDepth, nSamplesPerPool); + + for (int k=0; k < NUM_SIMULATED_OBS; k++) { + final Map noisyAlignmentContextMap = + readPileupTestProvider.getAlignmentContextFromAlleles(eventLength, new String(new byte[]{altByte}), numReadsPerAllele, + true, PHRED_SITE_ERROR_RATE); + + // get now likelihoods for this + + final GeneralPloidySNPGenotypeLikelihoods noisyGL = new 
GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true); + noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE); + mlPair = noisyGL.getMostLikelyACCount(); + + // Most likely element has to be conformation REF = nSamples-AC,ALT = AC + int acEst; + if (ac == 0) { + acEst = mlPair.first[refIdx]; + } else { + acEst = mlPair.first[altIdx]; + } + byte altEst = BaseUtils.baseIndexToSimpleBase(MathUtils.maxElementIndex(mlPair.first)); + out.format("%d\t%d\t%d\t%d\t%c\t%c\t%c\n",depth, ploidy, ac, acEst, refByte, altByte, altEst); + + } + } + } + } + + + } + if (SIMULATE_NOISY_PILEUP) + out.close(); + + + } + + + +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java new file mode 100644 index 000000000..f62b2250e --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -0,0 +1,78 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.WalkerTest; + +import java.util.Arrays; +import org.testng.annotations.Test; + +/** + * Created by IntelliJ IDEA. + * User: delangel + * Date: 4/5/12 + * Time: 11:28 AM + * To change this template use File | Settings | File Templates. 
+ */ +public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { + final static String REF = b37KGReference; + final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list"; + final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam"; + final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf"; + final String REFSAMPLE_NAME = "NA12878"; + final String MTINTERVALS = "MT:1-3000"; + final String LSVINTERVALS = "20:40,000,000-41,000,000"; + final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf"; + final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf"; + final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf"; + private void PC_MT_Test(String bam, String args, String name, String md5) { + final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ", + REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testPoolCaller:"+name+" args=" + args, spec); + } + + private void PC_LSV_Test(String args, String name, String model, String md5) { + final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ", + REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testPoolCaller:"+name+" args=" + args, spec); + } + + private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) { + final String base = 
String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane", + REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testPoolCaller:"+name+" args=" + args, spec); + } + + @Test + public void testBOTH_GGA_Pools() { + PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93"); + } + + @Test + public void testINDEL_GGA_Pools() { + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b"); + } + + @Test + public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d"); + } + + @Test + public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4"); + } + + @Test + public void testMT_SNP_DISCOVERY_sp4() { + PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459"); + } + + @Test + public void testMT_SNP_GGA_sp10() { + + PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7"); + } + +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java new file mode 100644 index 000000000..539190fe9 --- /dev/null +++ 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -0,0 +1,400 @@ +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: 3/15/12 + */ + +import net.sf.picard.reference.ReferenceSequenceFile; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; + +/** + * Unit tests for GenotypingEngine + */ +public class GenotypingEngineUnitTest extends BaseTest { + + private static ReferenceSequenceFile seq; + private GenomeLocParser genomeLocParser; + + @BeforeClass + public void init() throws FileNotFoundException { + // sequence + seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); + genomeLocParser = new GenomeLocParser(seq); + } + + @Test + public void testFindHomVarEventAllelesInSample() { + final List eventAlleles = new ArrayList(); + eventAlleles.add( Allele.create("A", true) ); + eventAlleles.add( Allele.create("C", false) ); + final List haplotypeAlleles = new ArrayList(); + haplotypeAlleles.add( Allele.create("AATA", true) ); + haplotypeAlleles.add( Allele.create("AACA", false) ); + haplotypeAlleles.add( Allele.create("CATA", false) ); + haplotypeAlleles.add( Allele.create("CACA", false) ); + final ArrayList haplotypes = new ArrayList(); + haplotypes.add(new Haplotype("AATA".getBytes())); + haplotypes.add(new Haplotype("AACA".getBytes())); + haplotypes.add(new 
Haplotype("CATA".getBytes())); + haplotypes.add(new Haplotype("CACA".getBytes())); + final List haplotypeAllelesForSample = new ArrayList(); + haplotypeAllelesForSample.add( Allele.create("CATA", false) ); + haplotypeAllelesForSample.add( Allele.create("CACA", false) ); + final ArrayList> alleleMapper = new ArrayList>(); + ArrayList Aallele = new ArrayList(); + Aallele.add(haplotypes.get(0)); + Aallele.add(haplotypes.get(1)); + ArrayList Callele = new ArrayList(); + Callele.add(haplotypes.get(2)); + Callele.add(haplotypes.get(3)); + alleleMapper.add(Aallele); + alleleMapper.add(Callele); + final List eventAllelesForSample = new ArrayList(); + eventAllelesForSample.add( Allele.create("C", false) ); + eventAllelesForSample.add( Allele.create("C", false) ); + + if(!compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) { + logger.warn("calc alleles = " + GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)); + logger.warn("expected alleles = " + eventAllelesForSample); + } + Assert.assertTrue(compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))); + } + + @Test + public void testFindHetEventAllelesInSample() { + final List eventAlleles = new ArrayList(); + eventAlleles.add( Allele.create("A", true) ); + eventAlleles.add( Allele.create("C", false) ); + eventAlleles.add( Allele.create("T", false) ); + final List haplotypeAlleles = new ArrayList(); + haplotypeAlleles.add( Allele.create("AATA", true) ); + haplotypeAlleles.add( Allele.create("AACA", false) ); + haplotypeAlleles.add( Allele.create("CATA", false) ); + haplotypeAlleles.add( Allele.create("CACA", false) ); + haplotypeAlleles.add( Allele.create("TACA", false) ); + haplotypeAlleles.add( Allele.create("TTCA", 
false) ); + haplotypeAlleles.add( Allele.create("TTTA", false) ); + final ArrayList haplotypes = new ArrayList(); + haplotypes.add(new Haplotype("AATA".getBytes())); + haplotypes.add(new Haplotype("AACA".getBytes())); + haplotypes.add(new Haplotype("CATA".getBytes())); + haplotypes.add(new Haplotype("CACA".getBytes())); + haplotypes.add(new Haplotype("TACA".getBytes())); + haplotypes.add(new Haplotype("TTCA".getBytes())); + haplotypes.add(new Haplotype("TTTA".getBytes())); + final List haplotypeAllelesForSample = new ArrayList(); + haplotypeAllelesForSample.add( Allele.create("TTTA", false) ); + haplotypeAllelesForSample.add( Allele.create("AATA", true) ); + final ArrayList> alleleMapper = new ArrayList>(); + ArrayList Aallele = new ArrayList(); + Aallele.add(haplotypes.get(0)); + Aallele.add(haplotypes.get(1)); + ArrayList Callele = new ArrayList(); + Callele.add(haplotypes.get(2)); + Callele.add(haplotypes.get(3)); + ArrayList Tallele = new ArrayList(); + Tallele.add(haplotypes.get(4)); + Tallele.add(haplotypes.get(5)); + Tallele.add(haplotypes.get(6)); + alleleMapper.add(Aallele); + alleleMapper.add(Callele); + alleleMapper.add(Tallele); + final List eventAllelesForSample = new ArrayList(); + eventAllelesForSample.add( Allele.create("A", true) ); + eventAllelesForSample.add( Allele.create("T", false) ); + + if(!compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) { + logger.warn("calc alleles = " + GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)); + logger.warn("expected alleles = " + eventAllelesForSample); + } + Assert.assertTrue(compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))); + } + + private boolean compareAlleleLists(List l1, List l2) { + if( 
l1.size() != l2.size() ) { + return false; // sanity check + } + + for( int i=0; i < l1.size(); i++ ){ + if ( !l2.contains(l1.get(i)) ) + return false; + } + return true; + } + + + private class BasicGenotypingTestProvider extends TestDataProvider { + byte[] ref; + byte[] hap; + HashMap expected; + GenotypingEngine ge = new GenotypingEngine(false, 0, false); + + public BasicGenotypingTestProvider(String refString, String hapString, HashMap expected) { + super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString)); + ref = refString.getBytes(); + hap = hapString.getBytes(); + this.expected = expected; + } + + public HashMap calcAlignment() { + final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap); + return ge.generateVCsFromAlignment( alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name", 0); + } + } + + @DataProvider(name = "BasicGenotypingTestProvider") + public Object[][] makeBasicGenotypingTests() { + + for( int contextSize : new int[]{0,1,5,9,24,36} ) { + HashMap map = new HashMap(); + map.put(1 + contextSize, (byte)'M'); + final String context = Utils.dupString('G', contextSize); + new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGACTAGCCGATAG", map); + } + + for( int contextSize : new int[]{0,1,5,9,24,36} ) { + HashMap map = new HashMap(); + map.put(2 + contextSize, (byte)'M'); + map.put(21 + contextSize, (byte)'M'); + final String context = Utils.dupString('G', contextSize); + new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG", "ATCTCGCATCGCGAGCATCGCCTAGCCGATAG", map); + } + + for( int contextSize : new int[]{0,1,5,9,24,36} ) { + HashMap map = new HashMap(); + map.put(1 + contextSize, (byte)'M'); + map.put(20 + contextSize, (byte)'I'); + final String context = Utils.dupString('G', contextSize); + new 
BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGACACTAGCCGATAG", map); + } + + for( int contextSize : new int[]{0,1,5,9,24,36} ) { + HashMap map = new HashMap(); + map.put(1 + contextSize, (byte)'M'); + map.put(20 + contextSize, (byte)'D'); + final String context = Utils.dupString('G', contextSize); + new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGCTAGCCGATAG", map); + } + + for( int contextSize : new int[]{1,5,9,24,36} ) { + HashMap map = new HashMap(); + map.put(1, (byte)'M'); + map.put(20, (byte)'D'); + final String context = Utils.dupString('G', contextSize); + new BasicGenotypingTestProvider("AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGCTAGCCGATAG", map); + } + + for( int contextSize : new int[]{0,1,5,9,24,36} ) { + HashMap map = new HashMap(); + map.put(2 + contextSize, (byte)'M'); + map.put(20 + contextSize, (byte)'I'); + map.put(30 + contextSize, (byte)'D'); + final String context = Utils.dupString('G', contextSize); + new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "ACCTCGCATCGCGAGCATCGTTACTAGCCGATG", map); + } + + for( int contextSize : new int[]{0,1,5,9,24,36} ) { + HashMap map = new HashMap(); + map.put(1 + contextSize, (byte)'M'); + map.put(20 + contextSize, (byte)'D'); + map.put(28 + contextSize, (byte)'M'); + final String context = Utils.dupString('G', contextSize); + new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGCTAGCCCATAG", map); + } + + return BasicGenotypingTestProvider.getTests(BasicGenotypingTestProvider.class); + } + + @Test(dataProvider = "BasicGenotypingTestProvider", enabled = true) + public void testHaplotypeToVCF(BasicGenotypingTestProvider cfg) { + HashMap calculatedMap = cfg.calcAlignment(); + HashMap expectedMap = cfg.expected; + logger.warn(String.format("Test: %s", cfg.toString())); + 
if(!compareVCMaps(calculatedMap, expectedMap)) { + logger.warn("calc map = " + calculatedMap); + logger.warn("expected map = " + expectedMap); + } + Assert.assertTrue(compareVCMaps(calculatedMap, expectedMap)); + } + + /** + * Tests that we get the right values from the R^2 calculation + */ + @Test + public void testCalculateR2LD() { + logger.warn("Executing testCalculateR2LD"); + + Assert.assertEquals(GenotypingEngine.calculateR2LD(1,1,1,1), 0.0, 0.00001); + Assert.assertEquals(GenotypingEngine.calculateR2LD(100,100,100,100), 0.0, 0.00001); + Assert.assertEquals(GenotypingEngine.calculateR2LD(1,0,0,1), 1.0, 0.00001); + Assert.assertEquals(GenotypingEngine.calculateR2LD(100,0,0,100), 1.0, 0.00001); + Assert.assertEquals(GenotypingEngine.calculateR2LD(1,2,3,4), (0.1 - 0.12) * (0.1 - 0.12) / (0.3 * 0.7 * 0.4 * 0.6), 0.00001); + } + + @Test + public void testCreateMergedVariantContext() { + logger.warn("Executing testCreateMergedVariantContext"); + + final byte[] ref = "AATTCCGGAATTCCGGAATT".getBytes(); + final GenomeLoc refLoc = genomeLocParser.createGenomeLoc("2", 1700, 1700 + ref.length); + + // SNP + SNP = simple MNP + VariantContext thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); + VariantContext nextVC = new VariantContextBuilder().loc("2", 1704, 1704).alleles("C","G").make(); + VariantContext truthVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","GG").source("merged").make(); + VariantContext mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // SNP + ref + SNP = MNP with ref base gap + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); 
+ truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCG").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // insertion + SNP + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); + truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // SNP + insertion + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make(); + truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // deletion + SNP + thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); + truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make(); + mergedVC = 
GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // SNP + deletion + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); + truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // insertion + deletion = MNP + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); + truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // insertion + deletion + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); + truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + 
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // insertion + insertion + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make(); + truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // deletion + deletion + thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); + truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // deletion + insertion (abutting) + thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make(); + nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make(); + truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); 
+ Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + + // complex + complex + thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make(); + nextVC = new VariantContextBuilder().loc("2", 1706, 1707).alleles("GG","AC").make(); + truthVC = new VariantContextBuilder().loc("2", 1703, 1707).alleles("TCCGG","AAACAC").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + } + + /** + * Private function to compare HashMap of VCs, it only checks the types and start locations of the VariantContext + */ + private boolean compareVCMaps(HashMap calc, HashMap expected) { + if( !calc.keySet().equals(expected.keySet()) ) { return false; } // sanity check + for( Integer loc : expected.keySet() ) { + Byte type = expected.get(loc); + switch( type ) { + case 'I': + if( !calc.get(loc).isSimpleInsertion() ) { return false; } + break; + case 'D': + if( !calc.get(loc).isSimpleDeletion() ) { return false; } + break; + case 'M': + if( !(calc.get(loc).isMNP() || calc.get(loc).isSNP()) ) { return false; } + break; + default: + return false; + } + } + return true; + } +} \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java new file mode 100644 index 000000000..dd2022fa7 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -0,0 +1,47 @@ +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import 
java.util.Arrays; + +public class HaplotypeCallerIntegrationTest extends WalkerTest { + final static String REF = b37KGReference; + final String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam"; + final String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam"; + final String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals"; + //final String RECAL_FILE = validationDataLocation + "NA12878.kmer.8.subset.recal_data.bqsr"; + + private void HCTest(String bam, String args, String md5) { + final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testHaplotypeCaller: args=" + args, spec); + } + + @Test + public void testHaplotypeCallerMultiSample() { + HCTest(CEUTRIO_BAM, "", "29ebfabcd4a42d4c5c2a576219cffb3d"); + } + + @Test + public void testHaplotypeCallerSingleSample() { + HCTest(NA12878_BAM, "", "9732313b8a12faa347f6ebe96518c5df"); + } + + @Test + public void testHaplotypeCallerMultiSampleGGA() { + HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "5e1d49d4110cd96c2e25f8e1da217e9e"); + } + + private void HCTestComplexVariants(String bam, String args, String md5) { + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 3"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec); + } + + @Test + public void testHaplotypeCallerMultiSampleComplex() { + HCTestComplexVariants(CEUTRIO_BAM, "", "53df51e6071664725f6e7497f5ee5adf"); + } +} + diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java new file mode 100644 index 000000000..e82946690 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java @@ -0,0 +1,174 @@ +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: 3/14/12 + */ + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Haplotype; +import org.broadinstitute.sting.utils.MathUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Unit tests for LikelihoodCalculationEngine + */ +public class LikelihoodCalculationEngineUnitTest extends BaseTest { + + @Test + public void testNormalizeDiploidLikelihoodMatrixFromLog10() { + double[][] likelihoodMatrix = { + {-90.2, 0, 0}, + {-190.1, -2.1, 0}, + {-7.0, -17.5, -35.9} + }; + double[][] normalizedMatrix = { + {-88.1, 0, 0}, + {-188.0, 0.0, 0}, + {-4.9, -15.4, -33.8} + }; + + + Assert.assertTrue(compareDoubleArrays(LikelihoodCalculationEngine.normalizeDiploidLikelihoodMatrixFromLog10(likelihoodMatrix), normalizedMatrix)); + + double[][] likelihoodMatrix2 = { + {-90.2, 0, 0, 0}, + {-190.1, -2.1, 0, 0}, + {-7.0, -17.5, -35.9, 0}, + {-7.0, -17.5, -35.9, -1000.0}, + }; + double[][] normalizedMatrix2 = { + {-88.1, 0, 0, 0}, + {-188.0, 0.0, 0, 0}, + {-4.9, -15.4, -33.8, 0}, + {-4.9, -15.4, -33.8, -997.9}, + }; + Assert.assertTrue(compareDoubleArrays(LikelihoodCalculationEngine.normalizeDiploidLikelihoodMatrixFromLog10(likelihoodMatrix2), normalizedMatrix2)); + } + + private class BasicLikelihoodTestProvider extends TestDataProvider { + public Double 
readLikelihoodForHaplotype1; + public Double readLikelihoodForHaplotype2; + public Double readLikelihoodForHaplotype3; + + public BasicLikelihoodTestProvider(double a, double b) { + super(BasicLikelihoodTestProvider.class, String.format("Diploid haplotype likelihoods for reads %f / %f",a,b)); + readLikelihoodForHaplotype1 = a; + readLikelihoodForHaplotype2 = b; + readLikelihoodForHaplotype3 = null; + } + + public BasicLikelihoodTestProvider(double a, double b, double c) { + super(BasicLikelihoodTestProvider.class, String.format("Diploid haplotype likelihoods for reads %f / %f / %f",a,b,c)); + readLikelihoodForHaplotype1 = a; + readLikelihoodForHaplotype2 = b; + readLikelihoodForHaplotype3 = c; + } + + public double[][] expectedDiploidHaplotypeMatrix() { + if( readLikelihoodForHaplotype3 == null ) { + double maxValue = Math.max(readLikelihoodForHaplotype1,readLikelihoodForHaplotype2); + double[][] normalizedMatrix = { + {readLikelihoodForHaplotype1 - maxValue, Double.NEGATIVE_INFINITY}, + {Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype1) + 0.5*Math.pow(10,readLikelihoodForHaplotype2)) - maxValue, readLikelihoodForHaplotype2 - maxValue} + }; + return normalizedMatrix; + } else { + double maxValue = MathUtils.max(readLikelihoodForHaplotype1,readLikelihoodForHaplotype2,readLikelihoodForHaplotype3); + double[][] normalizedMatrix = { + {readLikelihoodForHaplotype1 - maxValue, Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY}, + {Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype1) + 0.5*Math.pow(10,readLikelihoodForHaplotype2)) - maxValue, readLikelihoodForHaplotype2 - maxValue, Double.NEGATIVE_INFINITY}, + {Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype1) + 0.5*Math.pow(10,readLikelihoodForHaplotype3)) - maxValue, + Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype2) + 0.5*Math.pow(10,readLikelihoodForHaplotype3)) - maxValue, readLikelihoodForHaplotype3 - maxValue} + }; + return normalizedMatrix; + } + } + + public double[][] 
calcDiploidHaplotypeMatrix() { + ArrayList haplotypes = new ArrayList(); + for( int iii = 1; iii <= 3; iii++) { + Double readLikelihood = ( iii == 1 ? readLikelihoodForHaplotype1 : ( iii == 2 ? readLikelihoodForHaplotype2 : readLikelihoodForHaplotype3) ); + int readCount = 1; + if( readLikelihood != null ) { + Haplotype haplotype = new Haplotype( (iii == 1 ? "AAAA" : (iii == 2 ? "CCCC" : "TTTT")).getBytes() ); + haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood}, new int[]{readCount}); + haplotypes.add(haplotype); + } + } + return LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, "myTestSample"); + } + } + + @DataProvider(name = "BasicLikelihoodTestProvider") + public Object[][] makeBasicLikelihoodTests() { + new BasicLikelihoodTestProvider(-1.1, -2.2); + new BasicLikelihoodTestProvider(-2.2, -1.1); + new BasicLikelihoodTestProvider(-1.1, -1.1); + new BasicLikelihoodTestProvider(-9.7, -15.0); + new BasicLikelihoodTestProvider(-1.1, -2000.2); + new BasicLikelihoodTestProvider(-1000.1, -2.2); + new BasicLikelihoodTestProvider(0, 0); + new BasicLikelihoodTestProvider(-1.1, 0); + new BasicLikelihoodTestProvider(0, -2.2); + new BasicLikelihoodTestProvider(-100.1, -200.2); + + new BasicLikelihoodTestProvider(-1.1, -2.2, 0); + new BasicLikelihoodTestProvider(-2.2, -1.1, 0); + new BasicLikelihoodTestProvider(-1.1, -1.1, 0); + new BasicLikelihoodTestProvider(-9.7, -15.0, 0); + new BasicLikelihoodTestProvider(-1.1, -2000.2, 0); + new BasicLikelihoodTestProvider(-1000.1, -2.2, 0); + new BasicLikelihoodTestProvider(0, 0, 0); + new BasicLikelihoodTestProvider(-1.1, 0, 0); + new BasicLikelihoodTestProvider(0, -2.2, 0); + new BasicLikelihoodTestProvider(-100.1, -200.2, 0); + + new BasicLikelihoodTestProvider(-1.1, -2.2, -12.121); + new BasicLikelihoodTestProvider(-2.2, -1.1, -12.121); + new BasicLikelihoodTestProvider(-1.1, -1.1, -12.121); + new BasicLikelihoodTestProvider(-9.7, -15.0, -12.121); + new 
BasicLikelihoodTestProvider(-1.1, -2000.2, -12.121); + new BasicLikelihoodTestProvider(-1000.1, -2.2, -12.121); + new BasicLikelihoodTestProvider(0, 0, -12.121); + new BasicLikelihoodTestProvider(-1.1, 0, -12.121); + new BasicLikelihoodTestProvider(0, -2.2, -12.121); + new BasicLikelihoodTestProvider(-100.1, -200.2, -12.121); + + return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class); + } + + @Test(dataProvider = "BasicLikelihoodTestProvider", enabled = true) + public void testOneReadWithTwoOrThreeHaplotypes(BasicLikelihoodTestProvider cfg) { + double[][] calculatedMatrix = cfg.calcDiploidHaplotypeMatrix(); + double[][] expectedMatrix = cfg.expectedDiploidHaplotypeMatrix(); + logger.warn(String.format("Test: %s", cfg.toString())); + Assert.assertTrue(compareDoubleArrays(calculatedMatrix, expectedMatrix)); + } + + /** + * Private function to compare 2d arrays + */ + private boolean compareDoubleArrays(double[][] b1, double[][] b2) { + if( b1.length != b2.length ) { + return false; // sanity check + } + + for( int i=0; i < b1.length; i++ ){ + if( b1[i].length != b2[i].length) { + return false; // sanity check + } + for( int j=0; j < b1.length; j++ ){ + if ( MathUtils.compareDoubles(b1[i][j], b2[i][j]) != 0 && !Double.isInfinite(b1[i][j]) && !Double.isInfinite(b2[i][j])) + return false; + } + } + return true; + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java new file mode 100644 index 000000000..5652b118d --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java @@ -0,0 +1,298 @@ +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +/** + * Created by IntelliJ IDEA. 
+ * User: rpoplin + * Date: 3/27/12 + */ + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider; +import org.broadinstitute.sting.utils.Haplotype; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.jgrapht.graph.DefaultDirectedGraph; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.*; + +public class SimpleDeBruijnAssemblerUnitTest extends BaseTest { + + + private class MergeNodesWithNoVariationTestProvider extends TestDataProvider { + public byte[] sequence; + public int KMER_LENGTH; + + public MergeNodesWithNoVariationTestProvider(String seq, int kmer) { + super(MergeNodesWithNoVariationTestProvider.class, String.format("Merge nodes with no variation test. 
kmer = %d, seq = %s", kmer, seq)); + sequence = seq.getBytes(); + KMER_LENGTH = kmer; + } + + public DefaultDirectedGraph expectedGraph() { + DeBruijnVertex v = new DeBruijnVertex(sequence, 0); + DefaultDirectedGraph graph = new DefaultDirectedGraph(DeBruijnEdge.class); + graph.addVertex(v); + return graph; + } + + public DefaultDirectedGraph calcGraph() { + + DefaultDirectedGraph graph = new DefaultDirectedGraph(DeBruijnEdge.class); + final int kmersInSequence = sequence.length - KMER_LENGTH + 1; + for (int i = 0; i < kmersInSequence - 1; i++) { + // get the kmers + final byte[] kmer1 = new byte[KMER_LENGTH]; + System.arraycopy(sequence, i, kmer1, 0, KMER_LENGTH); + final byte[] kmer2 = new byte[KMER_LENGTH]; + System.arraycopy(sequence, i+1, kmer2, 0, KMER_LENGTH); + + SimpleDeBruijnAssembler.addKmersToGraph(graph, kmer1, kmer2, false); + } + SimpleDeBruijnAssembler.mergeNodes(graph); + return graph; + } + } + + @DataProvider(name = "MergeNodesWithNoVariationTestProvider") + public Object[][] makeMergeNodesWithNoVariationTests() { + new MergeNodesWithNoVariationTestProvider("GGTTAACC", 3); + new MergeNodesWithNoVariationTestProvider("GGTTAACC", 4); + new MergeNodesWithNoVariationTestProvider("GGTTAACC", 5); + new MergeNodesWithNoVariationTestProvider("GGTTAACC", 6); + new MergeNodesWithNoVariationTestProvider("GGTTAACC", 7); + new MergeNodesWithNoVariationTestProvider("GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", 6); + new MergeNodesWithNoVariationTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 66); + new 
MergeNodesWithNoVariationTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 76); + + return MergeNodesWithNoVariationTestProvider.getTests(MergeNodesWithNoVariationTestProvider.class); + } + + @Test(dataProvider = "MergeNodesWithNoVariationTestProvider", enabled = true) + public void testMergeNodesWithNoVariation(MergeNodesWithNoVariationTestProvider cfg) { + logger.warn(String.format("Test: %s", cfg.toString())); + Assert.assertTrue(graphEquals(cfg.calcGraph(), cfg.expectedGraph())); + } + + @Test(enabled = true) + public void testPruneGraph() { + DefaultDirectedGraph graph = new DefaultDirectedGraph(DeBruijnEdge.class); + DefaultDirectedGraph expectedGraph = new DefaultDirectedGraph(DeBruijnEdge.class); + + DeBruijnVertex v = new DeBruijnVertex("ATGG".getBytes(), 0); + DeBruijnVertex v2 = new DeBruijnVertex("ATGGA".getBytes(), 0); + DeBruijnVertex v3 = new DeBruijnVertex("ATGGT".getBytes(), 0); + DeBruijnVertex v4 = new DeBruijnVertex("ATGGG".getBytes(), 0); + DeBruijnVertex v5 = new DeBruijnVertex("ATGGC".getBytes(), 0); + DeBruijnVertex v6 = new DeBruijnVertex("ATGGCCCCCC".getBytes(), 0); + + graph.addVertex(v); + graph.addVertex(v2); + graph.addVertex(v3); + graph.addVertex(v4); + graph.addVertex(v5); + graph.addVertex(v6); + graph.addEdge(v, v2, new DeBruijnEdge(false, 1)); + graph.addEdge(v2, v3, new DeBruijnEdge(false, 3)); + graph.addEdge(v3, v4, new DeBruijnEdge(false, 5)); + graph.addEdge(v4, v5, new DeBruijnEdge(false, 3)); + graph.addEdge(v5, v6, new DeBruijnEdge(false, 2)); + + expectedGraph.addVertex(v2); + 
expectedGraph.addVertex(v3); + expectedGraph.addVertex(v4); + expectedGraph.addVertex(v5); + expectedGraph.addEdge(v2, v3, new DeBruijnEdge(false, 3)); + expectedGraph.addEdge(v3, v4, new DeBruijnEdge(false, 5)); + expectedGraph.addEdge(v4, v5, new DeBruijnEdge(false, 3)); + + SimpleDeBruijnAssembler.pruneGraph(graph, 2); + + Assert.assertTrue(graphEquals(graph, expectedGraph)); + + graph = new DefaultDirectedGraph(DeBruijnEdge.class); + expectedGraph = new DefaultDirectedGraph(DeBruijnEdge.class); + + graph.addVertex(v); + graph.addVertex(v2); + graph.addVertex(v3); + graph.addVertex(v4); + graph.addVertex(v5); + graph.addVertex(v6); + graph.addEdge(v, v2, new DeBruijnEdge(true, 1)); + graph.addEdge(v2, v3, new DeBruijnEdge(false, 3)); + graph.addEdge(v3, v4, new DeBruijnEdge(false, 5)); + graph.addEdge(v4, v5, new DeBruijnEdge(false, 3)); + + expectedGraph.addVertex(v); + expectedGraph.addVertex(v2); + expectedGraph.addVertex(v3); + expectedGraph.addVertex(v4); + expectedGraph.addVertex(v5); + expectedGraph.addEdge(v, v2, new DeBruijnEdge(true, 1)); + expectedGraph.addEdge(v2, v3, new DeBruijnEdge(false, 3)); + expectedGraph.addEdge(v3, v4, new DeBruijnEdge(false, 5)); + expectedGraph.addEdge(v4, v5, new DeBruijnEdge(false, 3)); + + SimpleDeBruijnAssembler.pruneGraph(graph, 2); + + Assert.assertTrue(graphEquals(graph, expectedGraph)); + } + + @Test(enabled=false) +// not ready yet + public void testBasicGraphCreation() { + final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); + final byte refBase = refPileupTestProvider.getReferenceContext().getBase(); + final String altBase = (refBase==(byte)'A'?"C":"A"); + final int matches = 50; + final int mismatches = 50; + Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, altBase, new int[]{matches, mismatches}, false, 30); + PrintStream graphWriter = null; + + try{ + graphWriter = new PrintStream("du.txt"); + } catch (Exception e) {} + + + 
SimpleDeBruijnAssembler assembler = new SimpleDeBruijnAssembler(true,graphWriter); + final Haplotype refHaplotype = new Haplotype(refPileupTestProvider.getReferenceContext().getBases()); + refHaplotype.setIsReference(true); + assembler.createDeBruijnGraphs(refContext.get(refPileupTestProvider.getSampleNames().get(0)).getBasePileup().getReads(), refHaplotype); + +/* // clean up the graphs by pruning and merging + for( final DefaultDirectedGraph graph : graphs ) { + SimpleDeBruijnAssembler.pruneGraph( graph, PRUNE_FACTOR ); + //eliminateNonRefPaths( graph ); + SimpleDeBruijnAssembler.mergeNodes( graph ); + } + */ + if( graphWriter != null ) { + assembler.printGraphs(); + } + + int k=2; + + // find the best paths in the graphs + // return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() ); + + } + @Test(enabled = true) + public void testEliminateNonRefPaths() { + DefaultDirectedGraph graph = new DefaultDirectedGraph(DeBruijnEdge.class); + DefaultDirectedGraph expectedGraph = new DefaultDirectedGraph(DeBruijnEdge.class); + + DeBruijnVertex v = new DeBruijnVertex("ATGG".getBytes(), 0); + DeBruijnVertex v2 = new DeBruijnVertex("ATGGA".getBytes(), 0); + DeBruijnVertex v3 = new DeBruijnVertex("ATGGT".getBytes(), 0); + DeBruijnVertex v4 = new DeBruijnVertex("ATGGG".getBytes(), 0); + DeBruijnVertex v5 = new DeBruijnVertex("ATGGC".getBytes(), 0); + DeBruijnVertex v6 = new DeBruijnVertex("ATGGCCCCCC".getBytes(), 0); + + graph.addVertex(v); + graph.addVertex(v2); + graph.addVertex(v3); + graph.addVertex(v4); + graph.addVertex(v5); + graph.addVertex(v6); + graph.addEdge(v, v2, new DeBruijnEdge(false)); + graph.addEdge(v2, v3, new DeBruijnEdge(true)); + graph.addEdge(v3, v4, new DeBruijnEdge(true)); + graph.addEdge(v4, v5, new DeBruijnEdge(true)); + graph.addEdge(v5, v6, new DeBruijnEdge(false)); + + expectedGraph.addVertex(v2); + expectedGraph.addVertex(v3); + expectedGraph.addVertex(v4); + 
expectedGraph.addVertex(v5); + expectedGraph.addEdge(v2, v3, new DeBruijnEdge()); + expectedGraph.addEdge(v3, v4, new DeBruijnEdge()); + expectedGraph.addEdge(v4, v5, new DeBruijnEdge()); + + SimpleDeBruijnAssembler.eliminateNonRefPaths(graph); + + Assert.assertTrue(graphEquals(graph, expectedGraph)); + + + + + graph = new DefaultDirectedGraph(DeBruijnEdge.class); + expectedGraph = new DefaultDirectedGraph(DeBruijnEdge.class); + + graph.addVertex(v); + graph.addVertex(v2); + graph.addVertex(v3); + graph.addVertex(v4); + graph.addVertex(v5); + graph.addVertex(v6); + graph.addEdge(v, v2, new DeBruijnEdge(true)); + graph.addEdge(v2, v3, new DeBruijnEdge(true)); + graph.addEdge(v4, v5, new DeBruijnEdge(false)); + graph.addEdge(v5, v6, new DeBruijnEdge(false)); + + expectedGraph.addVertex(v); + expectedGraph.addVertex(v2); + expectedGraph.addVertex(v3); + expectedGraph.addEdge(v, v2, new DeBruijnEdge()); + expectedGraph.addEdge(v2, v3, new DeBruijnEdge()); + + SimpleDeBruijnAssembler.eliminateNonRefPaths(graph); + + Assert.assertTrue(graphEquals(graph, expectedGraph)); + + + + graph = new DefaultDirectedGraph(DeBruijnEdge.class); + expectedGraph = new DefaultDirectedGraph(DeBruijnEdge.class); + + graph.addVertex(v); + graph.addVertex(v2); + graph.addVertex(v3); + graph.addVertex(v4); + graph.addVertex(v5); + graph.addVertex(v6); + graph.addEdge(v, v2, new DeBruijnEdge(true)); + graph.addEdge(v2, v3, new DeBruijnEdge(true)); + graph.addEdge(v4, v5, new DeBruijnEdge(false)); + graph.addEdge(v5, v6, new DeBruijnEdge(false)); + graph.addEdge(v4, v2, new DeBruijnEdge(false)); + + expectedGraph.addVertex(v); + expectedGraph.addVertex(v2); + expectedGraph.addVertex(v3); + expectedGraph.addEdge(v, v2, new DeBruijnEdge()); + expectedGraph.addEdge(v2, v3, new DeBruijnEdge()); + + SimpleDeBruijnAssembler.eliminateNonRefPaths(graph); + + Assert.assertTrue(graphEquals(graph, expectedGraph)); + } + + private boolean graphEquals(DefaultDirectedGraph g1, DefaultDirectedGraph g2) { + 
if( !(g1.vertexSet().containsAll(g2.vertexSet()) && g2.vertexSet().containsAll(g1.vertexSet())) ) { + return false; + } + for( DeBruijnEdge e1 : g1.edgeSet() ) { + boolean found = false; + for( DeBruijnEdge e2 : g2.edgeSet() ) { + if( e1.equals(g1, e2, g2) ) { found = true; break; } + } + if( !found ) { return false; } + } + for( DeBruijnEdge e2 : g2.edgeSet() ) { + boolean found = false; + for( DeBruijnEdge e1 : g1.edgeSet() ) { + if( e2.equals(g2, e1, g1) ) { found = true; break; } + } + if( !found ) { return false; } + } + return true; + } +} diff --git a/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_OtherCovariate.R b/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_OtherCovariate.R deleted file mode 100644 index 15c6fc8f0..000000000 --- a/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_OtherCovariate.R +++ /dev/null @@ -1,121 +0,0 @@ -#!/bin/env Rscript - -library(tools) - -args <- commandArgs(TRUE) -verbose = TRUE - -input = args[1] -covariateName = args[2] - -outfile = paste(input, ".qual_diff_v_", covariateName, ".pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -c <- read.table(input, header=T) -c <- c[sort.list(c[,1]),] - -# -# Plot residual error as a function of the covariate -# - -d.good <- c[c$nBases >= 1000,] -d.1000 <- c[c$nBases < 1000,] -rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh -rmseAll = sqrt( sum(as.numeric((c$Qempirical-c$Qreported)^2 * c$nBases)) / sum(as.numeric(c$nBases)) ) -theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3)) -if( length(d.good$nBases) == length(c$nBases) ) { - theTitle = paste("RMSE =", round(rmseAll,digits=3)) -} -# Don't let residual error go off the edge of the plot -d.good$residualError = d.good$Qempirical-d.good$Qreported 
-d.good$residualError[which(d.good$residualError > 10)] = 10 -d.good$residualError[which(d.good$residualError < -10)] = -10 -d.1000$residualError = d.1000$Qempirical-d.1000$Qreported -d.1000$residualError[which(d.1000$residualError > 10)] = 10 -d.1000$residualError[which(d.1000$residualError < -10)] = -10 -c$residualError = c$Qempirical-c$Qreported -c$residualError[which(c$residualError > 10)] = 10 -c$residualError[which(c$residualError < -10)] = -10 -pointType = "p" -if( length(c$Covariate) <= 20 ) { - pointType = "o" -} -if( is.numeric(c$Covariate) ) { - plot(d.good$Covariate, d.good$residualError, type=pointType, main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(-10, 10), xlim=c(min(c$Covariate),max(c$Covariate))) - points(d.1000$Covariate, d.1000$residualError, type=pointType, col="cornflowerblue", pch=20) -} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice - plot(c$Covariate, c$residualError, type="l", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", ylim=c(-10, 10)) - points(d.1000$Covariate, d.1000$residualError, type="l", col="cornflowerblue") -} -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} - -# -# Plot mean quality versus the covariate -# - -outfile = paste(input, ".reported_qual_v_", covariateName, ".pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -pointType = "p" -if( length(c$Covariate) <= 20 ) { - pointType = "o" -} -theTitle = paste("Quality By", covariateName); -if( is.numeric(c$Covariate) ) { - plot(d.good$Covariate, d.good$Qreported, type=pointType, main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(0, 40), xlim=c(min(c$Covariate),max(c$Covariate))) - points(d.1000$Covariate, d.1000$Qreported, type=pointType, col="cornflowerblue", pch=20) -} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice - 
plot(c$Covariate, c$Qreported, type="l", main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", ylim=c(0, 40)) - points(d.1000$Covariate, d.1000$Qreported, type="l", col="cornflowerblue") -} -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} - -# -# Plot histogram of the covariate -# - -e = d.good -f = d.1000 -outfile = paste(input, ".", covariateName,"_hist.pdf", sep="") -pdf(outfile, height=7, width=7) -hst=subset(data.frame(e$Covariate, e$nBases), e.nBases != 0) -hst2=subset(data.frame(f$Covariate, f$nBases), f.nBases != 0) - -lwdSize=2 -if( length(c$Covariate) <= 20 ) { - lwdSize=7 -} else if( length(c$Covariate) <= 70 ) { - lwdSize=4 -} - -if( is.numeric(c$Covariate) ) { - if( length(hst$e.Covariate) == 0 ) { - plot(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue", main=paste(covariateName,"histogram"), ylim=c(0, max(hst2$f.nBases)), xlab=covariateName, ylab="Count",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate))) - } else { - plot(hst$e.Covariate, hst$e.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), xlab=covariateName, ylim=c(0, max(hst$e.nBases)),ylab="Number of Bases",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate))) - points(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue") - } - axis(2,axTicks(2), format(axTicks(2), scientific=F)) -} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice - hst=subset(data.frame(c$Covariate, c$nBases), c.nBases != 0) - plot(1:length(hst$c.Covariate), hst$c.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), ylim=c(0, max(hst$c.nBases)),xlab=covariateName, ylab="Number of Bases",yaxt="n",xaxt="n") - if( length(hst$c.Covariate) > 9 ) { - axis(1, at=seq(1,length(hst$c.Covariate),2), labels = hst$c.Covariate[seq(1,length(hst$c.Covariate),2)]) - } else { - axis(1, at=seq(1,length(hst$c.Covariate),1), labels = hst$c.Covariate) - } - axis(2,axTicks(2), 
format(axTicks(2), scientific=F)) -} -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} diff --git a/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_QualityScoreCovariate.R b/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_QualityScoreCovariate.R deleted file mode 100644 index 33eeb1f16..000000000 --- a/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_QualityScoreCovariate.R +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/env Rscript - -library(tools) - -args <- commandArgs(TRUE) - -input = args[1] -Qcutoff = as.numeric(args[2]) -maxQ = as.numeric(args[3]) -maxHist = as.numeric(args[4]) - -t=read.table(input, header=T) - -# -# Plot of reported quality versus empirical quality -# - -outfile = paste(input, ".quality_emp_v_stated.pdf", sep="") -pdf(outfile, height=7, width=7) -d.good <- t[t$nBases >= 10000 & t$Qreported >= Qcutoff,] -d.1000 <- t[t$nBases < 1000 & t$Qreported >= Qcutoff,] -d.10000 <- t[t$nBases < 10000 & t$nBases >= 1000 & t$Qreported >= Qcutoff,] -f <- t[t$Qreported < Qcutoff,] -e <- rbind(d.good, d.1000, d.10000) -rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh -rmseAll = sqrt( sum(as.numeric((e$Qempirical-e$Qreported)^2 * e$nBases)) / sum(as.numeric(e$nBases)) ) -theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3)) -if( length(t$nBases) - length(f$nBases) == length(d.good$nBases) ) { - theTitle = paste("RMSE =", round(rmseAll,digits=3)); -} -plot(d.good$Qreported, d.good$Qempirical, type="p", col="blue", main=theTitle, xlim=c(0,maxQ), ylim=c(0,maxQ), pch=16, xlab="Reported quality score", ylab="Empirical quality score") -points(d.1000$Qreported, d.1000$Qempirical, type="p", col="lightblue", pch=16) -points(d.10000$Qreported, d.10000$Qempirical, type="p", col="cornflowerblue", pch=16) 
-points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16) -abline(0,1, lty=2) -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} - -# -# Plot Q empirical histogram -# - -outfile = paste(input, ".quality_emp_hist.pdf", sep="") -pdf(outfile, height=7, width=7) -hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0) -hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0) -percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases)) -entropy = -sum(log2(percentBases)*percentBases) -yMax = max(hst$e.nBases) -if(maxHist != 0) { -yMax = maxHist -} -plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n") -points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1") -axis(2,axTicks(2), format(axTicks(2), scientific=F)) -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} - -# -# Plot Q reported histogram -# - -outfile = paste(input, ".quality_rep_hist.pdf", sep="") -pdf(outfile, height=7, width=7) -hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0) -hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0) -yMax = max(hst$e.nBases) -if(maxHist != 0) { -yMax = maxHist -} -plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n") -points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1") -axis(2,axTicks(2), format(axTicks(2), scientific=F)) -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} diff --git a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R new file mode 100644 index 000000000..6c4dace1d --- /dev/null +++ 
b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R @@ -0,0 +1,101 @@ +library("ggplot2") +library("tools") #For compactPDF in R 2.13+ + +args <- commandArgs(TRUE) +data <- read.csv(args[1]) +data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType)))) + +numRG = length(unique(data$ReadGroup)) +blankTheme = opts(panel.grid.major = theme_blank(), panel.grid.minor = theme_blank(), panel.background = theme_blank(), axis.ticks = theme_blank()) + +# Viewport (layout 2 graphs top to bottom) +distributeGraphRows <- function(graphs, heights = c()) { + if (length(heights) == 0) { + heights <- rep.int(1, length(graphs)) + } + heights <- heights[!is.na(graphs)] + graphs <- graphs[!is.na(graphs)] + numGraphs <- length(graphs) + Layout <- grid.layout(nrow = numGraphs, ncol = 1, heights=heights) + grid.newpage() + pushViewport(viewport(layout = Layout)) + subplot <- function(x) viewport(layout.pos.row = x, layout.pos.col = 1) + for (i in 1:numGraphs) { + print(graphs[[i]], vp = subplot(i)) + } +} + + +for(cov in levels(data$CovariateName)) { # for each covariate in turn + d = data[data$CovariateName==cov,] # pull out just the data for this covariate so we can treat the non-numeric values appropriately + if( cov == "Context" ) { + d$CovariateValue = as.character(d$CovariateValue) + d$CovariateValue = substring(d$CovariateValue,nchar(d$CovariateValue)-2,nchar(d$CovariateValue)) + } else { + d$CovariateValue = as.numeric(levels(d$CovariateValue))[as.integer(d$CovariateValue)] # efficient way to convert factors back to their real values + } + #d=subset(d,Observations>2000) # only show bins which have enough data to actually estimate the quality + dSub=subset(d,EventType=="Base Substitution") + dIns=subset(d,EventType=="Base Insertion") + dDel=subset(d,EventType=="Base Deletion") + dSub=dSub[sample.int(length(dSub[,1]),min(length(dSub[,1]),2000)),] # don't plot too many values because it makes the PDFs too massive + 
dIns=dIns[sample.int(length(dIns[,1]),min(length(dIns[,1]),2000)),] # don't plot too many values because it makes the PDFs too massive + dDel=dDel[sample.int(length(dDel[,1]),min(length(dDel[,1]),2000)),] # don't plot too many values because it makes the PDFs too massive + d=rbind(dSub, dIns, dDel) + + if( cov != "QualityScore" ) { + p <- ggplot(d, aes(x=CovariateValue,y=Accuracy,alpha=log10(Observations))) + + geom_abline(intercept=0, slope=0, linetype=2) + + xlab(paste(cov,"Covariate")) + + ylab("Quality Score Accuracy") + + blankTheme + if(cov == "Cycle") { + b <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + opts(axis.text.x=theme_text(angle=90, hjust=0)) + + p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) + + xlab(paste(cov,"Covariate")) + + ylab("Mean Quality Score") + + blankTheme + e <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + opts(axis.text.x=theme_text(angle=90, hjust=0)) + + + } else { + c <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + opts(axis.text.x=theme_text(angle=90, hjust=0)) + xlab(paste(cov,"Covariate (3 base suffix)")) + p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) + + xlab(paste(cov,"Covariate (3 base suffix)")) + + ylab("Mean Quality Score") + + blankTheme + f <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + opts(axis.text.x=theme_text(angle=90, hjust=0)) + + } + } else { + p <- ggplot(d, aes(x=AverageReportedQuality,y=EmpiricalQuality,alpha=log10(Observations))) + + geom_abline(intercept=0, slope=1, linetype=2) + + xlab("Reported Quality Score") + + ylab("Empirical Quality Score") + + blankTheme + a <- p + geom_point(aes(color=Recalibration)) + 
scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + p <- ggplot(d, aes(x=CovariateValue)) + + xlab(paste(cov,"Covariate")) + + ylab("Number of Observations") + + blankTheme + d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations),alpha=0.6,binwidth=1,position="identity") + scale_fill_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + scale_y_continuous(formatter="comma") + + } +} + +pdf(args[2],height=9,width=15) +distributeGraphRows(list(a,b,c), c(1,1,1)) +distributeGraphRows(list(d,e,f), c(1,1,1)) +dev.off() + + +if (exists('compactPDF')) { + compactPDF(args[2]) +} diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd index 2b8d6db9f..dc7a08287 100644 --- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd @@ -19,9 +19,9 @@ Medical and Population Genetics Program Maintainer: Kiran Garimella } \references{ -GSA wiki page: http://www.broadinstitute.org/gsa/wiki +GSA wiki page: http://www.broadinstitute.org/gatk -GATK help forum: http://www.getsatisfaction.com/gsa +GATK help forum: http://www.broadinstitute.org/gatk } \examples{ ## get script arguments in interactive and non-interactive mode diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java rename to public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java index a342cf932..e8eea5ff0 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java @@ -29,11 +29,13 @@ import 
org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Iterator; @@ -46,7 +48,8 @@ import java.util.Iterator; * @author mhanna * @version 0.1 */ -public class AlignmentValidationWalker extends ReadWalker { +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class AlignmentValidation extends ReadWalker { /** * The supporting BWT index generated using BWT. 
*/ diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java index c8554573b..6206fc2ce 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; @@ -50,6 +52,7 @@ import java.io.File; * @author mhanna * @version 0.1 */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @WalkerName("Align") public class AlignmentWalker extends ReadWalker { @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. 
Alongside this reference should sit index files " + diff --git a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java rename to public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java index d91b83e7a..336c95d42 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java +++ b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java @@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; @@ -48,7 +50,8 @@ import java.util.TreeMap; * @author mhanna * @version 0.1 */ -public class CountBestAlignmentsWalker extends ReadWalker { +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class CountBestAlignments extends ReadWalker { /** * The supporting BWT index generated using BWT. 
*/ diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java deleted file mode 100755 index 1230c86be..000000000 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.analyzecovariates; - -import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; -import org.broadinstitute.sting.utils.collections.NestedHashMap; - -import java.util.ArrayList; - -/** - * Created by IntelliJ IDEA. 
- * User: rpoplin - * Date: Dec 1, 2009 - * - * The difference between this AnalysisDataManager and the RecalDataManager used by the Recalibration walkers is that here the collapsed data tables are indexed - * by only read group and the given covariate, while in the recalibrator the collapsed tables are indexed by read group, reported quality, and the given covariate. - */ - -public class AnalysisDataManager { - - private NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed - private ArrayList dataCollapsedByCovariate; // Tables where everything except read group and given covariate has been collapsed - - AnalysisDataManager() { - } - - AnalysisDataManager( final int numCovariates ) { - dataCollapsedReadGroup = new NestedHashMap(); - dataCollapsedByCovariate = new ArrayList(); - for( int iii = 0; iii < numCovariates - 1; iii++ ) { // readGroup isn't counted here, its table is separate - dataCollapsedByCovariate.add( new NestedHashMap() ); - } - } - - /** - * Add the given mapping to all of the collapsed hash tables - * @param key The list of comparables that is the key for this mapping - * @param fullDatum The RecalDatum which is the data for this mapping - * @param IGNORE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table - */ - public final void addToAllTables( final Object[] key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) { - - final int qscore = Integer.parseInt( key[1].toString() ); - RecalDatum collapsedDatum; - final Object[] readGroupCollapsedKey = new Object[1]; - final Object[] covariateCollapsedKey = new Object[2]; - - if( !(qscore < IGNORE_QSCORES_LESS_THAN) ) { - // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed - readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group - collapsedDatum = (RecalDatum)dataCollapsedReadGroup.get( readGroupCollapsedKey ); - if( collapsedDatum 
== null ) { - dataCollapsedReadGroup.put( new RecalDatum(fullDatum), readGroupCollapsedKey ); - } else { - collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported - } - } - - // Create dataCollapsedByCovariate's, the tables where everything except read group and given covariate has been collapsed - for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) { - if( iii == 0 || !(qscore < IGNORE_QSCORES_LESS_THAN) ) { // use all data for the plot versus reported quality, but not for the other plots versus cycle and etc. - covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ... - Object theCovariateElement = key[iii + 1]; // and the given covariate - if( theCovariateElement != null ) { - covariateCollapsedKey[1] = theCovariateElement; - collapsedDatum = (RecalDatum)dataCollapsedByCovariate.get(iii).get( covariateCollapsedKey ); - if( collapsedDatum == null ) { - dataCollapsedByCovariate.get(iii).put( new RecalDatum(fullDatum), covariateCollapsedKey ); - } else { - collapsedDatum.combine( fullDatum ); - } - } - } - } - } - - /** - * Get the appropriate collapsed table out of the set of all the tables held by this Object - * @param covariate Which covariate indexes the desired collapsed HashMap - * @return The desired collapsed HashMap - */ - public final NestedHashMap getCollapsedTable( final int covariate ) { - if( covariate == 0) { - return dataCollapsedReadGroup; // Table where everything except read group has been collapsed - } else { - return dataCollapsedByCovariate.get( covariate - 1 ); // Table where everything except read group, quality score, and given covariate has been collapsed - } - } - -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java deleted file mode 100755 index 59a3d8cdb..000000000 --- 
a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.analyzecovariates; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.CommandLineProgram; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; -import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; -import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; -import org.broadinstitute.sting.utils.R.RScriptExecutor; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.io.Resource; -import org.broadinstitute.sting.utils.text.XReadLines; - -import java.io.*; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Map; -import java.util.regex.Pattern; - -/** - * Call R scripts to plot residual error versus the various covariates. - * - *

- * After counting covariates in either the initial BAM File or again in the recalibrated BAM File, an analysis tool is available which - * reads the .csv file and outputs several PDF (and .dat) files for each read group in the given BAM. These PDF files graphically - * show the various metrics and characteristics of the reported quality scores (often in relation to the empirical qualities). - * In order to show that any biases in the reported quality scores have been generally fixed through recalibration one should run - * CountCovariates again on a bam file produced by TableRecalibration. In this way users can compare the analysis plots generated - * by pre-recalibration and post-recalibration .csv files. Our usual chain of commands that we use to generate plots of residual - * error is: CountCovariates, TableRecalibrate, samtools index on the recalibrated bam file, CountCovariates again on the recalibrated - * bam file, and then AnalyzeCovariates on both the before and after recal_data.csv files to see the improvement in recalibration. - * - *

- * The color coding along with the RMSE is included in the plots to give some indication of the number of observations that went into - * each of the quality score estimates. It is defined as follows for N, the number of observations: - * - *

    - *
  • light blue means N < 1,000
  • - *
  • cornflower blue means 1,000 <= N < 10,000
  • - *
  • dark blue means N >= 10,000
  • - *
  • The pink dots indicate points whose quality scores are special codes used by the aligner and which are mathematically - * meaningless and so aren't included in any of the numerical calculations.
  • - *
- * - *

- * NOTE: Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version). - * See http://www.r-project.org for more info on how to download and install R. - * - *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration - * - *

Input

- *

- * The recalibration table file in CSV format that was generated by the CountCovariates walker. - *

- * - *

Examples

- *
- * java -Xmx4g -jar AnalyzeCovariates.jar \
- *   -recalFile /path/to/recal.table.csv  \
- *   -outputDir /path/to/output_dir/  \
- *   -ignoreQ 5
- * 
- * - */ - -@DocumentedGATKFeature( - groupName = "AnalyzeCovariates", - summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") -public class AnalyzeCovariates extends CommandLineProgram { - final private static Logger logger = Logger.getLogger(AnalyzeCovariates.class); - - private static final String PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE = "plot_residualError_QualityScoreCovariate.R"; - private static final String PLOT_RESDIUAL_ERROR_OTHER_COVARIATE = "plot_residualError_OtherCovariate.R"; - private static final String PLOT_INDEL_QUALITY_RSCRIPT = "plot_indelQuality.R"; - - ///////////////////////////// - // Command Line Arguments - ///////////////////////////// - /** - * After the header, data records occur one per line until the end of the file. The first several items on a line are the - * values of the individual covariates and will change depending on which covariates were specified at runtime. The last - * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, - * and the raw empirical quality score calculated by phred-scaling the mismatch rate. - */ - @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false) - private String RECAL_FILE = "output.recal_data.csv"; - @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) - private File OUTPUT_DIR = new File("analyzeCovariates"); - @Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false) - private int IGNORE_QSCORES_LESS_THAN = 5; - @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. 
Default value: -1 (process all read groups)", required = false) - private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups - - /** - * Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation - * by capping at the specified value. We've found that Q40 is too low when using a more completely database of known variation like dbSNP build 132 or later. - */ - @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50") - private int MAX_QUALITY_SCORE = 50; - - /** - * This argument is useful for comparing before/after plots and you want the axes to match each other. - */ - @Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots") - private int MAX_HISTOGRAM_VALUE = 0; - - @Hidden - @Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, do indel quality plotting") - private boolean DO_INDEL_QUALITY = false; - - ///////////////////////////// - // Private Member Variables - ///////////////////////////// - private AnalysisDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps - private ArrayList requestedCovariates; // List of covariates to be used in this calculation - private final Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); - private final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*"); - private final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*"); - protected static final String EOF_MARKER = "EOF"; - - protected int execute() { - - // create the output directory where all the data tables and plots will go - if (!OUTPUT_DIR.exists() && !OUTPUT_DIR.mkdirs()) - throw new UserException.BadArgumentValue("--output_dir/-outDir", "Unable to 
create output directory: " + OUTPUT_DIR); - - if (!RScriptExecutor.RSCRIPT_EXISTS) - Utils.warnUser(logger, "Rscript not found in environment path. Plots will not be generated."); - - // initialize all the data from the csv file and allocate the list of covariates - logger.info("Reading in input csv file..."); - initializeData(); - logger.info("...Done!"); - - // output data tables for Rscript to read in - logger.info("Writing out intermediate tables for R..."); - writeDataTables(); - logger.info("...Done!"); - - // perform the analysis using Rscript and output the plots - logger.info("Calling analysis R scripts and writing out figures..."); - callRScripts(); - logger.info("...Done!"); - - return 0; - } - - private void initializeData() { - - // Get a list of all available covariates - Collection> classes = new PluginManager(Covariate.class).getPlugins(); - - int lineNumber = 0; - boolean foundAllCovariates = false; - - // Read in the covariates that were used from the input file - requestedCovariates = new ArrayList(); - - try { - for ( final String line : new XReadLines(new File( RECAL_FILE )) ) { - lineNumber++; - if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() || line.equals(EOF_MARKER) ) { - ; // Skip over the comment lines, (which start with '#') - } - else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data - if( foundAllCovariates ) { - throw new RuntimeException( "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE ); - } else { // Found the covariate list in input file, loop through all of them and instantiate them - String[] vals = line.split(","); - for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. 
The last three items are nObservations, nMismatch, and Qempirical - boolean foundClass = false; - for( Class covClass : classes ) { - if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) { - foundClass = true; - try { - Covariate covariate = (Covariate)covClass.newInstance(); - requestedCovariates.add( covariate ); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - } - - if( !foundClass ) { - throw new RuntimeException( "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." ); - } - } - - } - - } else { // Found a line of data - if( !foundAllCovariates ) { - - foundAllCovariates = true; - - // At this point all the covariates should have been found and initialized - if( requestedCovariates.size() < 2 ) { - throw new RuntimeException( "Malformed input recalibration file. Covariate names can't be found in file: " + RECAL_FILE ); - } - - // Initialize any covariate member variables using the shared argument collection - for( Covariate cov : requestedCovariates ) { - cov.initialize( new RecalibrationArgumentCollection() ); - } - - // Initialize the data hashMaps - dataManager = new AnalysisDataManager( requestedCovariates.size() ); - - } - addCSVData(line); // Parse the line and add the data to the HashMap - } - } - - } catch ( FileNotFoundException e ) { - throw new RuntimeException("Can not find input file: " + RECAL_FILE); - } catch ( NumberFormatException e ) { - throw new RuntimeException("Error parsing recalibration data at line " + lineNumber + ". 
Perhaps your table was generated by an older version of CovariateCounterWalker."); - } - } - - private void addCSVData(String line) { - String[] vals = line.split(","); - - // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly - if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical - throw new RuntimeException("Malformed input recalibration file. Found data line with too many fields: " + line + - " --Perhaps the read group string contains a comma and isn't being parsed correctly."); - } - - Object[] key = new Object[requestedCovariates.size()]; - Covariate cov; - int iii; - for( iii = 0; iii < requestedCovariates.size(); iii++ ) { - cov = requestedCovariates.get( iii ); - key[iii] = cov.getValue( vals[iii] ); - } - // Create a new datum using the number of observations, number of mismatches, and reported quality score - final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 ); - // Add that datum to all the collapsed tables which will be used in the sequential calculation - dataManager.addToAllTables( key, datum, IGNORE_QSCORES_LESS_THAN ); - } - - private void writeDataTables() { - - int numReadGroups = 0; - - // for each read group - for( final Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) { - - if( NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS ) { - final String readGroup = readGroupKey.toString(); - final RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey); - logger.info(String.format( - "Writing out data tables for read group: %s\twith %s observations\tand aggregate residual error = %.3f", - readGroup, readGroupDatum.getNumObservations(), - readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported())); - - 
// for each covariate - for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { - Covariate cov = requestedCovariates.get(iii); - - // Create a PrintStream - File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat"); - PrintStream output; - try { - output = new PrintStream(FileUtils.openOutputStream(outputFile)); - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(outputFile, e); - } - - try { - // Output the header - output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases"); - - for( final Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet() ) { - output.print( covariateKey.toString() + "\t" ); // Covariate - final RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey); - output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported - output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical - output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches - output.println( thisDatum.getNumObservations() ); // nBases - } - } finally { - // Close the PrintStream - IOUtils.closeQuietly(output); - } - } - } else { - break; - } - - } - } - - private void callRScripts() { - int numReadGroups = 0; - - // for each read group - for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) { - if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) { - - String readGroup = readGroupKey.toString(); - logger.info("Analyzing read group: " + readGroup); - - // for each covariate - for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { - final Covariate cov = requestedCovariates.get(iii); - final File outputFile = new File(OUTPUT_DIR, readGroup + "." 
+ cov.getClass().getSimpleName()+ ".dat"); - if (DO_INDEL_QUALITY) { - RScriptExecutor executor = new RScriptExecutor(); - executor.addScript(new Resource(PLOT_INDEL_QUALITY_RSCRIPT, AnalyzeCovariates.class)); - // The second argument is the name of the covariate in order to make the plots look nice - executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]); - executor.exec(); - } else { - if( iii == 1 ) { - // Analyze reported quality - RScriptExecutor executor = new RScriptExecutor(); - executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE, AnalyzeCovariates.class)); - // The second argument is the Q scores that should be turned pink in the plot because they were ignored - executor.addArgs(outputFile, IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); - executor.exec(); - } else { // Analyze all other covariates - RScriptExecutor executor = new RScriptExecutor(); - executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_OTHER_COVARIATE, AnalyzeCovariates.class)); - // The second argument is the name of the covariate in order to make the plots look nice - executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]); - executor.exec(); - } - } - } - } else { // at the maximum number of read groups so break out - break; - } - } - } - - public static void main(String args[]) { - try { - AnalyzeCovariates clp = new AnalyzeCovariates(); - start(clp, args); - System.exit(CommandLineProgram.result); - } catch (Exception e) { - exitSystemWithError(e); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/package-info.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/package-info.java deleted file mode 100644 index 9350e4a66..000000000 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/package-info.java +++ /dev/null @@ -1,4 +0,0 @@ -/** - * Package to plot residual accuracy versus error covariates for the base quality score 
recalibrator. - */ -package org.broadinstitute.sting.analyzecovariates; \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index 9e1be5bca..15ec9dfe5 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -287,8 +287,8 @@ public abstract class CommandLineProgram { * a function used to indicate an error occurred in the command line tool */ private static void printDocumentationReference() { - errorPrintf("Visit our wiki for extensive documentation http://www.broadinstitute.org/gsa/wiki%n"); - errorPrintf("Visit our forum to view answers to commonly asked questions http://getsatisfaction.com/gsa%n"); + errorPrintf("Visit our website and forum for extensive documentation and answers to %n"); + errorPrintf("commonly asked questions http://www.broadinstitute.org/gatk%n"); } @@ -369,9 +369,9 @@ public abstract class CommandLineProgram { System.exit(1); } - public static void exitSystemWithSamError(final Exception e) { - if ( e.getMessage() == null ) - throw new ReviewedStingException("SamException found with no message!", e); + public static void exitSystemWithSamError(final Throwable t) { + if ( t.getMessage() == null ) + throw new ReviewedStingException("SamException found with no message!", t); errorPrintf("------------------------------------------------------------------------------------------%n"); errorPrintf("A BAM ERROR has occurred (version %s): %n", CommandLineGATK.getVersionNumber()); @@ -383,7 +383,7 @@ public abstract class CommandLineProgram { errorPrintf("Also, please ensure that your BAM index is not corrupted: delete the current one and regenerate it with 'samtools index'%n"); printDocumentationReference(); errorPrintf("%n"); - errorPrintf("MESSAGE: %s%n", e.getMessage().trim()); + 
errorPrintf("MESSAGE: %s%n", t.getMessage().trim()); errorPrintf("------------------------------------------------------------------------------------------%n"); System.exit(1); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index c6bb4a27c..0286cdc52 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -130,8 +130,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram { getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) { if ( getArgumentCollection().gatkKeyFile == null ) { throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + - "for more information and instructions on how to obtain a key."); + "Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + + " for more information and instructions on how to obtain a key."); } else { PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 70c6bc734..312d31727 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -36,22 +36,23 @@ import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.walkers.Attribution; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.help.*; +import org.broadinstitute.sting.utils.help.ApplicationDetails; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import 
org.broadinstitute.sting.utils.help.GATKDocUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; /** + * All command line parameters accepted by all tools in the GATK. + * * The GATK engine itself. Manages map/reduce data access and runs walkers. * * We run command line GATK programs using this class. It gets the command line args, parses them, and hands the * gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here, * the gatk engine should deal with any data related information. */ -@DocumentedGATKFeature( - groupName = "GATK Engine", - summary = "Features and arguments for the GATK engine itself, available to all walkers.", - extraDocs = { UserException.class }) +@DocumentedGATKFeature(groupName = "GATK Engine") public class CommandLineGATK extends CommandLineExecutable { @Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run") private String analysisName = null; @@ -101,20 +102,41 @@ public class CommandLineGATK extends CommandLineExecutable { // TODO: Should Picard exceptions be, in general, UserExceptions or ReviewedStingExceptions? 
exitSystemWithError(e); } catch (SAMException e) { - checkForTooManyOpenFilesProblem(e.getMessage()); + checkForMaskedUserErrors(e); exitSystemWithSamError(e); } catch (OutOfMemoryError e) { exitSystemWithUserError(new UserException.NotEnoughMemory()); } catch (Throwable t) { - checkForTooManyOpenFilesProblem(t.getMessage()); + checkForMaskedUserErrors(t); exitSystemWithError(t); } } - private static void checkForTooManyOpenFilesProblem(String message) { - // Special case the "Too many open files" error because it's a common User Error for which we know what to do - if ( message != null && message.indexOf("Too many open files") != -1 ) + protected static final String PICARD_TEXT_SAM_FILE_ERROR_1 = "Cannot use index file with textual SAM file"; + protected static final String PICARD_TEXT_SAM_FILE_ERROR_2 = "Cannot retrieve file pointers within SAM text files"; + private static void checkForMaskedUserErrors(final Throwable t) { + final String message = t.getMessage(); + if ( message == null ) + return; + + // we know what to do about the common "Too many open files" error + if ( message.indexOf("Too many open files") != -1 ) exitSystemWithUserError(new UserException.TooManyOpenFiles()); + + // malformed BAM looks like a SAM file + if ( message.indexOf(PICARD_TEXT_SAM_FILE_ERROR_1) != -1 || + message.indexOf(PICARD_TEXT_SAM_FILE_ERROR_2) != -1 ) + exitSystemWithSamError(t); + + // can't close tribble index when writing + if ( message.indexOf("Unable to close index for") != -1 ) + exitSystemWithUserError(new UserException(t.getCause() == null ? 
message : t.getCause().getMessage())); + + // disk is full + if ( message.indexOf("No space left on device") != -1 ) + exitSystemWithUserError(new UserException(t.getMessage())); + if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 ) + exitSystemWithUserError(new UserException(t.getCause().getMessage())); } /** @@ -126,8 +148,7 @@ public class CommandLineGATK extends CommandLineExecutable { List header = new ArrayList(); header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s",getVersionNumber(), getBuildTime())); header.add("Copyright (c) 2010 The Broad Institute"); - header.add("Please view our documentation at http://www.broadinstitute.org/gsa/wiki"); - header.add("For support, please view our support site at http://getsatisfaction.com/gsa"); + header.add("For support and documentation go to http://www.broadinstitute.org/gatk"); return header; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index e19a3c613..56fcf0652 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDBBuilder; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.collections.Pair; @@ -197,7 +198,16 @@ public class GenomeAnalysisEngine { private BaseRecalibration baseRecalibration = null; public BaseRecalibration getBaseRecalibration() { return baseRecalibration; } public boolean hasBaseRecalibration() { return baseRecalibration != null; } - 
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean noIndelQuals) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, noIndelQuals); } + public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals) { + baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan, emitOriginalQuals); + } + + /** + * Utility method to determine whether this is the lite version of the GATK + */ + public boolean isGATKLite() { + return GATKLiteUtils.isGATKLite(); + } /** * Actually run the GATK with the specified walker. @@ -209,8 +219,10 @@ public class GenomeAnalysisEngine { //monitor.start(); setStartTime(new java.util.Date()); + final GATKArgumentCollection args = this.getArguments(); + // validate our parameters - if (this.getArguments() == null) { + if (args == null) { throw new ReviewedStingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null."); } @@ -218,16 +230,16 @@ public class GenomeAnalysisEngine { if (this.walker == null) throw new ReviewedStingException("The walker passed to GenomeAnalysisEngine can not be null."); - if (this.getArguments().nonDeterministicRandomSeed) + if (args.nonDeterministicRandomSeed) resetRandomGenerator(System.currentTimeMillis()); // TODO -- REMOVE ME WHEN WE STOP BCF testing - if ( this.getArguments().USE_SLOW_GENOTYPES ) + if ( args.USE_SLOW_GENOTYPES ) GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false; // if the use specified an input BQSR recalibration table then enable on the fly recalibration - if (this.getArguments().BQSR_RECAL_FILE != null) - setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels, this.getArguments().noIndelQuals); + if (args.BQSR_RECAL_FILE != null) + setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, 
args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals); // Determine how the threads should be divided between CPU vs. IO. determineThreadAllocation(); @@ -262,6 +274,38 @@ public class GenomeAnalysisEngine { //return result; } + // TODO -- Let's move this to a utility class in unstable - but which one? + // ************************************************************************************** + // * Handle Deprecated Walkers * + // ************************************************************************************** + + // Mapping from walker name to major version number where the walker first disappeared + private static Map deprecatedGATKWalkers = new HashMap(); + static { + deprecatedGATKWalkers.put("CountCovariates", "2.0"); + deprecatedGATKWalkers.put("TableRecalibration", "2.0"); + } + + /** + * Utility method to check whether a given walker has been deprecated in a previous GATK release + * + * @param walkerName the walker class name (not the full package) to check + */ + public static boolean isDeprecatedWalker(final String walkerName) { + return deprecatedGATKWalkers.containsKey(walkerName); + } + + /** + * Utility method to check whether a given walker has been deprecated in a previous GATK release + * + * @param walkerName the walker class name (not the full package) to check + */ + public static String getDeprecatedMajorVersionNumber(final String walkerName) { + return deprecatedGATKWalkers.get(walkerName); + } + + // ************************************************************************************** + /** * Retrieves an instance of the walker based on the walker name. * @@ -269,7 +313,17 @@ public class GenomeAnalysisEngine { * @return An instance of the walker. 
*/ public Walker getWalkerByName(String walkerName) { - return walkerManager.createByName(walkerName); + try { + return walkerManager.createByName(walkerName); + } catch ( UserException e ) { + if ( isGATKLite() && GATKLiteUtils.isAvailableOnlyInFullGATK(walkerName) ) { + e = new UserException.NotSupportedInGATKLite("the " + walkerName + " walker is available only in the full version of the GATK"); + } + else if ( isDeprecatedWalker(walkerName) ) { + e = new UserException.DeprecatedWalker(walkerName, getDeprecatedMajorVersionNumber(walkerName)); + } + throw e; + } } /** @@ -743,6 +797,14 @@ public class GenomeAnalysisEngine { if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); + if (argCollection.removeProgramRecords && argCollection.keepProgramRecords) + throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options"); + + boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class); + + if (argCollection.keepProgramRecords) + removeProgramRecords = false; + return new SAMDataSource( samReaderIDs, threadAllocation, @@ -759,7 +821,8 @@ public class GenomeAnalysisEngine { getWalkerBAQQualityMode(), refReader, getBaseRecalibration(), - argCollection.defaultBaseQualities); + argCollection.defaultBaseQualities, + removeProgramRecords); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index f053c299c..8843d4bfe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -49,7 +49,7 @@ public class WalkerManager extends PluginManager { private ResourceBundle helpText; 
public WalkerManager() { - super(Walker.class,"walker","Walker"); + super(Walker.class,"walker",""); helpText = TextFormattingUtils.loadResourceBundle("StingText"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 84e89e8ec..1e6920b82 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.samples.PedigreeValidationType; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule; @@ -65,10 +66,10 @@ public class GATKArgumentCollection { @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false) public Integer readBufferSize = null; - @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) + @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. 
Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false) public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; - @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) + @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false) public File gatkKeyFile = null; @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false) @@ -190,37 +191,70 @@ public class GATKArgumentCollection { @Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false) public Boolean useOriginalBaseQualities = false; + // -------------------------------------------------------------------------------------------------------------- + // + // BQSR arguments + // + // -------------------------------------------------------------------------------------------------------------- + /** - * After the header, data records occur one per line until the end of the file. The first several items on a line are the - * values of the individual covariates and will change depending on which covariates were specified at runtime. The last - * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, - * and the raw empirical quality score calculated by phred-scaling the mismatch rate. + * Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. 
+ * Please be aware that one should only run recalibration with the covariates file created on the same input bam(s). */ - @Input(fullName="BQSR", shortName="BQSR", required=false, doc="Filename for the input covariates table recalibration .csv file which enables on the fly base quality score recalibration") - public File BQSR_RECAL_FILE = null; // BUGBUG: need a better argument name once we decide how BQSRs v1 and v2 will live in the code base simultaneously + @Input(fullName="BQSR", shortName="BQSR", required=false, doc="The input covariates table file which enables on-the-fly base quality score recalibration") + public File BQSR_RECAL_FILE = null; /** * Turns on the base quantization module. It requires a recalibration report (-BQSR). * * A value of 0 here means "do not quantize". - * Any value greater than zero will be used to recalculate the quantization using this many levels. - * Negative values do nothing (i.e. quantize using the recalibration report's quantization level -- same as not providing this parameter at all) + * Any value greater than zero will be used to recalculate the quantization using that many levels. + * Negative values mean that we should quantize using the recalibration report's quantization level. */ - @Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false) - public int quantizationLevels = -1; + @Hidden + @Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels (with -BQSR)", required=false) + public int quantizationLevels = 0; /** - * Turns off printing of the base insertion and base deletion tags when using the -BQSR argument. Only the base substitution qualities will be produced. + * Turns off printing of the base insertion and base deletion tags when using the -BQSR argument and only the base substitution qualities will be produced. 
*/ - @Argument(fullName="no_indel_quals", shortName = "NIQ", doc = "If true, inhibits printing of base insertion and base deletion tags.", required=false) - public boolean noIndelQuals = false; + @Argument(fullName="disable_indel_quals", shortName = "DIQ", doc = "If true, disables printing of base insertion and base deletion tags (with -BQSR)", required=false) + public boolean disableIndelQuals = false; + + /** + * By default, the OQ tag in not emitted when using the -BQSR argument. + */ + @Argument(fullName="emit_original_quals", shortName = "EOQ", doc = "If true, enables printing of the OQ tag with the original base qualities (with -BQSR)", required=false) + public boolean emitOriginalQuals = false; + + /** + * Do not modify quality scores less than this value but rather just write them out unmodified in the recalibrated BAM file. + * In general it's unsafe to change qualities scores below < 6, since base callers use these values to indicate random or bad bases. + * For example, Illumina writes Q2 bases when the machine has really gone wrong. This would be fine in and of itself, + * but when you select a subset of these reads based on their ability to align to the reference and their dinucleotide effect, + * your Q2 bin can be elevated to Q8 or Q10, leading to issues downstream. 
+ */ + @Argument(fullName = "preserve_qscores_less_than", shortName = "preserveQ", doc = "Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)", required = false) + public int PRESERVE_QSCORES_LESS_THAN = QualityUtils.MIN_USABLE_Q_SCORE; @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false) public byte defaultBaseQualities = -1; + // -------------------------------------------------------------------------------------------------------------- + // + // Other utility arguments + // + // -------------------------------------------------------------------------------------------------------------- + @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; + @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false) + public boolean removeProgramRecords = false; + + @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false) + public boolean keepProgramRecords = false; + @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. 
We do not support usage of this argument.", required = false) public ValidationExclusion.TYPE unsafe; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index 2bf75b035..73301c511 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -300,7 +301,7 @@ public class GATKBAMIndex { fileChannel = fileStream.getChannel(); } catch (IOException exc) { - throw new ReviewedStingException("Unable to open index file " + mFile, exc); + throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 0fa4234b3..7f0a0c4c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -89,6 +89,11 @@ public class SAMDataSource { */ private final SAMFileReader.ValidationStringency validationStringency; + /** + * Do we want to remove the program records from this data source? + */ + private final boolean removeProgramRecords; + /** * Store BAM indices for each reader present. 
*/ @@ -200,7 +205,8 @@ public class SAMDataSource { BAQ.QualityMode.DONT_MODIFY, null, // no BAQ null, // no BQSR - (byte) -1); + (byte) -1, + false); } /** @@ -233,7 +239,8 @@ public class SAMDataSource { BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader, BaseRecalibration bqsrApplier, - byte defaultBaseQualities) { + byte defaultBaseQualities, + boolean removeProgramRecords) { this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; @@ -249,6 +256,7 @@ public class SAMDataSource { dispatcher = null; validationStringency = strictness; + this.removeProgramRecords = removeProgramRecords; if(readBufferSize != null) ReadShard.setReadBufferSize(readBufferSize); else { @@ -748,7 +756,7 @@ public class SAMDataSource { private synchronized void createNewResource() { if(allResources.size() > maxEntries) throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use."); - SAMReaders readers = new SAMReaders(readerIDs, validationStringency); + SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords); allResources.add(readers); availableResources.add(readers); } @@ -777,9 +785,11 @@ public class SAMDataSource { /** * Derive a new set of readers from the Reads metadata. * @param readerIDs reads to load. + * TODO: validationStringency is not used here * @param validationStringency validation stringency. 
+ * @param removeProgramRecords indicate whether to clear program records from the readers */ - public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { + public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) { final int totalNumberOfFiles = readerIDs.size(); int readerNumber = 1; final SimpleTimer timer = new SimpleTimer().start(); @@ -790,6 +800,9 @@ public class SAMDataSource { long lastTick = timer.currentTime(); for(final SAMReaderID readerID: readerIDs) { final ReaderInitializer init = new ReaderInitializer(readerID).call(); + if (removeProgramRecords) { + init.reader.getFileHeader().setProgramRecords(new ArrayList()); + } if (threadAllocation.getNumIOThreads() > 0) { inputStreams.put(init.readerID, init.blockInputStream); // get from initializer } diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java index 5dbd90405..6780311bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java @@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -17,8 +19,9 @@ import java.util.List; import java.util.Map; /** - * Computes the coverage 
per sample. + * Computes the coverage per sample for every position (use with -L argument!). */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class CoverageBySample extends LocusWalker { @Output protected PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java index 3069ee528..6482354a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; @@ -46,6 +48,7 @@ import java.io.PrintStream; * * @author aaron */ +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) public class GATKPaperGenotyper extends LocusWalker implements TreeReducible { // the possible diploid genotype strings private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java 
b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index b821b98e6..70b1be0e1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; import java.util.Collection; @@ -41,6 +40,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar private final Queue reduceTasks = new LinkedList(); + /** + * An exception that's occurred in this traversal. If null, no exception has occurred. + */ + private RuntimeException error = null; + /** * Queue of incoming shards. */ @@ -91,13 +95,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar ReduceTree reduceTree = new ReduceTree(this); initializeWalker(walker); - // - // exception handling here is a bit complex. We used to catch and rethrow exceptions all over - // the place, but that just didn't work well. Now we have a specific execution exception (inner class) - // to use for multi-threading specific exceptions. All RuntimeExceptions that occur within the threads are rethrown - // up the stack as their underlying causes - // while (isShardTraversePending() || isTreeReducePending()) { + // Check for errors during execution. + if(hasTraversalErrorOccurred()) + throw getTraversalError(); + // Too many files sitting around taking up space? Merge them. 
if (isMergeLimitExceeded()) mergeExistingOutput(false); @@ -113,6 +115,9 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar queueNextShardTraverse(walker, reduceTree); } + if(hasTraversalErrorOccurred()) + throw getTraversalError(); + threadPool.shutdown(); // Merge any lingering output files. If these files aren't ready, @@ -123,9 +128,14 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar try { result = reduceTree.getResult().get(); notifyTraversalDone(walker,result); + } catch (ReviewedStingException ex) { + throw ex; + } catch ( ExecutionException ex ) { + // the thread died and we are failing to get the result, rethrow it as a runtime exception + throw toRuntimeException(ex.getCause()); + } catch (Exception ex) { + throw new ReviewedStingException("Unable to retrieve result", ex); } - catch( InterruptedException ex ) { handleException(ex); } - catch( ExecutionException ex ) { handleException(ex); } // do final cleanup operations outputTracker.close(); @@ -328,39 +338,35 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar } /** - * Handle an exception that occurred in a worker thread as needed by this scheduler. - * - * The way to use this function in a worker is: - * - * try { doSomeWork(); - * catch ( InterruptedException ex ) { hms.handleException(ex); } - * catch ( ExecutionException ex ) { hms.handleException(ex); } - * - * @param ex the exception that occurred in the worker thread + * Detects whether an execution error has occurred. + * @return True if an error has occurred. False otherwise. 
*/ - protected final void handleException(InterruptedException ex) { - throw new HierarchicalMicroScheduler.ExecutionFailure("Hierarchical reduce interrupted", ex); + private synchronized boolean hasTraversalErrorOccurred() { + return error != null; + } + + private synchronized RuntimeException getTraversalError() { + if(!hasTraversalErrorOccurred()) + throw new ReviewedStingException("User has attempted to retrieve a traversal error when none exists"); + return error; } /** - * Handle an exception that occurred in a worker thread as needed by this scheduler. - * - * The way to use this function in a worker is: - * - * try { doSomeWork(); - * catch ( InterruptedException ex ) { hms.handleException(ex); } - * catch ( ExecutionException ex ) { hms.handleException(ex); } - * - * @param ex the exception that occurred in the worker thread + * Allows other threads to notify of an error during traversal. */ - protected final void handleException(ExecutionException ex) { - if ( ex.getCause() instanceof RuntimeException ) - // if the cause was a runtime exception that's what we want to send up the stack - throw (RuntimeException )ex.getCause(); - else - throw new HierarchicalMicroScheduler.ExecutionFailure("Hierarchical reduce failed", ex); + protected synchronized RuntimeException notifyOfTraversalError(Throwable error) { + // If the error is already a Runtime, pass it along as is. Otherwise, wrap it. + this.error = toRuntimeException(error); + return this.error; } + private final RuntimeException toRuntimeException(final Throwable error) { + // If the error is already a Runtime, pass it along as is. Otherwise, wrap it. + if (error instanceof RuntimeException) + return (RuntimeException)error; + else + return new ReviewedStingException("An error occurred during the traversal. Message=" + error.getMessage(), error); + } /** A small wrapper class that provides the TreeReducer interface along with the FutureTask semantics. 
*/ @@ -381,17 +387,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar } } - /** - * A specific exception class for HMS-specific failures such as - * Interrupted or ExecutionFailures that aren't clearly the fault - * of the underlying walker code - */ - public static class ExecutionFailure extends ReviewedStingException { - public ExecutionFailure(final String s, final Throwable throwable) { - super(s, throwable); - } - } - /** * Used by the ShardTraverser to report time consumed traversing a given shard. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 16487054b..b35abb775 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.SampleUtils; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index 9920213a3..aefa9c12d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -6,11 +6,11 @@ import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; import 
org.broadinstitute.sting.gatk.traversals.TraversalEngine; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; /** * User: hanna * Date: Apr 29, 2009 @@ -79,6 +79,9 @@ public class ShardTraverser implements Callable { microScheduler.reportShardTraverseTime(endTime-startTime); return accumulator; + } catch(Throwable t) { + // Notify that an exception has occurred and rethrow it. + throw microScheduler.notifyOfTraversalError(t); } finally { synchronized(this) { complete = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java index fc8a89c64..632638f64 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java @@ -79,8 +79,14 @@ public class TreeReducer implements Callable { else result = walker.treeReduce( lhs.get(), rhs.get() ); } - catch( InterruptedException ex ) { microScheduler.handleException(ex); } - catch( ExecutionException ex ) { microScheduler.handleException(ex); } + catch( InterruptedException ex ) { + microScheduler.notifyOfTraversalError(ex); + throw new ReviewedStingException("Hierarchical reduce interrupted", ex); + } + catch( ExecutionException ex ) { + microScheduler.notifyOfTraversalError(ex); + throw new ReviewedStingException("Hierarchical reduce failed", ex); + } final long endTime = System.currentTimeMillis(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java index 589910fc7..a9b437dfd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java @@ -28,11 +28,10 @@ import net.sf.samtools.SAMRecord; */ /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Dec 9, 2009 - * * Filter out duplicate reads. + * + * @author rpoplin + * @since Dec 9, 2009 */ public class DuplicateReadFilter extends ReadFilter { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java index 4ec451567..3a5ed7d67 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java @@ -27,11 +27,10 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.samtools.SAMRecord; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Jul 19, 2010 + * Filter out reads that fail the vendor quality check. * - * Filter out FailsVendorQualityCheck reads. + * @author rpoplin + * @since Jul 19, 2010 */ public class FailsVendorQualityCheckFilter extends ReadFilter { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MateSameStrandFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MateSameStrandFilter.java index 9579aac05..700893fcf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MateSameStrandFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MateSameStrandFilter.java @@ -3,11 +3,10 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.samtools.SAMRecord; /** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 5/18/11 - * Time: 4:25 PM - * To change this template use File | Settings | File Templates. + * Filter out reads that are not paired, have their mate unmapped, are duplicates, fail vendor quality check or both mate and read are in the same strand. 
+ * + * @author chartl + * @since 5/18/11 */ public class MateSameStrandFilter extends ReadFilter { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java index 7bcee033f..25f90f2a2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java @@ -4,11 +4,10 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; /** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 5/2/11 - * Time: 12:20 PM - * To change this template use File | Settings | File Templates. + * Filter out reads that exceed a given max insert size + * + * @author chartl + * @since 5/2/11 */ public class MaxInsertSizeFilter extends ReadFilter { @Argument(fullName = "maxInsertSize", shortName = "maxInsert", doc="Discard reads with insert size greater than the specified value, defaults to 1000000", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java index 29738e499..02da2cf23 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java @@ -28,11 +28,10 @@ import net.sf.samtools.SAMRecord; */ /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 19, 2009 + * Filter out reads that don't have base an original quality quality score tag (usually added by BQSR) * - * Filter out reads that don't have Original Quality scores inside. 
+ * @author rpoplin + * @since Nov 19, 2009 */ public class NoOriginalQualityScoresFilter extends ReadFilter { public boolean filterOut( final SAMRecord read ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java index 50cd30f71..c26f17f88 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java @@ -27,11 +27,10 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.samtools.SAMRecord; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Dec 9, 2009 - * * Filter out duplicate reads. + * + * @author rpoplin + * @since Dec 9, 2009 */ public class NotPrimaryAlignmentFilter extends ReadFilter { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java index 81044b888..9ac8a5847 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java @@ -8,11 +8,10 @@ import java.util.HashSet; import java.util.Set; /** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Sep 21, 2009 - * Time: 2:54:23 PM - * To change this template use File | Settings | File Templates. + * Filter out reads that have blacklisted platform unit tags. (See code documentation for how to create the blacklist). 
+ * + * @author asivache + * @since Sep 21, 2009 */ public class PlatformUnitFilter extends ReadFilter { // a hack: use static in order to be able to fill it with the data from command line at runtime diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java index a56af56d1..b0be2c7af 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java @@ -1,17 +1,13 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; /** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 9/19/11 - * Time: 4:09 PM - * To change this template use File | Settings | File Templates. + * Filter out all reads except those with this read name + * + * @author chartl + * @since 9/19/11 */ public class ReadNameFilter extends ReadFilter { @Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java index 99d6bc154..7c0880b16 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java @@ -31,6 +31,9 @@ import org.broadinstitute.sting.commandline.Argument; import java.util.Set; +/** + * Filter out all reads except those with this sample + */ public class SampleFilter extends ReadFilter { @Argument(fullName = "sample_to_keep", shortName = "goodSM", doc="The name of the sample(s) to keep, filtering out all others", required=true) private Set SAMPLES_TO_KEEP = null; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java index 2f93cbcae..0f048b8a0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java @@ -30,11 +30,11 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 27, 2009 - * * Only use reads from the specified read group. + * + * @author rpoplin + * @since Nov 27, 2009 + * */ public class SingleReadGroupFilter extends ReadFilter { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java index e7ee345d2..5a965db99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java @@ -27,11 +27,10 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.samtools.SAMRecord; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Dec 9, 2009 + * Filter out unmapped reads. * - * Filter out duplicate reads. 
+ * @author rpoplin + * @since Dec 9, 2009 */ public class UnmappedReadFilter extends ReadFilter { diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java index cb8786be1..300e801e6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java @@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage alleles = new ArrayList(); - alleles.add(refAllele); + int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; + if ( index < 0 ) + return null; // we weren't given enough reference context to create the VariantContext - // add all of the alt alleles - boolean sawNullAllele = refAllele.isNull(); - for ( String alt : getAlternateAlleleList(dbsnp) ) { - if ( ! Allele.acceptableAlleleBases(alt) ) { - //System.out.printf("Excluding dbsnp record %s%n", dbsnp); - return null; - } - Allele altAllele = Allele.create(alt, false); - alleles.add(altAllele); - if ( altAllele.isNull() ) - sawNullAllele = true; - } + final byte refBaseForIndel = ref.getBases()[index]; - Map attributes = new HashMap(); - - int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; - if ( index < 0 ) - return null; // we weren't given enough reference context to create the VariantContext - Byte refBaseForIndel = new Byte(ref.getBases()[index]); - - final VariantContextBuilder builder = new VariantContextBuilder(); - builder.source(name).id(dbsnp.getRsID()); - builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 
1 : 0)); - builder.alleles(alleles); - builder.referenceBaseForIndel(refBaseForIndel); - return builder.make(); - } else + boolean addPaddingBase; + if ( isSNP(dbsnp) || isMNP(dbsnp) ) + addPaddingBase = false; + else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) + addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp))); + else return null; // can't handle anything else + + Allele refAllele; + if ( dbsnp.getNCBIRefBase().equals("-") ) + refAllele = Allele.create(refBaseForIndel, true); + else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) + return null; + else + refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true); + + final List alleles = new ArrayList(); + alleles.add(refAllele); + + // add all of the alt alleles + for ( String alt : getAlternateAlleleList(dbsnp) ) { + if ( Allele.wouldBeNullAllele(alt.getBytes())) + alt = ""; + else if ( ! Allele.acceptableAlleleBases(alt) ) + return null; + + alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false)); + } + + final VariantContextBuilder builder = new VariantContextBuilder(); + builder.source(name).id(dbsnp.getRsID()); + builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 
1 : 0)); + builder.alleles(alleles); + return builder.make(); + } + + private static List stripNullDashes(final List alleles) { + final List newAlleles = new ArrayList(alleles.size()); + for ( final String allele : alleles ) { + if ( allele.equals("-") ) + newAlleles.add(""); + else + newAlleles.add(allele); + } + return newAlleles; } } @@ -294,7 +309,6 @@ public class VariantContextAdaptors { int index = hapmap.getStart() - ref.getWindow().getStart(); if ( index < 0 ) return null; // we weren't given enough reference context to create the VariantContext - Byte refBaseForIndel = new Byte(ref.getBases()[index]); HashSet alleles = new HashSet(); Allele refSNPAllele = Allele.create(ref.getBase(), true); @@ -351,7 +365,7 @@ public class VariantContextAdaptors { long end = hapmap.getEnd(); if ( deletionLength > 0 ) end += deletionLength; - VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make(); + VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make(); return vc; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index bec1ea543..47bc48f81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -89,9 +89,9 @@ public class GATKReport { reader = new BufferedReader(new FileReader(file)); reportHeader = reader.readLine(); } catch (FileNotFoundException e) { - throw new ReviewedStingException("Could not open file : " + file); + throw new UserException.CouldNotReadInputFile(file, "it does not exist"); } catch (IOException e) { - throw new ReviewedStingException("Could not read file : " + file); + throw new 
UserException.CouldNotReadInputFile(file, e); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 7a272e155..3b4bdd087 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -208,11 +208,23 @@ public class GATKReportTable { } /** - * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed - * - * @param name the name of the table or column - * @return true if the name is valid, false if otherwise + * Create a new GATKReportTable with the same structure + * @param tableToCopy */ + public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) { + this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID); + for ( final GATKReportColumn column : tableToCopy.getColumnInfo() ) + addColumn(column.getColumnName(), column.getFormat()); + if ( copyData ) + throw new IllegalArgumentException("sorry, copying data in GATKReportTable isn't supported"); + } + + /** + * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed + * + * @param name the name of the table or column + * @return true if the name is valid, false if otherwise + */ private boolean isValidName(String name) { Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX); Matcher m = p.matcher(name); @@ -490,6 +502,17 @@ public class GATKReportTable { return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName)); } + /** + * Get a value from the given position in the table + * + * @param rowIndex the row ID + * @param columnName the name of the column + * @return the value stored at the specified position in the table + */ + public Object get(final int rowIndex, final String columnName) { 
+ return get(rowIndex, columnNameToIndex.get(columnName)); + } + /** * Get a value from the given position in the table * diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 6d0ec0e7c..979e0f2d6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -6,11 +6,14 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.providers.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActivityProfile; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -26,9 +29,9 @@ public class TraverseActiveRegions extends TraversalEngine workQueue = new LinkedList(); + private final LinkedList workQueue = new LinkedList(); private final LinkedHashSet myReads = new LinkedHashSet(); @Override @@ -67,8 +70,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ); + final List activeRegions 
= bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ); // add active regions to queue of regions to process // first check if can merge active regions over shard boundaries if( !activeRegions.isEmpty() ) { if( !workQueue.isEmpty() ) { - final ActiveRegion last = workQueue.getLast(); - final ActiveRegion first = activeRegions.get(0); + final org.broadinstitute.sting.utils.activeregion.ActiveRegion last = workQueue.getLast(); + final org.broadinstitute.sting.utils.activeregion.ActiveRegion first = activeRegions.get(0); if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { workQueue.removeLast(); activeRegions.remove(first); - workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) ); + workQueue.add( new org.broadinstitute.sting.utils.activeregion.ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) ); } } workQueue.addAll( activeRegions ); @@ -142,11 +143,11 @@ public class TraverseActiveRegions extends TraversalEngine walker, + private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker walker, final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext locus, final GenomeLoc location) { if ( walker.hasPresetActiveRegions() ) { - return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0; + return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 
1.0 : 0.0); } else { return walker.isActive( tracker, refContext, locus ); } @@ -183,7 +184,7 @@ public class TraverseActiveRegions extends TraversalEngine walker ) { // Just want to output the active regions to a file, not actually process them - for( final ActiveRegion activeRegion : workQueue ) { + for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion : workQueue ) { if( activeRegion.isActive ) { walker.activeRegionOutStream.println( activeRegion.getLocation() ); } @@ -196,7 +197,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine reads, final Queue workQueue, final T sum, final ActiveRegionWalker walker ) { + private T processActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final LinkedHashSet reads, final Queue workQueue, final T sum, final ActiveRegionWalker walker ) { final ArrayList placedReads = new ArrayList(); for( final GATKSAMRecord read : reads ) { final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read ); if( activeRegion.getLocation().overlapsP( readLoc ) ) { // The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region) long maxOverlap = activeRegion.getLocation().sizeOfOverlap( readLoc ); - ActiveRegion bestRegion = activeRegion; - for( final ActiveRegion otherRegionToTest : workQueue ) { + org.broadinstitute.sting.utils.activeregion.ActiveRegion bestRegion = activeRegion; + for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion otherRegionToTest : workQueue ) { if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) { maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( readLoc ); bestRegion = otherRegionToTest; @@ -227,7 +228,7 @@ public class TraverseActiveRegions extends TraversalEngine extends Walker { @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval 
list file", required = false) @@ -73,10 +74,10 @@ public abstract class ActiveRegionWalker extends Walker { +public class ClipReads extends ReadWalker { /** * If provided, ClipReads will write summary statistics about the clipping operations applied * to the reads to this file. @@ -571,7 +574,7 @@ public class ClipReadsWalker extends ReadWalker clipSeqs) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java deleted file mode 100644 index 7f9269725..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.walkers; - -import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.text.XReadLines; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.HashSet; -import java.util.Set; - -/** - * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear - * in the input file. It can dynamically merge the contents of multiple input BAM files, resulting - * in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter - * command line argument. - */ -@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) -@Requires({DataSource.READS, DataSource.REFERENCE}) -public class FindReadsWithNamesWalker extends ReadWalker { - /** an optional argument to dump the reads out to a BAM file */ - @Output(doc="Write output to this BAM filename instead of STDOUT") - SAMFileWriter out; - @Argument(fullName = "readNamesToKeep", shortName = "rn", doc="names to keep", required = true) - File readNamesFile = null; - - Set namesToKeep; - - - /** - * The initialize function. - */ - public void initialize() { - try { - namesToKeep = new HashSet(new XReadLines(readNamesFile).readLines()); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(readNamesFile, e); - } - } - - /** - * The reads filter function. 
- * - * @param ref the reference bases that correspond to our read, if a reference was provided - * @param read the read itself, as a SAMRecord - * @return true if the read passes the filter, false if it doesn't - */ - public boolean filter(ReferenceContext ref, GATKSAMRecord read) { - return namesToKeep.contains(read.getReadName()); - } - - /** - * The reads map function. - * - * @param ref the reference bases that correspond to our read, if a reference was provided - * @param read the read itself, as a SAMRecord - * @return the read itself - */ - public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { - return read; - } - - /** - * reduceInit is called once before any calls to the map function. We use it here to setup the output - * bam file, if it was specified on the command line - * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise - */ - public SAMFileWriter reduceInit() { - return out; - } - - /** - * given a read and a output location, reduce by emitting the read - * @param read the read itself - * @param output the output source - * @return the SAMFileWriter, so that the next reduce can emit to the same source - */ - public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) { - output.addAlignment(read); - return output; - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStat.java similarity index 93% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStat.java index 0777037bf..e881dcab7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStat.java @@ -1,8 +1,10 @@ package org.broadinstitute.sting.gatk.walkers; import 
org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; @@ -41,13 +43,14 @@ import java.text.NumberFormat; * reads with QC failure flag set, number of duplicates, percentage mapped, etc. * @author aaron */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS}) -public class FlagStatWalker extends ReadWalker { +public class FlagStat extends ReadWalker { @Output PrintStream out; // what comes out of the flagstat - static class FlagStat { + static class FlagStatus { long readCount = 0L; long QC_failure = 0L; long duplicates = 0L; @@ -117,7 +120,7 @@ public class FlagStatWalker extends ReadWalker { } - private FlagStat myStat = new FlagStat(); + private FlagStatus myStat = new FlagStatus(); public Integer map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { myStat.readCount++; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index e94d01d5a..2a92d8831 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) @PartitionBy(PartitionType.LOCUS) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) +@RemoveProgramRecords public abstract class LocusWalker extends Walker { // Do we 
actually want to operate on the context? public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java index b3062c546..0eb3a628d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java @@ -30,10 +30,12 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -60,7 +62,8 @@ import java.util.List; * Associated command: * samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] */ -public class PileupWalker extends LocusWalker implements TreeReducible { +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class Pileup extends LocusWalker implements TreeReducible { @Output PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODs.java similarity index 90% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODs.java index 7960f5c35..d7ae3050e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODs.java @@ -29,9 +29,11 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; @@ -39,7 +41,8 @@ import java.io.PrintStream; * Prints out all of the RODs in the input data set. Data is rendered using the toString() method * of the given ROD. 
*/ -public class PrintRODsWalker extends RodWalker { +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class PrintRODs extends RodWalker { @Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true) public RodBinding input; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java index 4159a67b7..8257794d7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java @@ -29,11 +29,13 @@ import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; @@ -88,9 +90,10 @@ import java.util.TreeSet; * * */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) @Requires({DataSource.READS, DataSource.REFERENCE}) -public class PrintReadsWalker extends ReadWalker { +public class PrintReads extends 
ReadWalker { @Output(doc="Write output to this BAM filename instead of STDOUT", required = true) SAMFileWriter out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java new file mode 100644 index 000000000..d9abc7925 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java @@ -0,0 +1,21 @@ +package org.broadinstitute.sting.gatk.walkers; + +/** + * Created with IntelliJ IDEA. + * User: thibault + * Date: 8/2/12 + * Time: 1:58 PM + * To change this template use File | Settings | File Templates. + */ + +import java.lang.annotation.*; + +/** + * Indicates that program records should be removed from SAM headers by default for this walker + */ +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface RemoveProgramRecords { +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFile.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFile.java index baaaf9e28..8553569e8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFile.java @@ -31,8 +31,10 @@ import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import 
org.broadinstitute.sting.utils.sam.ReadUtils; @@ -45,16 +47,17 @@ import java.util.Map; * Divides the input data set into separate BAM files, one for each sample in the input data set. The split * files are named concatenating the sample name to the end of the provided outputRoot command-line argument. */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @WalkerName("SplitSamFile") @Requires({DataSource.READS}) -public class SplitSamFileWalker extends ReadWalker> { +public class SplitSamFile extends ReadWalker> { @Argument(fullName="outputRoot", doc="output BAM file", required=false) public String outputRoot = null; @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false) public Integer BAMcompression = 5; - private static Logger logger = Logger.getLogger(SplitSamFileWalker.class); + private static Logger logger = Logger.getLogger(SplitSamFile.class); private static String VERSION = "0.0.1"; public void initialize() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 18c383ed9..6cd2e8aea 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -49,10 +49,7 @@ import java.util.List; @ReadFilters(MalformedReadFilter.class) @PartitionBy(PartitionType.NONE) @BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) -@DocumentedGATKFeature( - groupName = "GATK walkers", - summary = "General tools available for running on the command line as part of the GATK package", - extraDocs = {CommandLineGATK.class}) +@DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class}) public abstract class Walker { final protected static Logger logger = 
Logger.getLogger(Walker.class); private GenomeAnalysisEngine toolkit; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index dc589699a..30f81b20c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -51,7 +51,7 @@ public class AlleleBalance extends InfoFieldAnnotation { char[] BASES = {'A','C','G','T'}; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index fd23fb8f6..11c9c3a99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java 
@@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.utils.MathUtils; @@ -14,7 +14,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; /** @@ -22,7 +24,7 @@ import java.util.*; */ public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { - public void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, final GenotypeBuilder gb) { + public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, final GenotypeBuilder gb) { Double ratio = annotateSNP(stratifiedContext, vc, g); if (ratio == null) return; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index 46aa6d0f3..c3b6de65a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ 
-34,7 +34,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -52,7 +52,7 @@ import java.util.Map; */ public class BaseCounts extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 8bc5f06f4..bd884892c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -15,7 +16,7 @@ import java.util.*; * The u-based z-approximation from the 
Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele). * Note that the base quality rank sum test can not be calculated for homozygous sites. */ -public class BaseQualityRankSumTest extends RankSumTest { +public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnotation { public List getKeyNames() { return Arrays.asList("BaseQRankSum"); } public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities")); } @@ -64,12 +65,12 @@ public class BaseQualityRankSumTest extends RankSumTest { // by design, first element in LinkedHashMap was ref allele double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - for (Allele a : el.keySet()) { + for (Map.Entry entry : el.entrySet()) { - if (a.isReference()) - refLikelihood =el.get(a); + if (entry.getKey().isReference()) + refLikelihood = entry.getValue(); else { - double like = el.get(a); + double like = entry.getValue(); if (like >= altLikelihood) altLikelihood = like; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 3d07e3a7a..54837baad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -29,18 +29,17 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; -import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -62,14 +61,14 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn private Set founderIds = new HashSet(); - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( ! 
vc.hasGenotypes() ) return null; return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap(), true,founderIds); } - public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ){ + public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set headerLines ){ //If families were given, get the founders ids founderIds = ((Walker)walker).getSampleDB().getFounderIds(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java index 5403e19dc..f41a40621 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java @@ -18,7 +18,7 @@ import java.util.*; * Date: 6/28/12 */ -public class ClippingRankSumTest /*extends RankSumTest*/ { +public class ClippingRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("ClippingRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index b36de0dac..28ca77f18 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; 
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; @@ -38,7 +38,7 @@ import java.util.Map; */ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index d56daadea..a9edab752 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import 
org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -14,7 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; /** @@ -38,21 +42,17 @@ import java.util.*; */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { - private static final String REF_ALLELE = "REF"; - - private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - - public void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) { + public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) { if ( g == null || !g.isCalled() ) return; if ( vc.isSNP() ) annotateSNP(stratifiedContext, vc, gb); else if ( vc.isIndel() ) - annotateIndel(stratifiedContext, vc, gb); + annotateIndel(stratifiedContext, ref.getBase(), vc, gb); } - private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) { + private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) { HashMap alleleCounts = new HashMap(); for ( Allele allele : vc.getAlleles() ) @@ -73,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa gb.AD(counts); } - private void annotateIndel(AlignmentContext stratifiedContext, 
VariantContext vc, GenotypeBuilder gb) { + private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) { ReadBackedPileup pileup = stratifiedContext.getBasePileup(); if ( pileup == null ) return; - final HashMap alleleCounts = new HashMap(); - alleleCounts.put(REF_ALLELE, 0); + final HashMap alleleCounts = new HashMap(); final Allele refAllele = vc.getReference(); - for ( Allele allele : vc.getAlternateAlleles() ) { - - if ( allele.isNoCall() ) { - continue; // this does not look so good, should we die??? - } - - alleleCounts.put(getAlleleRepresentation(allele), 0); + for ( final Allele allele : vc.getAlleles() ) { + alleleCounts.put(allele, 0); } for ( PileupElement p : pileup ) { if ( p.isBeforeInsertion() ) { - final String b = p.getEventBases(); - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); + final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false); + if ( alleleCounts.containsKey(insertion) ) { + alleleCounts.put(insertion, alleleCounts.get(insertion)+1); } } else if ( p.isBeforeDeletionStart() ) { - if ( p.getEventLength() == refAllele.length() ) { - // this is indeed the deletion allele recorded in VC - final String b = DEL; - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); - } + if ( p.getEventLength() == refAllele.length() - 1 ) { + // this is indeed the deletion allele recorded in VC + final Allele deletion = Allele.create(refBase); + if ( alleleCounts.containsKey(deletion) ) { + alleleCounts.put(deletion, alleleCounts.get(deletion)+1); } + } } else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) { - alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1); + alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1); } } - int[] counts = new int[alleleCounts.size()]; - counts[0] = alleleCounts.get(REF_ALLELE); + final int[] counts = new int[alleleCounts.size()]; + 
counts[0] = alleleCounts.get(refAllele); for (int i = 0; i < vc.getAlternateAlleles().size(); i++) - counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) ); + counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) ); gb.AD(counts); } - private String getAlleleRepresentation(Allele allele) { - if ( allele.isNull() ) { // deletion wrt the ref - return DEL; - } else { // insertion, pass actual bases - return allele.getBaseString(); - } - - } - // public String getIndelBases() public List getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index e3ee8c923..131670599 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -29,7 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; @@ -55,7 +55,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; - public Map annotate(RefMetaDataTracker tracker, 
AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( !vc.isVariant() ) return null; @@ -255,12 +255,9 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat for ( Map.Entry sample : stratifiedContexts.entrySet() ) { for (PileupElement p : sample.getValue().getBasePileup()) { - if ( p.isDeletion() || p.getRead().isReducedRead() ) // ignore deletions and reduced reads + if ( ! RankSumTest.isUsableBase(p, false) || p.getRead().isReducedRead() ) // ignore deletions and reduced reads continue; - if ( p.getRead().getMappingQuality() < 20 || p.getQual() < 20 ) - continue; // todo -- fixme, should take filtered context! - Allele base = Allele.create(p.getBase(), false); boolean isFW = !p.getRead().getReadNegativeStrandFlag(); @@ -294,16 +291,14 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat int[][] table = new int[2][2]; - for ( String sample : stratifiedContexts.keySet() ) { - final AlignmentContext context = stratifiedContexts.get(sample); + for ( Map.Entry sample : stratifiedContexts.entrySet() ) { + final AlignmentContext context = sample.getValue(); if ( context == null ) continue; final ReadBackedPileup pileup = context.getBasePileup(); for ( final PileupElement p : pileup ) { - if ( p.getRead().isReducedRead() ) // ignore reduced reads - continue; - if ( p.getRead().getMappingQuality() < 20 ) + if ( ! RankSumTest.isUsableBase(p, true) || p.getRead().isReducedRead() ) // ignore reduced reads continue; if ( indelLikelihoodMap.containsKey(p) ) { // to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element. 
@@ -318,12 +313,12 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - for (Allele a : el.keySet()) { + for (Map.Entry entry : el.entrySet()) { - if (a.isReference()) - refLikelihood =el.get(a); + if (entry.getKey().isReference()) + refLikelihood = entry.getValue(); else { - double like = el.get(a); + double like = entry.getValue(); if (like >= altLikelihood) altLikelihood = like; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 11a64b49f..fba30b3f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -1,14 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; @@ -20,9 +22,10 @@ import java.util.Map; /** * The GC content (# GC bases / # all bases) of the reference 
within 50 bp +/- this site */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { double content = computeGCContent(ref); Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%.2f", content)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 606de1b88..c6d8883c5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -28,7 +28,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; @@ -60,7 +60,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; private final static char REGEXP_WILDCARD 
= '.'; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here return null; @@ -103,7 +103,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot return map; } - private class HaplotypeComparator implements Comparator { + private static class HaplotypeComparator implements Comparator { public int compare(Haplotype a, Haplotype b) { if (a.getQualitySum() < b.getQualitySum()) @@ -362,8 +362,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot // Score all the reads in the pileup, even the filtered ones final double[] scores = new double[el.size()]; int i = 0; - for (Allele a : el.keySet()) { - scores[i++] = -el.get(a); + for (Map.Entry a : el.entrySet()) { + scores[i++] = -a.getValue(); if (DEBUG) { System.out.printf(" vs. 
haplotype %d = %f%n", i - 1, scores[i - 1]); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 795cdbeb5..6ba85de07 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -4,7 +4,7 @@ import org.broad.tribble.util.popgen.HardyWeinbergCalculation; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation; import org.broadinstitute.sting.utils.QualityUtils; @@ -29,7 +29,7 @@ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgress private static final int MIN_GENOTYPE_QUALITY = 10; private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { final GenotypesContext genotypes = vc.getGenotypes(); if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 7c4b0b42e..9f20bf375 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -3,9 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -23,7 +22,7 @@ public class HomopolymerRun extends InfoFieldAnnotation { private boolean ANNOTATE_INDELS = true; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( !vc.isBiallelic() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index 0d2b3478d..715895526 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -3,9 +3,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.MathUtils; @@ -33,7 +33,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno private static final int MIN_SAMPLES = 10; private Set founderIds; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { //If available, get the founder IDs and cache them. the IC will only be computed on founders then. 
if(founderIds == null) founderIds = ((Walker)walker).getSampleDB().getFounderIds(); @@ -68,6 +68,9 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno if ( g.isNoCall() || !g.hasLikelihoods() ) continue; + if (g.getPloidy() != 2) // only work for diploid samples + continue; + N++; final double[] normalizedLikelihoods = MathUtils.normalizeFromLog10( g.getLikelihoods().getAsVector() ); refCount += normalizedLikelihoods[idxAA]; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index e0abfcf3c..babaf7ee6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.IndelUtils; @@ -18,7 +18,7 @@ import java.util.*; */ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { int run; if (vc.isMixed()) { diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 4651782ef..7f5033adf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -3,12 +3,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; @@ -22,7 +21,7 @@ import java.util.Map; */ public class LowMQ extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index 07fbfc3d2..b6f24433e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; @@ -32,7 +32,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment private String fatherId; private String childId; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( mendelianViolation == null ) { if (checkAndSetSamples(((Walker) walker).getSampleDB())) { mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 4ce19e824..31067e386 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -16,7 +17,7 @@ import java.util.*; * The u-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele) * Note that the mapping quality rank sum test can not be calculated for homozygous sites. */ -public class MappingQualityRankSumTest extends RankSumTest { +public class MappingQualityRankSumTest extends RankSumTest implements StandardAnnotation { public List getKeyNames() { return Arrays.asList("MQRankSum"); } @@ -60,12 +61,12 @@ public class MappingQualityRankSumTest extends RankSumTest { // by design, first element in LinkedHashMap was ref allele double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - for (Allele a : el.keySet()) { + for (Map.Entry a : el.entrySet()) { - if (a.isReference()) - refLikelihood =el.get(a); + if (a.getKey().isReference()) + refLikelihood = a.getValue(); else { - double like = el.get(a); + double like = a.getValue(); if (like >= altLikelihood) altLikelihood = like; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 780206f30..372d5bc9e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; @@ -24,7 +24,7 @@ import java.util.Map; */ public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index c12923be5..b5252f15b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import 
org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; @@ -47,7 +47,7 @@ import java.util.List; */ public class MappingQualityZeroBySample extends GenotypeAnnotation { public void annotate(RefMetaDataTracker tracker, - AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, + AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g, GenotypeBuilder gb) { if ( g == null || !g.isCalled() ) return; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index e8490959c..9f542e3bd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -22,7 +22,7 @@ import java.util.Map; */ public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, 
ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 8f0bc876a..ba4303b4a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -20,7 +20,7 @@ import java.util.Map; * The number of N bases, counting only SOLiD data */ public class NBaseCount extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 5da6492a6..b62cd374b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -28,7 +28,7 @@ import java.util.Map; */ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( !vc.hasLog10PError() || stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index fb5767b34..842fde8ad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.MathUtils; @@ -29,7 +29,7 @@ import java.util.Map; */ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index cdef1f77c..bf6adcfac 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -4,9 +4,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.QualityUtils; @@ -28,11 +27,11 @@ import java.util.Map; /** * Abstract root for all RankSum based annotations */ -public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { +public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation { static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final boolean DEBUG = false; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if (stratifiedContexts.size() == 0) return null; @@ -149,9 +148,28 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar protected abstract void fillIndelQualsFromPileup(final ReadBackedPileup pileup, final List refQuals, final List altQuals); - protected static boolean isUsableBase(final PileupElement p) { + /** + * Can the base in this pileup element be used in comparative tests between ref / alt bases? + * + * Note that this function by default does not allow deletion pileup elements + * + * @param p the pileup element to consider + * @return true if this base is part of a meaningful read for comparison, false otherwise + */ + public static boolean isUsableBase(final PileupElement p) { + return isUsableBase(p, false); + } + + /** + * Can the base in this pileup element be used in comparative tests between ref / alt bases? 
+ * + * @param p the pileup element to consider + * @param allowDeletions if true, allow p to be a deletion base + * @return true if this base is part of a meaningful read for comparison, false otherwise + */ + public static boolean isUsableBase(final PileupElement p, final boolean allowDeletions) { return !(p.isInsertionAtBeginningOfRead() || - p.isDeletion() || + (! allowDeletions && p.isDeletion()) || p.getMappingQual() == 0 || p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); // need the unBAQed quality score here diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 40bf6bbd8..3456041c7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -4,6 +4,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -21,7 +22,7 @@ import java.util.*; * The u-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error). * Note that the read position rank sum test can not be calculated for homozygous sites. 
*/ -public class ReadPosRankSumTest extends RankSumTest { +public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotation { public List getKeyNames() { return Arrays.asList("ReadPosRankSum"); @@ -86,11 +87,11 @@ public class ReadPosRankSumTest extends RankSumTest { LinkedHashMap el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele - for (Allele a : el.keySet()) { - if (a.isReference()) - refLikelihood = el.get(a); + for (Map.Entry a : el.entrySet()) { + if (a.getKey().isReference()) + refLikelihood = a.getValue(); else { - double like = el.get(a); + double like = a.getValue(); if (like >= altLikelihood) altLikelihood = like; } @@ -99,7 +100,6 @@ public class ReadPosRankSumTest extends RankSumTest { int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset()); final int numAlignedBases = getNumAlignedBases(p.getRead()); - int rp = readPos; if (readPos > numAlignedBases / 2) { readPos = numAlignedBases - (readPos + 1); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index cbf536e4f..7e4d44cf2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -46,7 +46,7 @@ import java.util.Map; */ public class SampleList extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( vc.isMonomorphicInSamples() || !vc.hasGenotypes() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 6c860fce6..4d990e738 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; import org.broadinstitute.sting.utils.Utils; @@ -203,7 +203,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio } } - public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { + public void initialize ( AnnotatorCompatible walker, 
GenomeAnalysisEngine toolkit, Set headerLines ) { // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff // without providing a SnpEff rod via --snpEffFile): validateRodBinding(walker.getSnpEffRodBinding()); @@ -225,7 +225,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); } - public Map annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { + public Map annotate ( RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { RodBinding snpEffRodBinding = walker.getSnpEffRodBinding(); // Get only SnpEff records that start at this locus, not merely span it: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index d7529c8ef..af2df8e6a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ 
-22,7 +22,7 @@ import java.util.Map; */ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java index c4d6ea474..eced387b3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java @@ -27,10 +27,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -48,7 +47,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa private static final String STR_PRESENT = "STR"; private static final String REPEAT_UNIT_KEY = "RU"; 
private static final String REPEATS_PER_ALLELE_KEY = "RPA"; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( !vc.isIndel()) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index f4feb9aa8..63694d809 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -28,7 +28,7 @@ public class TechnologyComposition extends InfoFieldAnnotation implements Experi private String n454 ="Num454"; private String nSolid = "NumSOLiD"; private String nOther = "NumOther"; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { 
if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index 1f8ccf652..2e3578dcb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.samples.Sample; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; @@ -28,7 +28,7 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen private Set trios = null; private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( trios == null ) { if ( walker instanceof VariantAnnotator ) { trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents(); diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 8f7f46c20..cce106210 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -38,8 +39,9 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.util.*; @@ -75,11 +77,12 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Requires(value={}) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @Reference(window=@Window(start=-50,stop=50)) @By(DataSource.REFERENCE) -public class VariantAnnotator extends RodWalker implements AnnotatorCompatibleWalker { +public class VariantAnnotator extends RodWalker implements AnnotatorCompatible { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); diff 
--git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 46212c19c..073faf54e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -46,7 +46,7 @@ public class VariantAnnotatorEngine { private List requestedExpressions = new ArrayList(); private final HashMap, String> dbAnnotations = new HashMap, String>(); - private final AnnotatorCompatibleWalker walker; + private final AnnotatorCompatible walker; private final GenomeAnalysisEngine toolkit; private boolean requireStrictAlleleMatch = false; @@ -75,7 +75,7 @@ public class VariantAnnotatorEngine { } // use this constructor if you want all possible annotations - public VariantAnnotatorEngine(List annotationsToExclude, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) { + public VariantAnnotatorEngine(List annotationsToExclude, AnnotatorCompatible walker, GenomeAnalysisEngine toolkit) { this.walker = walker; this.toolkit = toolkit; requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations(); @@ -85,7 +85,7 @@ public class VariantAnnotatorEngine { } // use this constructor if you want to select specific annotations (and/or interfaces) - public VariantAnnotatorEngine(List annotationGroupsToUse, List annotationsToUse, List annotationsToExclude, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) { + public VariantAnnotatorEngine(List annotationGroupsToUse, List annotationsToUse, List annotationsToExclude, AnnotatorCompatible walker, GenomeAnalysisEngine toolkit) { this.walker = walker; this.toolkit = toolkit; initializeAnnotations(annotationGroupsToUse, annotationsToUse, annotationsToExclude); diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java similarity index 93% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java index 1331ad5df..f610a2ba8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java @@ -5,7 +5,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.List; -public interface AnnotatorCompatibleWalker { +public interface AnnotatorCompatible { // getter methods for various used bindings public abstract RodBinding getSnpEffRodBinding(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index c44c834de..bc20f6c97 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -9,12 +9,11 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.List; -import java.util.Map; public abstract class GenotypeAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts/genotype split by sample - public abstract void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, + public abstract void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, 
ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index b94bee31b..1569a605f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -11,7 +11,7 @@ import java.util.Map; public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts split by sample - public abstract Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, + public abstract Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc); // return the descriptions used for the VCF INFO meta field diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java index 521f89016..996d85a67 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java @@ -37,5 +37,5 @@ public abstract class VariantAnnotatorAnnotation { public abstract List getKeyNames(); // initialization method (optional for subclasses, and therefore non-abstract) - public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { } + public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set headerLines ) { } } \ No newline at end of file diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java index 31006f4d8..9eb0e4dda 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java @@ -26,18 +26,20 @@ package org.broadinstitute.sting.gatk.walkers.beagle; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.codecs.beagle.BeagleFeature; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.beagle.BeagleFeature; import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.util.*; @@ -72,7 +74,8 @@ import static java.lang.Math.log10;

Note that Beagle produces some of these files compressed as .gz, so gunzip must be run on them before walker is run in order to decompress them

*/ -public class BeagleOutputToVCFWalker extends RodWalker { +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) +public class BeagleOutputToVCF extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @@ -247,8 +250,6 @@ public class BeagleOutputToVCFWalker extends RodWalker { // Beagle always produces genotype strings based on the strings we input in the likelihood file. String refString = vc_input.getReference().getDisplayString(); - if (refString.length() == 0) // ref was null - refString = Allele.NULL_ALLELE_STRING; Allele bglAlleleA, bglAlleleB; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java index a8e280f43..fdc333676 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.beagle; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -37,10 +38,14 @@ import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibratio import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SampleUtils; 
-import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.io.File; import java.io.PrintStream; @@ -72,7 +77,8 @@ import java.util.*; * */ -public class ProduceBeagleInputWalker extends RodWalker { +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) +public class ProduceBeagleInput extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @@ -233,7 +239,7 @@ public class ProduceBeagleInputWalker extends RodWalker { if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t"); for ( Allele allele : preferredVC.getAlleles() ) { String bglPrintString; - if (allele.isNoCall() || allele.isNull()) + if (allele.isNoCall()) bglPrintString = "-"; else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java similarity index 92% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java rename to 
public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java index d6b19b377..a6a6d484e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -38,12 +39,13 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.io.PrintStream; import java.util.Arrays; @@ -51,10 +53,11 @@ import java.util.Set; /** * Produces an input file to Beagle imputation engine, listing unphased, hard-called genotypes for a single sample - * in input variant file. Will additional hold back a fraction of the sites for evaluation, marking the + * in input variant file. 
Will additionally hold back a fraction of the sites for evaluation, marking the * genotypes at that sites as missing, and writing the truth of these sites to a second VCF file */ -public class VariantsToBeagleUnphasedWalker extends RodWalker { +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) +public class VariantsToBeagleUnphased extends RodWalker { @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @@ -104,7 +107,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker GenomeLoc loc = context.getLocation(); VariantContext vc = tracker.getFirstValue(variants, loc); - if ( ProduceBeagleInputWalker.canBeOutputToBeagle(vc) ) { + if ( ProduceBeagleInput.canBeOutputToBeagle(vc) ) { // do we want to hold back this site? boolean makeMissing = dropSite(vc); @@ -146,7 +149,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker // write out the alleles at this site for ( Allele allele : vc.getAlleles() ) { - beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" "); + beagleOut.append(allele.isNoCall() ? 
"-" : allele.getBaseString()).append(" "); } // write out sample level genotypes diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java index 122958ac2..a6d82d5b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java @@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.commandline.Gatherer; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; +import org.broadinstitute.sting.utils.recalibration.RecalibrationReport; import java.io.File; import java.io.FileNotFoundException; @@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer { if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) { final File recal_out = new File(output.getName() + ".original"); final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport); - RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); } else if (!RAC.NO_PLOTS) { final File recal_out = new File(output.getName() + ".recal"); - RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); } 
generalReport.output(outputFile); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java new file mode 100755 index 000000000..f69a02002 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; +import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.QuantizationInfo; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; +import org.broadinstitute.sting.utils.recalibration.RecalibrationReport; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.lang.reflect.Constructor; +import java.util.ArrayList; + +/** + * First pass of the base quality score recalibration -- Generates recalibration table based on various user-specified covariates (such as reported quality score, cycle, and dinucleotide). + * + *

+ * This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating + * only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative + * of poor base quality. This walker generates tables based on various user-specified covariates (such as read group, + * reported quality score, cycle, and context). Since there is a large amount of data one can then calculate an empirical + * probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations. + * The output file is a table (of the several covariate values, num observations, num mismatches, empirical quality score). + *

+ * Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will be added for the user regardless of whether or not they were specified. + * + *

+ * + *

Input

+ *

+ * The input read data whose base quality scores need to be assessed. + *

+ * A database of known polymorphic sites to skip over. + *

+ * + *

Output

+ *

+ * A GATK Report file with many tables: + *

    + *
  1. The list of arguments
  2. + *
  3. The quantized qualities table
  4. + *
  5. The recalibration table by read group
  6. + *
  7. The recalibration table by quality score
  8. + *
  9. The recalibration table for all the optional covariates
  10. + *
+ * + * The GATK Report is intended to be easy to read by humans or computers. Check out the documentation of the GATKReport to learn how to manipulate this table. + *

+ * + *

Examples

+ *
+ * java -Xmx4g -jar GenomeAnalysisTK.jar \
+ *   -T BaseRecalibrator \
+ *   -I my_reads.bam \
+ *   -R resources/Homo_sapiens_assembly18.fasta \
+ *   -knownSites bundle/hg18/dbsnp_132.hg18.vcf \
+ *   -knownSites another/optional/setOfSitesToMask.vcf \
+ *   -o recal_data.grp
+ * 
+ */ + +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) +@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) +@By(DataSource.READS) +@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file +@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) // filter out all reads with zero or unavailable mapping quality +@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta +public class BaseRecalibrator extends LocusWalker implements TreeReducible { + @ArgumentCollection + private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates + + private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization + + private RecalibrationTables recalibrationTables; + + private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental) + + private RecalibrationEngine recalibrationEngine; + + private int minimumQToUse; + + protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. + protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. + protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ + + private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. 
Please provide a VCF file containing known sites of genetic variation."; + + + /** + * Parse the -cov arguments and create a list of covariates to be used here + * Based on the covariates' estimates for initial capacity allocate the data hashmap + */ + public void initialize() { + + // check for unsupported access + if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals) + throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument"); + + if (RAC.FORCE_PLATFORM != null) + RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; + + if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified + throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION); + + if (RAC.LIST_ONLY) { + RecalUtils.listAvailableCovariates(logger); + System.exit(0); + } + RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table + + Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates + ArrayList requiredCovariates = covariates.getFirst(); + ArrayList optionalCovariates = covariates.getSecond(); + + requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; + int covariateIndex = 0; + for (final Covariate covariate : requiredCovariates) + requestedCovariates[covariateIndex++] = covariate; + for (final Covariate covariate : optionalCovariates) + requestedCovariates[covariateIndex++] = covariate; + + logger.info("The covariates being used here: "); + for (Covariate cov : requestedCovariates) { // list all the covariates being used + logger.info("\t" + cov.getClass().getSimpleName()); + cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection + } + + int numReadGroups = 0; + for ( final 
SAMFileHeader header : getToolkit().getSAMFileHeaders() ) + numReadGroups += header.getReadGroups().size(); + recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups); + + recalibrationEngine = initializeRecalibrationEngine(); + recalibrationEngine.initialize(requestedCovariates, recalibrationTables); + + minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN; + } + + private RecalibrationEngine initializeRecalibrationEngine() { + + final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class); + try { + Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null); + constructor.setAccessible(true); + return (RecalibrationEngine)constructor.newInstance(); + } + catch (Exception e) { + throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + recalibrationEngineClass.getSimpleName()); + } + } + + private boolean readHasBeenSkipped(GATKSAMRecord read) { + return read.containsTemporaryAttribute(SKIP_RECORD_ATTRIBUTE); + } + + private boolean isLowQualityBase(GATKSAMRecord read, int offset) { + return read.getBaseQualities()[offset] < minimumQToUse; + } + + private boolean readNotSeen(GATKSAMRecord read) { + return !read.containsTemporaryAttribute(SEEN_ATTRIBUTE); + } + + /** + * For each read at this locus get the various covariate values and increment that location in the map based on + * whether or not the base matches the reference at this particular location + * + * @param tracker the reference metadata tracker + * @param ref the reference context + * @param context the alignment context + * @return returns 1, but this value isn't used in the reduce step + */ + public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + long countedSites = 0L; + if (tracker.getValues(RAC.knownSites).size() == 0) { // Only analyze sites not present in the provided known sites + for (final 
PileupElement p : context.getBasePileup()) { + final GATKSAMRecord read = p.getRead(); + final int offset = p.getOffset(); + + if (readHasBeenSkipped(read) || isLowQualityBase(read, offset)) // This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases) + continue; + + if (readNotSeen(read)) { + read.setTemporaryAttribute(SEEN_ATTRIBUTE, true); + RecalUtils.parsePlatformForRead(read, RAC); + if (RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { + read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true); + continue; + } + read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); + } + + if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it + RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING || + RecalUtils.isColorSpaceConsistent(read, offset)) + recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap + } + countedSites++; + } + + return countedSites; + } + + /** + * Initialize the reduce step by returning 0L + * + * @return returns 0L + */ + public Long reduceInit() { + return 0L; + } + + /** + * The Reduce method doesn't do anything for this walker. + * + * @param mapped Result of the map. This value is immediately ignored. 
+ * @param sum The summing CountedData used to output the CSV data + * @return returns The sum used to output the CSV data + */ + public Long reduce(Long mapped, Long sum) { + sum += mapped; + return sum; + } + + public Long treeReduce(Long sum1, Long sum2) { + sum1 += sum2; + return sum1; + } + + @Override + public void onTraversalDone(Long result) { + logger.info("Calculating quantized quality scores..."); + quantizeQualityScores(); + + logger.info("Writing recalibration report..."); + generateReport(); + logger.info("...done!"); + + if (!RAC.NO_PLOTS) { + logger.info("Generating recalibration plots..."); + generatePlots(); + } + + logger.info("Processed: " + result + " sites"); + } + + private void generatePlots() { + File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE; + if (recalFile != null) { + RecalibrationReport report = new RecalibrationReport(recalFile); + RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); + } + else + RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); + } + + + /** + * go through the quality score table and use the # observations and the empirical quality score + * to build a quality score histogram for quantization. 
Then use the QuantizeQual algorithm to + * generate a quantization map (recalibrated_qual -> quantized_qual) + */ + private void quantizeQualityScores() { + quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS); + } + + private void generateReport() { + PrintStream output; + try { + output = new PrintStream(RAC.RECAL_FILE); + } catch (FileNotFoundException e) { + throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created"); + } + + RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output); + } +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java deleted file mode 100644 index 274b0f8bb..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java +++ /dev/null @@ -1,110 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; - -import org.broadinstitute.sting.utils.QualityUtils; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Jan 6, 2010 - * - * An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed. - * Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. - */ - -public class Datum { - - long numObservations; // number of bases seen in total - long numMismatches; // number of bases seen that didn't match the reference - - private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero - - - //--------------------------------------------------------------------------------------------------------------- - // - // constructors - // - //--------------------------------------------------------------------------------------------------------------- - - public Datum() { - numObservations = 0L; - numMismatches = 0L; - } - - public Datum(long numObservations, long numMismatches) { - this.numObservations = numObservations; - this.numMismatches = numMismatches; - } - - //--------------------------------------------------------------------------------------------------------------- - // - // increment methods - // - //--------------------------------------------------------------------------------------------------------------- - - synchronized void increment(final long incObservations, final long incMismatches) { - numObservations += incObservations; - numMismatches += incMismatches; - } - - synchronized void increment(final boolean isError) { - numObservations++; - numMismatches += isError ? 
1:0; - } - - //--------------------------------------------------------------------------------------------------------------- - // - // methods to derive empirical quality score - // - //--------------------------------------------------------------------------------------------------------------- - - double empiricalQualDouble() { - final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); - final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT); - final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); - return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); - } - - byte empiricalQualByte() { - final double doubleMismatches = (double) (numMismatches); - final double doubleObservations = (double) (numObservations); - return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40 - } - - @Override - public String toString() { - return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte()); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof Datum)) - return false; - Datum other = (Datum) o; - return numMismatches == other.numMismatches && numObservations == other.numObservations; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java deleted file mode 100755 index 9b00b1876..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ /dev/null @@ -1,148 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * 
copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.QualityUtils; - -import java.util.Random; - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 3, 2009 - * - * An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. 
- */ - -public class RecalDatum extends Datum { - - private static final double UNINITIALIZED = -1.0; - - private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations - private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) - - - //--------------------------------------------------------------------------------------------------------------- - // - // constructors - // - //--------------------------------------------------------------------------------------------------------------- - - public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) { - numObservations = _numObservations; - numMismatches = _numMismatches; - estimatedQReported = reportedQuality; - empiricalQuality = UNINITIALIZED; - } - - public RecalDatum(final RecalDatum copy) { - this.numObservations = copy.numObservations; - this.numMismatches = copy.numMismatches; - this.estimatedQReported = copy.estimatedQReported; - this.empiricalQuality = copy.empiricalQuality; - } - - public void combine(final RecalDatum other) { - final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors(); - increment(other.numObservations, other.numMismatches); - estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations); - empiricalQuality = UNINITIALIZED; - } - - @Override - public void increment(final boolean isError) { - super.increment(isError); - empiricalQuality = UNINITIALIZED; - } - - @Requires("empiricalQuality == UNINITIALIZED") - @Ensures("empiricalQuality != UNINITIALIZED") - protected final void calcEmpiricalQuality() { - empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again - } - - public void setEstimatedQReported(final double estimatedQReported) { - this.estimatedQReported = estimatedQReported; - } - - 
public final double getEstimatedQReported() { - return estimatedQReported; - } - - public void setEmpiricalQuality(final double empiricalQuality) { - this.empiricalQuality = empiricalQuality; - } - - public final double getEmpiricalQuality() { - if (empiricalQuality == UNINITIALIZED) - calcEmpiricalQuality(); - return empiricalQuality; - } - - @Override - public String toString() { - return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality())); - } - - public String stringForCSV() { - return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported()); - } - - private double calcExpectedErrors() { - return (double) this.numObservations * qualToErrorProb(estimatedQReported); - } - - private double qualToErrorProb(final double qual) { - return Math.pow(10.0, qual / -10.0); - } - - public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { - final Random random = new Random(); - final int nObservations = random.nextInt(maxObservations); - final int nErrors = random.nextInt(maxErrors); - final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE); - return new RecalDatum(nObservations, nErrors, (byte)qual); - } - - /** - * We don't compare the estimated quality reported because it may be different when read from - * report tables. - * - * @param o the other recal datum - * @return true if the two recal datums have the same number of observations, errors and empirical quality. 
- */ - @Override - public boolean equals(Object o) { - if (!(o instanceof RecalDatum)) - return false; - RecalDatum other = (RecalDatum) o; - return super.equals(o) && - MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 9d7ec7b26..f04e4a1b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -29,6 +29,7 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; import java.io.File; import java.util.Collections; @@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection { * reads which have had the reference inserted because of color space inconsistencies. */ @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS") - public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO; + public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO; /** * CountCovariates and TableRecalibration accept a --solid_nocall_strategy flag which governs how the recalibrator handles @@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection { * their color space tag can not be recalibrated. 
*/ @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false) - public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; + public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; /** * The context covariate will use a context of this size to calculate it's covariate value for base mismatches @@ -120,7 +121,7 @@ public class RecalibrationArgumentCollection { * The context covariate will use a context of this size to calculate it's covariate value for base insertions and deletions */ @Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false) - public int INDELS_CONTEXT_SIZE = 8; + public int INDELS_CONTEXT_SIZE = 3; /** * A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off) @@ -177,41 +178,41 @@ public class RecalibrationArgumentCollection { public GATKReportTable generateReportTable() { GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2); argumentsTable.addColumn("Argument"); - argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); argumentsTable.addRowID("covariate", true); - argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES)); + argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? 
"null" : Utils.join(",", COVARIATES)); argumentsTable.addRowID("no_standard_covs", true); - argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES); + argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES); argumentsTable.addRowID("run_without_dbsnp", true); - argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP); + argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP); argumentsTable.addRowID("solid_recal_mode", true); - argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE); + argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE); argumentsTable.addRowID("solid_nocall_strategy", true); - argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY); + argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY); argumentsTable.addRowID("mismatches_context_size", true); - argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE); + argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE); argumentsTable.addRowID("indels_context_size", true); - argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE); + argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE); argumentsTable.addRowID("mismatches_default_quality", true); - argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY); + argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY); 
argumentsTable.addRowID("insertions_default_quality", true); - argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY); + argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY); argumentsTable.addRowID("low_quality_tail", true); - argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL); + argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL); argumentsTable.addRowID("default_platform", true); - argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM); + argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM); argumentsTable.addRowID("force_platform", true); - argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM); + argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM); argumentsTable.addRowID("quantizing_levels", true); - argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); + argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); argumentsTable.addRowID("keep_intermediate_files", true); - argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES); + argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES); argumentsTable.addRowID("no_plots", true); - argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS); + argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS); argumentsTable.addRowID("recalibration_report", true); - argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? 
"null" : recalibrationReport.getAbsolutePath()); + argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath()); argumentsTable.addRowID("binary_tag_name", true); - argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); + argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); return argumentsTable; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java new file mode 100644 index 000000000..38e306939 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java @@ -0,0 +1,37 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; + +/* +* Copyright (c) 2009 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +* OTHER DEALINGS IN THE SOFTWARE. +*/ +public interface RecalibrationEngine { + + public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables); + + public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase); + +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java new file mode 100644 index 000000000..08c7da754 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java @@ -0,0 +1,114 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.classloader.PublicPackageSource; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.recalibration.RecalDatum; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource { + + protected Covariate[] covariates; + protected RecalibrationTables recalibrationTables; + + public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { + this.covariates = covariates; + this.recalibrationTables = recalibrationTables; + } + + /** + * Loop through the list of requested covariates and pick out the value from the read, offset, and reference + * Using the list of covariate values as a key, pick out the RecalDatum and increment, + * adding one to the number of observations and potentially one to the number of mismatches for mismatches only. 
+ * + * @param pileupElement The pileup element to update + * @param refBase The reference base at this locus + */ + public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { + final int offset = pileupElement.getOffset(); + final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); + + final byte qual = pileupElement.getQual(); + final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); + + final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION); + final int eventIndex = EventType.BASE_SUBSTITUTION.index; + + final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); + final RecalDatum rgThisDatum = createDatumObject(qual, isError); + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + rgRecalTable.put(rgThisDatum, keys[0], eventIndex); + else + rgPreviousDatum.combine(rgThisDatum); + + final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); + if (qualPreviousDatum == null) + qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); + else + qualPreviousDatum.increment(isError); + + for (int i = 2; i < covariates.length; i++) { + if (keys[i] < 0) + continue; + final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); + final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); + if (covPreviousDatum == null) + covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); + else + covPreviousDatum.increment(isError); + } + } + + /** + * creates a datum object with one observation and one or zero error + 
* + * @param reportedQual the quality score reported by the instrument for this base + * @param isError whether or not the observation is an error + * @return a new RecalDatum object with the observation and the error + */ + protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) { + return new RecalDatum(1, isError ? 1:0, reportedQual); + } + + /** + * Get the covariate key set from a read + * + * @param read the read + * @return the covariate keysets for this read + */ + protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { + return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java index 2a8940de0..58ddd0879 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; 
+import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import java.io.File; @@ -119,8 +121,9 @@ import java.io.PrintStream; * @author Mark DePristo * @since May 7, 2010 */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @By(DataSource.REFERENCE) -public class CallableLociWalker extends LocusWalker { +public class CallableLoci extends LocusWalker { @Output PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLoci.java similarity index 73% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLoci.java index cd5fdc505..420beaeda 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLoci.java @@ -27,12 +27,14 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; import java.util.Arrays; @@ -41,7 +43,8 @@ import java.util.List; /** * Test routine for new 
VariantContext object */ -public class CompareCallableLociWalker extends RodWalker, long[][]> { +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class CompareCallableLoci extends RodWalker, long[][]> { @Output protected PrintStream out; @@ -54,8 +57,8 @@ public class CompareCallableLociWalker extends RodWalker map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public List map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker != null ) { - CallableLociWalker.CallableBaseState comp1 = getCallableBaseState(tracker, compTrack1); - CallableLociWalker.CallableBaseState comp2 = getCallableBaseState(tracker, compTrack2); + CallableLoci.CallableBaseState comp1 = getCallableBaseState(tracker, compTrack1); + CallableLoci.CallableBaseState comp2 = getCallableBaseState(tracker, compTrack2); if ( printState != null && comp1.getState() == printState1 && comp2.getState() == printState2 ) { out.printf("%s %s %s %s%n", comp1.getLocation(), comp1.getState(), comp2.getLocation(), comp2.getState()); @@ -92,7 +95,7 @@ public class CompareCallableLociWalker extends RodWalker rodBinding) { + private CallableLoci.CallableBaseState getCallableBaseState(RefMetaDataTracker tracker, RodBinding rodBinding) { //System.out.printf("tracker %s%n", tracker); List bindings = tracker.getValues(rodBinding); if ( bindings.size() != 1 ) { @@ -101,8 +104,8 @@ public class CompareCallableLociWalker extends RodWalker comps, long[][] sum) { + public long[][] reduce(List comps, long[][] sum) { if ( comps != null ) { - CallableLociWalker.CallableBaseState comp1 = comps.get(0); - CallableLociWalker.CallableBaseState comp2 = comps.get(1); + CallableLoci.CallableBaseState comp1 = comps.get(0); + CallableLoci.CallableBaseState comp2 = comps.get(1); sum[comp1.getState().ordinal()][comp2.getState().ordinal()]++; } @@ -127,8 +130,8 @@ public class CompareCallableLociWalker 
extends RodWalker do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @By(DataSource.REFERENCE) @PartitionBy(PartitionType.NONE) @Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE) -public class DepthOfCoverageWalker extends LocusWalker>, CoveragePartitioner> implements TreeReducible { +public class DepthOfCoverage extends LocusWalker>, CoveragePartitioner> implements TreeReducible { @Output @Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"}) Map out; @@ -507,6 +510,38 @@ public class DepthOfCoverageWalker extends LocusWalker geneStats : statsByGene ) { printTargetSummary(geneSummaryOut,geneStats); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java similarity index 91% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java index 124be2eb4..1fe9c2e81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -32,6 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import 
org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; import java.util.List; @@ -60,10 +62,11 @@ import java.util.List; * * */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @Allows(value = {DataSource.REFERENCE}) @Requires(value = {DataSource.REFERENCE}) @By(DataSource.REFERENCE) -public class GCContentByIntervalWalker extends LocusWalker { +public class GCContentByInterval extends LocusWalker { @Output protected PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java index a48570fc6..471b23930 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -10,6 +11,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -66,6 +68,7 @@ import 
java.io.PrintStream; * * @author Kiran Garimella, Mark DePristo */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class ErrorRatePerCycle extends LocusWalker { @Output PrintStream out; @Argument(fullName="min_base_quality_score", shortName="mbq", doc="Minimum base quality required to consider a base for calling", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java index e4e3c271e..9289f86e3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java @@ -27,12 +27,14 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.Median; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; @@ -94,6 +96,7 @@ import java.util.Map; * * @author Mark DePristo */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class ReadGroupProperties extends ReadWalker { @Output public PrintStream out; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java index 0f5432b80..1dc8a7ec1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java @@ -2,11 +2,13 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; @@ -44,8 +46,7 @@ import java.util.List; * @author Kiran Garimela */ - - +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class ReadLengthDistribution extends ReadWalker { @Output public PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java index 369731530..112eb278e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; import net.sf.picard.util.PeekableIterator; import 
org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; @@ -80,6 +82,7 @@ import java.util.*; * @author Mauricio Carneiro, Roger Zurawicki * @since 5/8/12 */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @By(value = DataSource.READS) @PartitionBy(PartitionType.INTERVAL) public class DiagnoseTargets extends LocusWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index ac60f5f28..373c8232e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -24,7 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import 
org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -33,30 +35,34 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.CONTIG) @ActiveRegionExtension(extension = 0, maxRegion = 50000) public class FindCoveredIntervals extends ActiveRegionWalker { @Output(required = true) private PrintStream out; + @Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false) + private int coverageThreshold = 20; + @Override // Look to see if the region has sufficient coverage - public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { + public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); // note the linear probability scale - int coverageThreshold = 20; - return Math.min((double) depth / coverageThreshold, 1); + return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1)); } @Override - public GenomeLoc map(final ActiveRegion activeRegion, final RefMetaDataTracker tracker) { + public GenomeLoc map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker tracker) { if (activeRegion.isActive) return activeRegion.getLocation(); else @@ -72,7 +78,7 @@ public class 
FindCoveredIntervals extends ActiveRegionWalker { public Long reduce(final GenomeLoc value, Long reduce) { if (value != null) { out.println(value.toString()); - return reduce++; + return ++reduce; } else return reduce; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java index 1dc28b0e7..92e2e2dc4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java @@ -25,11 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.File; import java.io.PrintStream; @@ -130,7 +133,8 @@ import java.util.List; * @author Mark DePristo * @since 7/4/11 */ -public class DiffObjectsWalker extends RodWalker { +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class DiffObjects extends RodWalker { /** * Writes out a file of the DiffEngine format: * @@ -144,7 +148,7 @@ public class DiffObjectsWalker extends RodWalker { * files to do the comparison. 
Conceptually master is the original file contained the expected * results, but this doesn't currently have an impact on the calculations, but might in the future. */ - @Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true) + @Input(fullName="master", shortName="m", doc="Master file: expected results", required=true) File masterFile; /** @@ -152,7 +156,7 @@ public class DiffObjectsWalker extends RodWalker { * files to do the comparison. Conceptually test is the derived file from master, but this * doesn't currently have an impact on the calculations, but might in the future. */ - @Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true) + @Input(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true) File testFile; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java similarity index 86% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java index 4e2c17bf6..8fbd37e30 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java @@ -27,12 +27,14 @@ package org.broadinstitute.sting.gatk.walkers.fasta; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Collections; @@ -45,7 +47,10 @@ import java.util.List; *

* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s). * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'. - * Note that if there are multiple variants at a site, it takes the first one seen. + * Several important notes: + * 1) if there are multiple variants that start at a site, it chooses one of them randomly. + * 2) when there are overlapping indels (but with different start positions) only the first will be chosen. + * 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions). * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order). * *

Input

@@ -70,10 +75,11 @@ import java.util.List; * * */ +@DocumentedGATKFeature( groupName = "Companion Utilities", extraDocs = {CommandLineGATK.class} ) @WalkerName("FastaAlternateReferenceMaker") @Reference(window=@Window(start=-1,stop=50)) @Requires(value={DataSource.REFERENCE}) -public class FastaAlternateReferenceWalker extends FastaReferenceWalker { +public class FastaAlternateReference extends FastaReference { /** * Variants from these input files are used by this tool to construct an alternate reference. @@ -99,16 +105,16 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { String refBase = String.valueOf((char)ref.getBase()); // Check to see if we have a called snp - for ( VariantContext vc : tracker.getValues(variants) ) { + for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) { if ( vc.isFiltered() ) continue; if ( vc.isSimpleDeletion()) { - deletionBasesRemaining = vc.getReference().length(); + deletionBasesRemaining = vc.getReference().length() - 1; // delete the next n bases, not this one return new Pair(context.getLocation(), refBase); } else if ( vc.isSimpleInsertion()) { - return new Pair(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); + return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); } else if (vc.isSNP()) { return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReference.java similarity index 93% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReference.java index 7ae5c5c75..a835560d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReference.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.fasta; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; @@ -66,8 +68,9 @@ import java.io.PrintStream; * * */ +@DocumentedGATKFeature( groupName = "Companion Utilities", extraDocs = {CommandLineGATK.class} ) @WalkerName("FastaReferenceMaker") -public class FastaReferenceWalker extends RefWalker, GenomeLoc> { +public class FastaReference extends RefWalker, GenomeLoc> { @Output PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStatsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java similarity index 79% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStatsWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java index f5c5f909f..6beade070 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStatsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java @@ -26,21 +26,24 @@ package org.broadinstitute.sting.gatk.walkers.fasta; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; 
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; /** * Calculates basic statistics about the reference sequence itself */ -public class FastaStatsWalker extends RefWalker { +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class FastaStats extends RefWalker { @Output PrintStream out; - protected class FastaStats { + protected class FastaStatistics { long nBases = 0, nRegBases = 0; } @@ -50,19 +53,19 @@ public class FastaStatsWalker extends RefWalker * */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-50,stop=50)) -public class VariantFiltrationWalker extends RodWalker { +public class VariantFiltration extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @@ -331,7 +336,11 @@ public class VariantFiltrationWalker extends RodWalker { filters.add(exp.name); } } - builder.filters(filters); + + if ( filters.isEmpty() ) + builder.passFilters(); + else + builder.filters(filters); writer.add(builder.make()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 432bbd6d7..08a333486 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -46,8 +46,7 @@ public abstract class 
AlleleFrequencyCalculationModel implements Cloneable { public enum Model { /** The default model with the best performance in all cases */ - EXACT, - POOL + EXACT } protected int N; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index cef09a913..7dcc95361 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -148,8 +148,8 @@ public class ConsensusAlleleCounter { boolean foundKey = false; // copy of hashmap into temp arrayList ArrayList> cList = new ArrayList>(); - for (String s : consensusIndelStrings.keySet()) { - cList.add(new Pair(s,consensusIndelStrings.get(s))); + for (Map.Entry s : consensusIndelStrings.entrySet()) { + cList.add(new Pair(s.getKey(), s.getValue())); } if (read.getAlignmentEnd() == loc.getStart()) { @@ -246,18 +246,19 @@ public class ConsensusAlleleCounter { // get ref bases of accurate deletion final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart(); stop = loc.getStart() + dLen; - final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen); + final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding if (Allele.acceptableAlleleBases(refBases, false)) { refAllele = Allele.create(refBases, true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + altAllele = Allele.create(ref.getBase(), false); } else continue; // don't go on with this allele if refBases are non-standard } else { // insertion case - if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(s, false); + final 
String insertionBases = (char)ref.getBase() + s; // add reference padding + if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions + refAllele = Allele.create(ref.getBase(), true); + altAllele = Allele.create(insertionBases, false); stop = loc.getStart(); } else continue; // go on to next allele if consensus insertion has any non-standard base. @@ -267,7 +268,6 @@ public class ConsensusAlleleCounter { final VariantContextBuilder builder = new VariantContextBuilder().source(""); builder.loc(loc.getContig(), loc.getStart(), stop); builder.alleles(Arrays.asList(refAllele, altAllele)); - builder.referenceBaseForIndel(ref.getBase()); builder.noGenotypes(); if (doMultiAllelicCalls) { vcs.add(builder.make()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 80b58cfa6..6fdc926d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -47,19 +47,21 @@ import java.util.Map; */ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { -/* public enum Model { - SNP, - INDEL, - BOTH - } - */ + public static final String DUMMY_LANE = "Lane1"; + public static final String DUMMY_SAMPLE_NAME = "DummySample1"; + + /* public enum Model { + SNP, + INDEL, + BOTH + } + */ public enum Model { SNP, INDEL, - BOTH, - POOLSNP, - POOLINDEL, - POOLBOTH + GeneralPloidySNP, + GeneralPloidyINDEL, + BOTH } public enum GENOTYPING_MODE { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 
230d6c324..bedffa690 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Haplotype; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; @@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private boolean DEBUG = false; private boolean ignoreSNPAllelesWhenGenotypingIndels = false; private PairHMMIndelErrorModel pairModel; - private boolean allelesArePadded; - + private static ThreadLocal>> indelLikelihoodMap = new ThreadLocal>>() { protected synchronized HashMap> initialValue() { @@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood indelLikelihoodMap.set(new HashMap>()); haplotypeMap.clear(); - Pair,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); - alleleList = pair.first; - allelesArePadded = pair.second; + alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); if (alleleList.isEmpty()) return null; } - getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements if (haplotypeMap == null || haplotypeMap.isEmpty()) return null; // start making the VariantContext // For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base. 
- - final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded); + final int endLoc = loc.getStart() + alleleList.get(0).length() - 1; final int eventLength = getEventLength(alleleList); - final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase()); + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList); // create the genotypes; no-call everyone for now GenotypesContext genotypes = GenotypesContext.create(); @@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood return indelLikelihoodMap.get(); } - public static int computeEndLocation(final List alleles, final GenomeLoc loc, final boolean allelesArePadded) { - Allele refAllele = alleles.get(0); - int endLoc = loc.getStart() + refAllele.length()-1; - if (allelesArePadded) - endLoc++; - - return endLoc; - } - public static void getHaplotypeMapFromAlleles(final List alleleList, final ReferenceContext ref, final GenomeLoc loc, @@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } - public static Pair,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker, + public static List getInitialAlleleList(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map contexts, final AlignmentContextUtils.ReadOrientation contextType, final GenomeLocParser locParser, final UnifiedArgumentCollection UAC, final boolean ignoreSNPAllelesWhenGenotypingIndels) { - + List alleles = new ArrayList(); - boolean allelesArePadded = true; if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { VariantContext vc = null; for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) { @@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } // ignore places where 
we don't have a variant if (vc == null) - return new Pair,Boolean>(alleles,false); + return alleles; if (ignoreSNPAllelesWhenGenotypingIndels) { // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it @@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } else { alleles.addAll(vc.getAlleles()); } - if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1) - allelesArePadded = false; - - } else { - alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); + alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); } - return new Pair,Boolean> (alleles,allelesArePadded); + return alleles; } // Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index ac7c370bf..07d5d2f2d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -43,7 +42,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import 
org.broadinstitute.sting.utils.variantcontext.*; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; @@ -68,9 +66,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC final byte refBase = ref.getBase(); final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase); + // handle non-standard reference bases + if ( indexOfRefBase == -1 ) + return null; final Allele refAllele = Allele.create(refBase, true); - // calculate the GLs ArrayList GLs = new ArrayList(contexts.size()); for ( Map.Entry sample : contexts.entrySet() ) { @@ -208,7 +208,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements ); } - public class BAQedPileupElement extends PileupElement { + public static class BAQedPileupElement extends PileupElement { public BAQedPileupElement( final PileupElement PE ) { super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 6733a0fca..e755a1e36 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; public class UnifiedArgumentCollection { @@ -113,11 +114,11 @@ public class UnifiedArgumentCollection { * that you not play around with this 
parameter. */ @Advanced - @Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) + @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) public int MAX_ALTERNATE_ALLELES = 3; @Hidden - @Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false) + @Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false) public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false; // indel-related arguments @@ -145,12 +146,12 @@ public class UnifiedArgumentCollection { @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false) public double INDEL_HETEROZYGOSITY = 1.0/8000; - @Hidden - @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false) + @Advanced + @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false) public byte INDEL_GAP_CONTINUATION_PENALTY = 10; - @Hidden - @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false) + @Advanced + @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty, as Phred-scaled probability. 
I.e., 30 => 10^-30/10", required = false) public byte INDEL_GAP_OPEN_PENALTY = 45; @Hidden @@ -169,6 +170,66 @@ public class UnifiedArgumentCollection { @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) public boolean IGNORE_SNP_ALLELES = false; + /* + Generalized ploidy argument (debug only): squash all reads into a single pileup without considering sample info + */ + @Hidden + @Argument(fullName = "allReadsSP", shortName = "dl", doc = "expt", required = false) + public boolean TREAT_ALL_READS_AS_SINGLE_POOL = false; + + /* + Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only + sample-level error model + */ + + @Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false) + public boolean IGNORE_LANE_INFO = false; + + /* + Generalized ploidy argument: VCF file that contains truth calls for reference sample. If a reference sample is included through argument -refsample, + then this argument is required. + */ + @Input(fullName="reference_sample_calls", shortName = "referenceCalls", doc="VCF file with the truth callset for the reference sample", required=false) + RodBinding referenceSampleRod; + + /* + Reference sample name: if included, a site-specific error model will be built in order to improve calling quality. This requires ideally + that a bar-coded reference sample be included with the polyploid/pooled data in a sequencing experimental design. + If argument is absent, no per-site error model is included and calling is done with a generalization of traditional statistical calling. + */ + @Argument(shortName="refsample", fullName="reference_sample_name", doc="Reference sample name.", required=false) + String referenceSampleName; + + /* + Sample ploidy - equivalent to number of chromosomes per pool. 
In pooled experiments this should be = # of samples in pool * individual sample ploidy + */ + @Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false) + int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY; + + @Hidden + @Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false) + byte minQualityScore= 1; + + @Hidden + @Argument(shortName="maxqs", fullName="max_quality_score", doc="Max quality score to consider. Smaller numbers process faster. Default: Q40.", required=false) + byte maxQualityScore= 40; + + @Hidden + @Argument(shortName="site_prior", fullName="site_quality_prior", doc="Phred-Scaled prior quality of the site. Default: Q20.", required=false) + byte phredScaledPrior = 20; + + @Hidden + @Argument(shortName = "min_call_power", fullName = "min_power_threshold_for_calling", doc="The minimum confidence in the error model to make a call. Number should be between 0 (no power requirement) and 1 (maximum power required).", required = false) + double minPower = 0.95; + + @Hidden + @Argument(shortName = "min_depth", fullName = "min_reference_depth", doc="The minimum depth required in the reference sample in order to make a call.", required = false) + int minReferenceDepth = 100; + + @Hidden + @Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false) + boolean EXCLUDE_FILTERED_REFERENCE_SITES = false; + // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value! 
public UnifiedArgumentCollection clone() { @@ -196,6 +257,17 @@ public class UnifiedArgumentCollection { uac.alleles = alleles; uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES; uac.CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS; + uac.GLmodel = GLmodel; + uac.TREAT_ALL_READS_AS_SINGLE_POOL = TREAT_ALL_READS_AS_SINGLE_POOL; + uac.referenceSampleRod = referenceSampleRod; + uac.referenceSampleName = referenceSampleName; + uac.samplePloidy = samplePloidy; + uac.maxQualityScore = minQualityScore; + uac.phredScaledPrior = phredScaledPrior; + uac.minPower = minPower; + uac.minReferenceDepth = minReferenceDepth; + uac.EXCLUDE_FILTERED_REFERENCE_SITES = EXCLUDE_FILTERED_REFERENCE_SITES; + uac.IGNORE_LANE_INFO = IGNORE_LANE_INFO; // todo- arguments to remove uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 29ca1265c..507806fbe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -35,15 +36,17 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.io.PrintStream; import java.util.*; @@ -66,7 +69,7 @@ import java.util.*; * *

Output

*

- * A raw, unfiltered, highly specific callset in VCF format. + * A raw, unfiltered, highly sensitive callset in VCF format. *

* *

Example generic command for multi-sample SNP calling

@@ -79,7 +82,7 @@ import java.util.*; * -o snps.raw.vcf \ * -stand_call_conf [50.0] \ * -stand_emit_conf 10.0 \ - * -dcov [50] \ + * -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \ * [-L targets.interval_list] * * @@ -113,6 +116,7 @@ import java.util.*; * */ +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) @ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} ) @Reference(window=@Window(start=-200,stop=200)) @@ -120,7 +124,7 @@ import java.util.*; // TODO -- When LocusIteratorByState gets cleaned up, we should enable multiple @By sources: // TODO -- @By( {DataSource.READS, DataSource.REFERENCE_ORDERED_DATA} ) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) -public class UnifiedGenotyper extends LocusWalker, UnifiedGenotyper.UGStatistics> implements TreeReducible, AnnotatorCompatibleWalker { +public class UnifiedGenotyper extends LocusWalker, UnifiedGenotyper.UGStatistics> implements TreeReducible, AnnotatorCompatible { @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @@ -149,7 +153,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif public boolean alwaysAppendDbsnpId() { return false; } /** - * A raw, unfiltered, highly specific callset in VCF format. + * A raw, unfiltered, highly sensitive callset in VCF format. 
*/ @Output(doc="File to which variants should be written",required=true) protected VariantContextWriter writer = null; @@ -222,6 +226,45 @@ public class UnifiedGenotyper extends LocusWalker, Unif * **/ public void initialize() { + + // Check for protected modes + if (GATKLiteUtils.isGATKLite()) { + // no polyploid/pooled mode in GATK Like + if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY || + UAC.referenceSampleName != null || + UAC.referenceSampleRod.isBound()) { + throw new UserException.NotSupportedInGATKLite("Usage of ploidy values different than 2 not supported in this GATK version"); + } + // get all of the unique sample names + samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + + } else { + // in full mode: check for consistency in ploidy/pool calling arguments + // check for correct calculation models +/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { + // polyploidy requires POOL GL and AF calculation models to be specified right now + if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL + && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) { + throw new UserException("Incorrect genotype calculation model chosen. Only [POOLSNP|POOLINDEL|POOLBOTH] supported with this walker if sample ploidy != 2"); + } + + if (UAC.AFmodel != AlleleFrequencyCalculationModel.Model.POOL) + throw new UserException("Incorrect AF Calculation model. 
Only POOL model supported if sample ploidy != 2"); + + } + */ + // get all of the unique sample names + if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) { + samples.clear(); + samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME); + } else { + samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + if (UAC.referenceSampleName != null ) + samples.remove(UAC.referenceSampleName); + } + + } + // check for a bad max alleles value if ( UAC.MAX_ALTERNATE_ALLELES > GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED) throw new UserException.BadArgumentValue("max_alternate_alleles", "the maximum possible value is " + GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED); @@ -232,15 +275,12 @@ public class UnifiedGenotyper extends LocusWalker, Unif UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP ) logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode"); - // get all of the unique sample names - samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); - - // initialize the verbose writer + // initialize the verbose writer if ( verboseWriter != null ) verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP"); annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); - UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, VariantContextUtils.DEFAULT_PLOIDY); + UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, UAC.samplePloidy); // initialize the header Set headerInfo = getHeaderInfo(UAC, annotationEngine, dbsnp); @@ -250,6 +290,8 @@ public class UnifiedGenotyper extends LocusWalker, Unif 
annotationEngine.invokeAnnotationInitializationMethods(headerInfo); writer.writeHeader(new VCFHeader(headerInfo, samples)); + + } public static Set getHeaderInfo(final UnifiedArgumentCollection UAC, @@ -268,6 +310,15 @@ public class UnifiedGenotyper extends LocusWalker, Unif if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED ) headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site")); + // add the pool values for each genotype + if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { + headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample")); + headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample")); + } + if (UAC.referenceSampleName != null) { + headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth")); + } + VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.DOWNSAMPLED_KEY, VCFConstants.MLE_ALLELE_COUNT_KEY, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 7a5a1ba0b..f4bd196ae 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -37,7 +37,6 @@ import 
org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -51,6 +50,7 @@ import java.util.*; public class UnifiedGenotyperEngine { public static final String LOW_QUAL_FILTER_NAME = "LowQual"; + private static final String GPSTRING = "GeneralPloidy"; public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA"; @@ -115,7 +115,7 @@ public class UnifiedGenotyperEngine { // --------------------------------------------------------------------------------------------------------- @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { - this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size())); + this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY); } @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"}) @@ -145,10 +145,10 @@ public class UnifiedGenotyperEngine { * * same as the full call but with allSamples == null * - * @param tracker - * @param refContext - * @param rawContext - * @return + * @param tracker the meta data tracker + * @param refContext the reference base + * @param rawContext contextual information around the locus + * @return the VariantCallContext object */ public List 
calculateLikelihoodsAndGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext, @@ -274,7 +274,7 @@ public class UnifiedGenotyperEngine { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); + return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { @@ -283,7 +283,7 @@ public class UnifiedGenotyperEngine { VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles); if ( vcInput == null ) return null; - vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).referenceBaseForIndel(vcInput.getReferenceBaseForIndel()).make(); + vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).make(); } else { // deal with bad/non-standard reference bases if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) ) @@ -408,11 +408,6 @@ public class UnifiedGenotyperEngine { builder.log10PError(phredScaledConfidence/-10.0); if ( ! 
passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); - if ( limitedContext ) { - builder.referenceBaseForIndel(vc.getReferenceBaseForIndel()); - } else { - builder.referenceBaseForIndel(refContext.getBase()); - } // create the genotypes final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy); @@ -493,8 +488,8 @@ public class UnifiedGenotyperEngine { // if we are subsetting alleles (either because there were too many or because some were not polymorphic) // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). - if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed - vcCall = VCFAlleleClipper.reverseTrimAlleles(vcCall); + if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) + vcCall = VariantContextUtils.reverseTrimAlleles(vcCall); if ( annotationEngine != null && !limitedContext ) { // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations @@ -646,6 +641,9 @@ public class UnifiedGenotyperEngine { if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH",""); + if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) + modelPrefix = GPSTRING + modelPrefix; + // if we're genotyping given alleles and we have a requested SNP at this position, do SNP if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); @@ -654,17 +652,13 @@ public class UnifiedGenotyperEngine { if ( vcInput.isSNP() ) { // ignore SNPs if the user chose INDEL mode only - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) + if 
( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") ) models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); - else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") ) - models.add(UAC.GLmodel); - } + } else if ( vcInput.isIndel() || vcInput.isMixed() ) { // ignore INDELs if the user chose SNP mode only - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) + if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") ) models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); - else if (UAC.GLmodel.name().toUpperCase().contains("INDEL")) - models.add(UAC.GLmodel); } // No support for other types yet } @@ -674,7 +668,7 @@ public class UnifiedGenotyperEngine { models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); } else { - models.add(UAC.GLmodel); + models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase())); } } @@ -736,12 +730,19 @@ public class UnifiedGenotyperEngine { } private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) { + List> afClasses = new PluginManager(AlleleFrequencyCalculationModel.class).getPlugins(); + // user-specified name + String afModelName = UAC.AFmodel.name(); + + if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) + afModelName = GPSTRING + afModelName; + for (int i = 0; i < afClasses.size(); i++) { Class afClass = afClasses.get(i); String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase(); - if (UAC.AFmodel.name().equalsIgnoreCase(key)) { + if (afModelName.equalsIgnoreCase(key)) { try { Object args[] = new Object[]{UAC,N,logger,verboseWriter}; Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, 
int.class, Logger.class, PrintStream.class); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java index 3dd51fa7d..4feba35af 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java @@ -124,7 +124,7 @@ public class ConstrainedMateFixingManager { return first; } - private class SAMRecordHashObject { + private static class SAMRecordHashObject { public SAMRecord record; public boolean wasModified; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 26023bd2f..3a10620aa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; +import java.util.Map; public class HaplotypeIndelErrorModel { @@ -427,8 +428,8 @@ public class HaplotypeIndelErrorModel { // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi)) // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent int j=0; - for (Allele a: haplotypesInVC.keySet()) { - readLikelihoods[i][j]= computeReadLikelihoodGivenHaplotype(haplotypesInVC.get(a), read); + for (Map.Entry a: haplotypesInVC.entrySet()) { + readLikelihoods[i][j]= computeReadLikelihoodGivenHaplotype(a.getValue(), read); if (DEBUG) { System.out.print(read.getReadName()+" "); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index addfc5fab..5e0f15e6a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -32,6 +32,7 @@ import net.sf.samtools.util.SequenceUtil; import net.sf.samtools.util.StringUtil; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; @@ -46,6 +47,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter; @@ -109,6 +111,7 @@ import java.util.*; * * @author ebanks */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) public class IndelRealigner extends ReadWalker { @@ -869,7 +872,13 @@ public class IndelRealigner extends ReadWalker { for ( VariantContext knownIndel : knownIndelsToTry ) { if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() ) continue; - byte[] indelStr = knownIndel.isSimpleInsertion() ? 
knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); + final byte[] indelStr; + if ( knownIndel.isSimpleInsertion() ) { + final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases(); + indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding + } else { + indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1); + } int start = knownIndel.getStart() - leftmostIndex + 1; Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel); if ( c != null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java index 7490262f2..b08def44f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java @@ -29,10 +29,12 @@ import net.sf.samtools.Cigar; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -65,24 +67,13 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * * */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class LeftAlignIndels extends ReadWalker { @Output(required=false, doc="Output bam") protected StingSAMFileWriter writer = null; - /** - * If set 
too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool - * may run out of memory. We recommend that you additionally tell Java to use a temp directory with plenty of available - * space (by setting java.io.tempdir on the command-line). - */ - @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false) - protected int MAX_RECORDS_IN_RAM = 500000; - - public void initialize() { - // set up the output writer - if ( writer != null ) - writer.setMaxRecordsInRam(MAX_RECORDS_IN_RAM); - } + public void initialize() {} private void emit(final SAMRecord read) { if ( writer != null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index f49e78469..65c5a2fbc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.indels; +import com.google.java.contract.Ensures; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; @@ -175,7 +176,8 @@ public class PairHMMIndelErrorModel { } - public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup, + @Ensures("result != null && result.length == pileup.getNumberOfElements()") + public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup, final LinkedHashMap haplotypeMap, final ReferenceContext ref, final int eventLength, @@ -349,7 +351,9 @@ public class PairHMMIndelErrorModel { previousHaplotypeSeen = haplotypeBases.clone(); readLikelihood = 
pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals, - contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities, + (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities), + (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities), + contextLogGapContinuationProbabilities, startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 5f39f4ea8..fc6df6902 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.*; @@ -37,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -94,6 +96,7 @@ import java.util.TreeSet; * * @author ebanks */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis 
Tools", extraDocs = {CommandLineGATK.class} ) @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, BadMateFilter.class, Platform454Filter.class, BadCigarFilter.class}) @Reference(window=@Window(start=-1,stop=50)) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @@ -329,7 +332,7 @@ public class RealignerTargetCreator extends RodWalker intervals = new TreeSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java index 504359f3f..ba16fd709 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java @@ -32,6 +32,7 @@ import org.apache.commons.jexl2.JexlEngine; import org.apache.commons.jexl2.MapContext; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; @@ -53,17 +54,18 @@ import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature; import org.broadinstitute.sting.utils.codecs.refseq.Transcript; import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.collections.CircularArray; import org.broadinstitute.sting.utils.collections.PrimitivePair; import 
org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.io.*; import java.util.*; @@ -126,8 +128,9 @@ import java.util.*; * */ +@DocumentedGATKFeature( groupName = "Cancer-specific Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class}) -public class SomaticIndelDetectorWalker extends ReadWalker { +public class SomaticIndelDetector extends ReadWalker { // @Output // PrintStream out; @Output(doc="File to write variants (indels) in VCF format",required=true) @@ -1128,12 +1131,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker { List alleles = new ArrayList(2); // actual observed (distinct!) alleles at the site List homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype + final byte referencePaddingBase = refBases[(int)start-1]; if ( call.getVariant() == null ) { - // we will need to cteate genotype with two (hom) ref alleles (below). + // we will need to create genotype with two (hom) ref alleles (below). // we can not use 'alleles' list here, since that list is supposed to contain // only *distinct* alleles observed at the site or VCFContext will frown upon us... 
- alleles.add( Allele.create(refBases[(int)start-1],true) ); + alleles.add( Allele.create(referencePaddingBase,true) ); homref_alleles = new ArrayList(2); homref_alleles.add( alleles.get(0)); homref_alleles.add( alleles.get(0)); @@ -1142,7 +1146,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { // (Genotype will tell us whether it is an actual call or not!) int event_length = call.getVariant().lengthOnRef(); if ( event_length < 0 ) event_length = 0; - fillAlleleList(alleles,call); + fillAlleleList(alleles,call,referencePaddingBase); stop += event_length; } @@ -1162,7 +1166,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { filters.add("NoCall"); } VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) - .genotypes(genotypes).filters(filters).referenceBaseForIndel(refBases[(int)start-1]).make(); + .genotypes(genotypes).filters(filters).make(); vcf.add(vc); } @@ -1172,16 +1176,16 @@ public class SomaticIndelDetectorWalker extends ReadWalker { * @param l * @param call */ - private void fillAlleleList(List l, IndelPrecall call) { + private void fillAlleleList(List l, IndelPrecall call, byte referencePaddingBase) { int event_length = call.getVariant().lengthOnRef(); if ( event_length == 0 ) { // insertion - l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) ); - l.add( Allele.create(call.getVariant().getBases(), false )); + l.add( Allele.create(referencePaddingBase,true) ); + l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), false )); } else { //deletion: - l.add( Allele.create(call.getVariant().getBases(), true )); - l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) ); + l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), true )); + l.add( Allele.create(referencePaddingBase,false) ); } } @@ -1215,19 +1219,20 @@ public class SomaticIndelDetectorWalker extends ReadWalker { // } boolean homRefT = ( tCall.getVariant() == null ); boolean homRefN = ( 
nCall.getVariant() == null ); + final byte referencePaddingBase = refBases[(int)start-1]; if ( tCall.getVariant() == null && nCall.getVariant() == null) { // no indel at all ; create base-representation ref/ref alleles for genotype construction - alleles.add( Allele.create(refBases[(int)start-1],true) ); + alleles.add( Allele.create(referencePaddingBase,true) ); } else { // we got indel(s) int event_length = 0; if ( tCall.getVariant() != null ) { // indel in tumor event_length = tCall.getVariant().lengthOnRef(); - fillAlleleList(alleles, tCall); + fillAlleleList(alleles, tCall, referencePaddingBase); } else { event_length = nCall.getVariant().lengthOnRef(); - fillAlleleList(alleles, nCall); + fillAlleleList(alleles, nCall, referencePaddingBase); } if ( event_length > 0 ) stop += event_length; } @@ -1259,7 +1264,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) - .genotypes(genotypes).filters(filters).attributes(attrs).referenceBaseForIndel(refBases[(int)start-1]).make(); + .genotypes(genotypes).filters(filters).attributes(attrs).make(); vcf.add(vc); } @@ -1299,7 +1304,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { @Override public Integer reduceInit() { - return new Integer(0); + return 0; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index aa4b4ab78..bbd4bf92f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.phasing; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; 
+import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -12,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; @@ -72,6 +74,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) public class PhaseByTransmission extends RodWalker, HashMap> { @ArgumentCollection @@ -342,9 +345,10 @@ public class PhaseByTransmission extends RodWalker, HashMa private Genotype getPhasedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, Genotype phasedGenotype){ int phredScoreTransmission = -1; - if(transmissionProb != NO_TRANSMISSION_PROB) - phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb)); - + if(transmissionProb != NO_TRANSMISSION_PROB){ + double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb))); + phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE; + } //Handle null, missing and unavailable genotypes //Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable //genotype so it is safe to return the original genotype in this case. 
@@ -410,7 +414,7 @@ public class PhaseByTransmission extends RodWalker, HashMa buildMatrices(); if(mvFile != null) - mvFile.println("#CHROM\tPOS\tFILTER\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_RAD\tMOTHER_AAD\tMOTHER_HRPL\tMOTHER_HETPL\tMOTHER_HAPL\tFATHER_GT\tFATHER_DP\tFATHER_RAD\tFATHER_AAD\tFATHER_HRPL\tFATHER_HETPL\tFATHER_HAPL\tCHILD_GT\tCHILD_DP\tCHILD_RAD\tCHILD_AAD\tCHILD_HRPL\tCHILD_HETPL\tCHILD_HAPL"); + mvFile.println("CHROM\tPOS\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_AD\tMOTHER_PL\tFATHER_GT\tFATHER_DP\tFATHER_AD\tFATHER_PL\tCHILD_GT\tCHILD_DP\tCHILD_AD\tCHILD_PL"); } @@ -422,10 +426,10 @@ public class PhaseByTransmission extends RodWalker, HashMa Map> families = this.getSampleDB().getFamilies(); Set family; ArrayList parents; - for(String familyID : families.keySet()){ - family = families.get(familyID); + for(Map.Entry> familyEntry : families.entrySet()){ + family = familyEntry.getValue(); if(family.size()<2 || family.size()>3){ - logger.info(String.format("Caution: Family %s has %d members; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyID,family.size())); + logger.info(String.format("Caution: Family %s has %d members; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyEntry.getKey(),family.size())); } else{ for(Sample familyMember : family){ @@ -434,7 +438,7 @@ public class PhaseByTransmission extends RodWalker, HashMa if(family.containsAll(parents)) this.trios.add(familyMember); else - logger.info(String.format("Caution: Family %s skipped as it is not a trio nor a parent/child pair; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyID)); + logger.info(String.format("Caution: Family %s skipped as it is not a trio nor a parent/child pair; At the moment Phase By Transmission only supports trios and parent/child pairs. 
Family skipped.",familyEntry.getKey())); break; } } @@ -776,7 +780,7 @@ public class PhaseByTransmission extends RodWalker, HashMa return metricsCounters; final VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation()); - if (vc == null) + if (vc == null || !vc.isBiallelic()) return metricsCounters; final VariantContextBuilder builder = new VariantContextBuilder(vc); @@ -805,8 +809,8 @@ public class PhaseByTransmission extends RodWalker, HashMa if(father != null){ genotypesContext.replace(phasedFather); updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters); - mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s", - vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(), + mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", + vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(), phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()), phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(), phasedChild.getGenotypeString(),Arrays.asList(phasedChild.getDP()),phasedChild.getAD(),phasedChild.getLikelihoodsString()); @@ -817,8 +821,8 @@ public class PhaseByTransmission extends RodWalker, HashMa updatePairMetricsCounters(phasedMother,phasedChild,mvCount,metricsCounters); if(!(phasedMother.getType()==mother.getType() && phasedChild.getType()==child.getType())) metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); - mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s", - 
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(), + mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s:%s:%s:%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s", + vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(), phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(), phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString()); } @@ -828,15 +832,15 @@ public class PhaseByTransmission extends RodWalker, HashMa updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters); if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType())) metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); - mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s", - vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(), + mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", + vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(), phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(), phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString()); } //Report violation if set so //TODO: ADAPT FOR PAIRS TOO!! 
- if(mvCount>0 && mvFile != null) + if(mvCount>0 && mvFile != null && !vc.isFiltered()) mvFile.println(mvfLine); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java index 4bf24cf86..630d99ce9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java @@ -113,7 +113,7 @@ class PhasingUtils { Map mergedGtAttribs = new HashMap(); PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2); if (phaseQual.PQ != null) - mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); + mergedGtAttribs.put(ReadBackedPhasing.PQ_KEY, phaseQual.PQ); Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).log10PError(mergedGQ).attributes(mergedGtAttribs).phased(phaseQual.isPhased).make(); mergedGenotypes.add(mergedGt); @@ -373,7 +373,7 @@ class PhasingUtils { public PhaseAndQuality(Genotype gt) { this.isPhased = gt.isPhased(); if (this.isPhased) { - this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1); + this.PQ = gt.getAttributeAsDouble(ReadBackedPhasing.PQ_KEY, -1); if ( this.PQ == -1 ) this.PQ = null; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java index 261c51bcc..f49e8f8c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java @@ -27,6 +27,7 @@ import 
org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -40,6 +41,7 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; @@ -91,7 +93,8 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr // Filter out all reads with zero mapping quality @ReadFilters({MappingQualityZeroFilter.class}) -public class ReadBackedPhasingWalker extends RodWalker { +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) +public class ReadBackedPhasing extends RodWalker { private static final boolean DEBUG = false; /** * The VCF file we are phasing variants from. 
@@ -255,7 +258,7 @@ public class ReadBackedPhasingWalker extends RodWalker samplesToPhase) { // for ( String sample : samplesToPhase ) // logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() )); - VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true); + VariantContext subvc = vc.subContextFromSamples(samplesToPhase); // logger.debug("original VC = " + vc); // logger.debug("sub VC = " + subvc); return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBasesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java similarity index 72% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBasesWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java index b846ce6b0..0c323934e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBasesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java @@ -1,19 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** - * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. - * - *

- * Can also count the number of reads matching a given criterion using read filters (see the - * --read-filter command line argument). Simplest example of a read-backed analysis. - * + * Walks over the input data set, calculating the number of bases seen for diagnostic purposes. * *

Input

*

@@ -22,22 +19,23 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * *

Output

*

- * Number of reads seen. + * Number of bases seen. *

* *

Examples

*
  * java -Xmx2g -jar GenomeAnalysisTK.jar \
  *   -R ref.fasta \
- *   -T CountReads \
+ *   -T CountBases \
  *   -o output.txt \
  *   -I input.bam \
  *   [-L input.intervals]
  * 
* */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) -public class CountBasesWalker extends ReadWalker { +public class CountBases extends ReadWalker { public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) { return read.getReadLength(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java index 29b649afe..20b54fa66 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java @@ -5,12 +5,14 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; import java.util.Collections; @@ -21,6 +23,7 @@ import java.util.List; * very useful since overlapping intervals get merged, so you can count the number of intervals the GATK merges down to. * This was its very first use. 
*/ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class CountIntervals extends RefWalker { @Output PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java similarity index 82% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java index 09113704a..bd10eab87 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.qc; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; @@ -37,7 +39,8 @@ import java.io.PrintStream; * * */ -public class CountLociWalker extends LocusWalker implements TreeReducible { +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class CountLoci extends LocusWalker implements TreeReducible { @Output(doc="Write count to this file instead of STDOUT") PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java similarity index 84% rename from 
public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java index dbbd8e761..bc178119d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.samples.Gender; @@ -31,15 +32,15 @@ import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** - * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. - * Can also count the number of reads matching a given criterion using read filters (see the - * --read-filter command line argument). Simplest example of a read-backed analysis. + * Walks over the input data set, calculating the number of reads seen from male samples for diagnostic purposes. */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) -public class CountMalesWalker extends ReadWalker { +public class CountMales extends ReadWalker { public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) { Sample sample = getSampleDB().getSample(read); return sample.getGender() == Gender.MALE ? 
1 : 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java index edbd5ff75..9915d617e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; import java.util.*; @@ -70,7 +72,8 @@ import java.util.*; * * */ -public class CountRODsWalker extends RodWalker, Long>> implements TreeReducible, Long>> { +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class CountRODs extends RodWalker, Long>> implements TreeReducible, Long>> { @Output public PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java similarity index 85% rename from 
public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java index 7c7d6417a..57ddfb083 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java @@ -29,12 +29,14 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.util.Collections; import java.util.List; @@ -63,7 +65,8 @@ import java.util.List; * * */ -public class CountRODsByRefWalker extends RefWalker, Long>> { +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class CountRODsByRef extends RefWalker, Long>> { /** * One or more input rod files @@ -77,14 +80,14 @@ public class CountRODsByRefWalker extends RefWalker, Long> reduce(CountRODsWalker.Datum point, Pair, Long> sum) { + public Pair, Long> reduce(CountRODs.Datum point, Pair, Long> sum) { return crw.reduce(point, sum); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEventsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java similarity index 85% rename from 
public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEventsWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java index c5ab0426d..80845c447 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEventsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java @@ -2,12 +2,14 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -17,7 +19,7 @@ import java.util.HashMap; import java.util.Map; /** - * Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips + * Walks over the input data set, counting the number of read events (from the CIGAR operator) * *

Input

*

@@ -26,23 +28,22 @@ import java.util.Map; * *

Output

*

- * Number of reads ending in each category. - *

+ * Number of read events for each category * *

Examples

*
  * java -Xmx2g -jar GenomeAnalysisTK.jar \
  *   -R ref.fasta \
- *   -T ReadEndIndels \
+ *   -T CountReadEvents \
  *   -o output.grp \
  *   -I input.bam \
  *   [-L input.intervals]
  * 
*/ - +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) -public class CountReadEventsWalker extends ReadWalker> , Map>> { +public class CountReadEvents extends ReadWalker> , Map>> { @Output (doc = "GATKReport table output") PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java similarity index 81% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java index b5a2d183f..5a9e5e7d2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java @@ -1,10 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** @@ -36,8 +38,9 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * * */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) -public class CountReadsWalker extends ReadWalker { +public class CountReads extends ReadWalker { public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) { return 1; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEventWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java similarity index 87% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEventWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java index 9208cbae8..971b5bb85 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEventWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; @@ -9,6 +10,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.List; @@ -30,14 +32,15 @@ import java.util.List; *
  * java -Xmx2g -jar GenomeAnalysisTK.jar \
  *   -R ref.fasta \
- *   -T ReadEndIndels \
+ *   -T CountTerminusEvent \
  *   -o output.txt \
  *   -I input.bam \
  *   [-L input.intervals]
  * 
*/ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) -public class CountTerminusEventWalker extends ReadWalker, Pair> { +public class CountTerminusEvent extends ReadWalker, Pair> { public Pair map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) { List cigarElements = read.getCigar().getCigarElements(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java deleted file mode 100644 index 1cb1579d0..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.walkers.qc; - -import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.utils.collections.PrimitivePair; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.io.*; -import java.util.*; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Apr 9, 2010 - * Time: 12:16:41 PM - * To change this template use File | Settings | File Templates. - */ - -/** - * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. - * Can also count the number of reads matching a given criterion using read filters (see the - * --read-filter command line argument). Simplest example of a read-backed analysis. 
- */ -@Requires({DataSource.READS}) -public class CycleQualityWalker extends ReadWalker { - @Output - protected PrintStream out; - - @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+ - "on mapped reads only, while unmapped reads will be discarded", required=false) - protected boolean MAPPED_ONLY = true; - @Argument(fullName="maxReadLength", shortName="rl", doc="maximum read length", required=false) - protected int MAX_READ_LENGTH = 500; - @Argument(fullName="out_prefix",shortName="p",doc="prefix for output report and statistics files",required=true) - protected String PREFIX = null; -// @Argument(fullName="html",shortName="html",doc="produce html-formatted output (starting with h3-level tags) rather than plain text",required=false) - protected boolean HTML = false; - @Argument(fullName="qualThreshold", shortName="Q",doc="flag as problematic all cycles with av. qualities below the threshold (applies only to the generated report)",required=false) - protected double QTHRESHOLD = 10.0; - @Argument(fullName="useBothQualities",shortName="bothQ",required=false,doc="Generate statistics both for currently set and for "+ - "original base qualities (OQ tag, must be present in the bam); two separate data files will be generated.") - protected boolean ASSESS_BOTH_QUALS = false; - - private Map cyclesByLaneMap = null; - private Map cyclesByLibraryMap = null; - private Map cyclesByLaneMapOrig = null; - private Map cyclesByLibraryMapOrig = null; - - public void initialize() { - if ( PREFIX == null ) throw new ReviewedStingException("Prefix for output file(s) must be specified"); - cyclesByLaneMap = new HashMap(); - cyclesByLibraryMap = new HashMap(); - cyclesByLaneMapOrig = new HashMap(); - cyclesByLibraryMapOrig = new HashMap(); - } - - - public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { - - if ( AlignmentUtils.isReadUnmapped(read) ) return 0; - - 
SAMReadGroupRecord rg = read.getReadGroup(); - - if ( rg == null ) throw new UserException.ReadMissingReadGroup(read); - - String lane = read.getReadGroup().getPlatformUnit(); - String library = read.getReadGroup().getLibrary(); - - if ( lane == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no platform unit information"); - if ( library == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no library information"); - - int end = 0; - - if ( read.getReadPairedFlag() ) { - - if ( read.getFirstOfPairFlag() ) { - if ( read.getSecondOfPairFlag() ) - throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes"); - end = 1; - } else { - if ( ! read.getSecondOfPairFlag() ) - throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes"); - end = 2; - } - } - - CycleStats[] byLane = cyclesByLaneMap.get(lane); - CycleStats[] byLib = cyclesByLibraryMap.get(library); - - //byte [] quals = USE_ORIGINAL_QUALS ? 
AlignmentUtils.getOriginalQualsInCycleOrder(read) : AlignmentUtils.getQualsInCycleOrder(read); - - byte [] quals = AlignmentUtils.getQualsInCycleOrder(read); - - // if end == 0 (single end lane), we allocate array of length 1, otherwise we need two - // elements in the array in order to be able to collect statistics for each end in the pair independently - if ( byLane == null ) cyclesByLaneMap.put(lane,byLane = new CycleStats[(end==0?1:2)]); - if ( byLib == null ) cyclesByLibraryMap.put(library, byLib =new CycleStats[2]); - - if ( end != 0 ) end--; // we will now use 'end' as index into the array of stats - - if ( byLane[end] == null ) byLane[end] = new CycleStats(MAX_READ_LENGTH); - if ( byLib[end] == null ) byLib[end] =new CycleStats(MAX_READ_LENGTH); - byLane[end].add(quals); - byLib[end].add(quals); - - return 1; //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * Provide an initial value for reduce computations. - * - * @return Initial value of reduce. - */ - public Integer reduceInit() { - return 0; //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * Reduces a single map with the accumulator provided as the ReduceType. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * @return accumulator with result of the map taken into account. - */ - public Integer reduce(Integer value, Integer sum) { - return sum.intValue()+value.intValue(); //To change body of implemented methods use File | Settings | File Templates. - } - - public void onTraversalDone(Integer result) { - if ( HTML ) { - out.println("

Cycle Quality QC

\n"); - out.println("File(s) analyzed:
"); - for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"
"); - out.println("
"); - } - if ( HTML ) out.println("

"); - out.println("\n"+result+" reads analyzed\n"); - if ( HTML ) out.println("

"); - out.println("by platform unit:"); - if ( HTML ) out.println("
"); - report2(cyclesByLaneMap, new File(PREFIX+".byLane.txt"),true); - out.println(); - if ( HTML ) out.println("

"); - out.println("\nby library:"); - if ( HTML ) out.println("
"); - report2(cyclesByLibraryMap, new File(PREFIX+".byLibrary.txt"),true); - out.println(); - if ( HTML ) out.println("

"); - } - - - - private void report2(Map m, File f,boolean summaryReport) { - long totalReads_1 =0; - long totalReads_2 =0; - long totalReads_unpaired = 0; - SortedSet columns = new TreeSet(); - int maxLength = 0; // maximum read length across all lanes/read ends analyzed - - for( Map.Entry e : m.entrySet() ) { - if ( e.getValue()[0].getMaxReadLength() > maxLength ) maxLength = e.getValue()[0].getMaxReadLength(); - - if ( e.getValue().length == 1 || e.getValue().length == 2 && e.getValue()[1] == null ) { - totalReads_unpaired += e.getValue()[0].getReadCount(); // single end lane - } else { - totalReads_1 += e.getValue()[0].getReadCount(); - totalReads_2 += e.getValue()[1].getReadCount(); - if ( e.getValue()[1].getMaxReadLength() > maxLength ) maxLength = e.getValue()[1].getMaxReadLength(); - } - - columns.add(e.getKey()); - } - - if ( summaryReport ) { - if ( totalReads_1 == 0 && totalReads_2 != 0) { - out.println(" End 1: No reads"); - if ( HTML ) out.println("
"); - } - if ( totalReads_2 == 0 && totalReads_1 != 0 ) { - out.println(" End 2: No reads"); - if ( HTML ) out.println("
"); - } - if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) { - out.println(" No reads found."); - if ( HTML ) out.println("
"); - } - } - - if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) return; - - try { - BufferedWriter w = new BufferedWriter(new FileWriter(f)); - - w.write("cycle"); - - for( String col : columns ) { - CycleStats[] data = m.get(col); - if ( summaryReport ) { - out.print(" "); - out.print(col); - } - - CycleStats end1 = data[0]; - int minL = ( end1 == null ? 0 : end1.getMinReadLength() ); - int maxL = ( end1 == null ? 0 : end1.getMaxReadLength() ); - - if ( data.length == 2 && data[1] != null ) { - if ( summaryReport ) { - out.println(": paired"); - if ( HTML ) out.println("
"); - out.println(" Reads analyzed:"); - if ( HTML ) out.println("
"); - } - CycleStats end2 = data[1]; - - out.print( " End 1: "+ ( end1 == null ? 0 : end1.getReadCount()) ); - if ( minL == maxL ) out.println("; read length = "+minL); - else out.println("; WARNING: variable read length = "+minL+"-"+maxL); - if ( HTML ) out.println("
"); - - out.print( " End 2: "+ ( end2 == null ? 0 : end2.getReadCount()) ); - minL = ( end2 == null ? 0 : end2.getMinReadLength() ); - maxL = ( end2 == null ? 0 : end2.getMaxReadLength() ); - if ( minL == maxL ) out.println("; read length = "+minL); - else out.println("; WARNING: variable read length = "+minL+"-"+maxL); - if ( HTML ) out.println("
"); - } - else { - out.println(": unpaired"); - if ( HTML ) out.println("
"); - out.print( " Reads analyzed: "+ ( end1 == null ? 0 : end1.getReadCount()) ); - if ( minL == maxL ) out.println("; read length = "+minL); - else out.println("; WARNING: variable read length = "+minL+"-"+maxL); - if ( HTML ) out.println("
"); - } - - w.write('\t') ; - w.write(col); - if ( data.length == 1 || data.length == 2 && data[1] == null ) { - w.write(".unpaired"); - w.write('\t'); - w.write(col); - w.write(".unpaired.stddev"); - } else { - w.write(".end1"); - w.write('\t'); - w.write(col); - w.write(".end1.stddev"); - w.write('\t') ; - w.write(col); - w.write(".end2"); - w.write('\t'); - w.write(col); - w.write(".end2.stddev"); - } - } - - w.write('\n'); - - int cycle = 0; - - Map> problems = new HashMap>(); - - while ( cycle < maxLength ) { - w.write(Integer.toString(cycle+1)); - for ( String col : columns ) { - - CycleStats[] data = m.get(col); - CycleStats end1 = data[0]; - w.write('\t'); - if ( end1 == null || cycle >= end1.getMaxReadLength() ) w.write(".\t."); - else { - double aq = end1.getCycleQualAverage(cycle); - w.write(String.format("%.4f\t%.4f",aq,end1.getCycleQualStdDev(cycle))); - recordProblem(aq,cycle, problems,col+".End1"); - } - if ( data.length > 1 && data[1] != null ) { - w.write('\t'); - CycleStats end2 = data[1]; - if ( end2 == null || cycle >= end2.getMaxReadLength() ) w.write(".\t."); - else { - double aq = end2.getCycleQualAverage(cycle); - w.write(String.format("%.4f\t%.4f",aq,end2.getCycleQualStdDev(cycle))); - recordProblem(aq,cycle, problems,col+".End2"); - } - } - } - w.write('\n'); - cycle++; - } - w.close(); - - if ( HTML ) out.println("
"); - - if ( HTML ) out.println("
"); - out.println("\nOUTCOME (threshold at Q="+QTHRESHOLD+"):"); - if ( HTML ) out.println("
"); - for ( String col : columns ) { - List lp = problems.get(col+".End1"); - out.print(" "+col+" End1:"); - if ( lp == null ) { - out.print(" GOOD"); - } else { - for ( PrimitivePair.Int p : lp ) { - out.print(" "+(p.first+1)+"-"); - if ( p.second >= 0 ) out.print((p.second+1)); - else out.print("END"); - } - } - out.println(); - if ( HTML ) out.println("
"); - - lp = problems.get(col+".End2"); - out.print(" "+col+" End2:"); - if ( lp == null ) { - out.print(" GOOD"); - } else { - for ( PrimitivePair.Int p : lp ) { - out.print(" "+(p.first+1)+"-"); - if ( p.second >= 0 ) out.print(p.second); - else out.print("END"); - } - } - out.println(); - if ( HTML ) out.println("
"); - } - - } catch (IOException ioe) { - throw new UserException.CouldNotCreateOutputFile(f, "Failed to write report", ioe); - } - } - - - private void recordProblem(double q, int cycle, Map> problems, String name) { - - PrimitivePair.Int p = null; - List lp = null; - if ( q < QTHRESHOLD ) { // there is a problem - if ( ! problems.containsKey(name) ) { - lp = new ArrayList(); - p = new PrimitivePair.Int(cycle,-1); - lp.add(p); - problems.put(name,lp); - } else { - lp = problems.get(name); - p = lp.get(lp.size()-1); - } - if ( p.second != -1 ) { // if we are not already inside a run of bad qual bases - lp.add(new PrimitivePair.Int(cycle,-1)); // start new run - } - } else { // good base - if ( problems.containsKey(name) ) { // only if we had problem intervals at all, we need to check if the last one needs to be closed - lp = problems.get(name); - p = lp.get(lp.size()-1); - if ( p.second == -1 ) p.second = cycle - 1; - } - } - } - - - static class CycleStats { - private long readCount = 0; - private double[] cycleQualsAv = null; - private double[] cycleQualsSd = null; - private int minL = 1000000000; // read min. length - private int maxL = 0; // read max. length - - public CycleStats(int N) { - readCount = 0; - cycleQualsAv = new double[N]; - cycleQualsSd = new double[N]; - } - - public void add(byte[] quals) { - if ( quals.length > cycleQualsAv.length ) - throw new UserException("A read of length "+quals.length+" encountered, which exceeds specified maximum read length"); - if ( quals.length > maxL ) maxL = quals.length; - if ( quals.length < minL ) minL = quals.length; - readCount++; - for ( int i = 0 ; i < quals.length ; i++ ) { - // NOTE: in the update equaltions below, there is no need to check if readCount == 1 (i.e. - // we are initializing with the very first record) or not. 
Indeed, the arrays are initialized with - // 0; when the very first value arrives, readCount is 1 and cycleQuals[i] gets set to quals[i] (correct!); - // this will also make the second term in the update equation for Sd (quals[i]-cycleQualsAv[i]) equal - // to 0, so Sd will be initially set to 0. - double oldAvg = cycleQualsAv[i]; // save old mean, will need it for calculation of the variance - cycleQualsAv[i] += ( quals[i] - cycleQualsAv[i] ) / readCount; // update mean - cycleQualsSd[i] += ( quals[i] - oldAvg ) * ( quals[i] - cycleQualsAv[i] ); - } - } - - public long getReadCount() { return readCount; } - public int getMaxReadLength() { return maxL; } - public int getMinReadLength() { return minL; } -// long [] getCycleQualSums() { return cycleQuals; } -// long getCycleQualSum(int i) { return cycleQuals[i]; } - double getCycleQualAverage(int i) { return cycleQualsAv[i]; } - double getCycleQualStdDev(int i) { return Math.sqrt( cycleQualsSd[i]/(readCount-1) ); } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowing.java similarity index 88% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowingWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowing.java index 6f0490fd3..a3df3bc13 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowing.java @@ -24,7 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -32,11 +34,14 
@@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; /** * a walker that simply throws errors. Allows us to test that the engine is behaving as expected with error handling */ -public class ErrorThrowingWalker extends RodWalker implements TreeReducible { +@Hidden +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class ErrorThrowing extends RodWalker implements TreeReducible { @Input(fullName="exception", shortName = "E", doc="Java class of exception to throw", required=true) public String exceptionToThrow; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java similarity index 88% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java index ab5324e39..5082645ad 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java @@ -26,27 +26,20 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequence; -import net.sf.samtools.SAMSequenceRecord; -import org.broad.tribble.Feature; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.collections.ExpandingArrayList; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; -import java.util.Collections; -import java.util.List; /** - * Prints out counts of the number of reference ordered data objects encountered. + * Quality control for the reference fasta * * *

Input

@@ -63,11 +56,12 @@ import java.util.List; *
  * java -Xmx2g -jar GenomeAnalysisTK.jar \
  *   -R ref.fasta \
- *   -T QCRefWalker
+ *   -T QCRef
  * 
* */ -public class QCRefWalker extends RefWalker { +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) +public class QCRef extends RefWalker { @Output public PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java index 27f9d7b6d..16d614afc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java @@ -27,6 +27,7 @@ import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; @@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -51,8 +53,9 @@ import java.util.Arrays; * Walks over the input reads, printing out statistics about the read length, number of clipping events, and length * of the clipping to the output stream. 
*/ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS}) -public class ReadClippingStatsWalker extends ReadWalker { +public class ReadClippingStats extends ReadWalker { @Output protected PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java deleted file mode 100644 index 4425f92c4..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java +++ /dev/null @@ -1,125 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.qc; - -import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.List; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Checks all reads passed through the system to ensure that - * the same read is not passed to the walker multiple consecutive times. - * @author aaron - */ -public class ReadValidationWalker extends ReadWalker { - - // our MD5 sum - private MessageDigest m; - - // private list of md5sums - private final List list = new ArrayList(); - - /** - * The initialize function. - */ - public void initialize() { - try { - m = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new ReviewedStingException("Unable to get the MD5 algorithm. Get a more eXtreme version of JAVA!@!@!!"); - } - } - - /** - * The reads filter function. - * - * @param ref the reference bases that correspond to our read, if a reference was provided - * @param read the read itself, as a SAMRecord - * @return true if the read passes the filter, false if it doesn't - */ - public boolean filter(ReferenceContext ref, GATKSAMRecord read) { - return true; - } - - /** - * The reads map function. - * - * @param ref the reference bases that correspond to our read, if a reference was provided - * @param read the read itself, as a SAMRecord - * @return the read itself - */ - public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { - return read; - } - - /** - * reduceInit is called once before any calls to the map function. 
We use it here to setup the output - * bam file, if it was specified on the command line - * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise - */ - public SAMRecord reduceInit() { - return null; - } - - /** - * given a read and a output location, reduce by emitting the read - * @param read the read itself - * @param output the output source - * @return the SAMFileWriter, so that the next reduce can emit to the same source - */ - public SAMRecord reduce( SAMRecord read, SAMRecord output ) { - if (output == null) - return read; - if ((read.getReferenceIndex() == output.getReferenceIndex()) && (read.getAlignmentStart() < output.getAlignmentStart())) { - logger.error("saw the read " + read.getReadName() + " duplicated, old alignment = " + output.getAlignmentStart()); - } - else if (read.getReferenceIndex() != output.getReferenceIndex()){ - logger.warn("Switching Chromo"); - } - return read; - } - - - /** - * when we're done traversing, close the reads file - * @param output the SAMFileWriter we've used in the reduce phase - */ - public void onTraversalDone( SAMFileWriter output ) { - if (output != null) { - output.close(); - } - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidation.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidation.java index c9602cd6f..4e67beffe 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidation.java @@ -24,20 +24,18 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.commandline.Argument; -import 
org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; @@ -50,8 +48,9 @@ import java.util.List; /** * a walker for validating (in the style of validating pile-up) the ROD system. 
*/ +@Hidden @Reference(window=@Window(start=-40,stop=40)) -public class RodSystemValidationWalker extends RodWalker { +public class RodSystemValidation extends RodWalker { // the divider to use in some of the text output private static final String DIVIDER = ","; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileup.java similarity index 90% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileup.java index cd17e4592..7dedd8cf1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileup.java @@ -29,14 +29,19 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature; -import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import 
org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; @@ -45,10 +50,11 @@ import java.util.Arrays; /** * At every locus in the input set, compares the pileup data (reference base, aligned base from * each overlapping read, and quality score) to the reference pileup data generated by samtools. Samtools' pileup data - * should be specified using the command-line argument '-B pileup,SAMPileup,'. + * should be specified using the command-line argument '-pileup:SAMPileup '. */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @Requires(value={DataSource.READS,DataSource.REFERENCE}) -public class ValidatingPileupWalker extends LocusWalker implements TreeReducible { +public class ValidatingPileup extends LocusWalker implements TreeReducible { @Input(fullName = "pileup", doc="The SAMPileup containing the expected output", required = true) RodBinding pileup; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java deleted file mode 100644 index e1a7772db..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2011 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * 
included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.Arrays; - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 9/26/11 - */ - -public class ContextCovariate implements ExperimentalCovariate { - - private int CONTEXT_SIZE; - private String allN = ""; - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - CONTEXT_SIZE = RAC.CONTEXT_SIZE; - - if (CONTEXT_SIZE <= 0) - throw new UserException("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead"); - - // initialize allN given the size of the context - for (int i = 0; i < CONTEXT_SIZE; i++) - allN += "N"; - } - - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - byte[] bases = read.getReadBases(); - for (int i = 0; i < read.getReadLength(); i++) - comparable[i] = (i < CONTEXT_SIZE) ? 
allN : new String(Arrays.copyOfRange(bases, i - CONTEXT_SIZE, i)); - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return str; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java deleted file mode 100755 index 9b0824ed0..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2011 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.commandline.Gatherer; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.XReadLines; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.PrintStream; -import java.util.HashMap; -import java.util.List; -import java.util.regex.Pattern; - -/** - * User: carneiro - * Date: 3/29/11 - */ - - -public class CountCovariatesGatherer extends Gatherer { - - ///////////////////////////// - // Private Member Variables - ///////////////////////////// - private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); - private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*"); - private static final String EOF_MARKER = "EOF"; - - private HashMap dataMap; - - - private void addCSVData (String line) { - String[] covariates = line.split(","); - String key = ""; - RecalDatumOptimized values; - - for (int i = 0; i < covariates.length-3; i++) { - key += covariates[i] + ","; - } - - values = new RecalDatumOptimized(Integer.parseInt(covariates[covariates.length-3]), - Integer.parseInt(covariates[covariates.length-2])); - - if (dataMap.get(key) != null) { - RecalDatumOptimized currentValues = dataMap.get(key); - values.increment(currentValues); - } - - dataMap.put(key, values); - } - - @Override - public void gather(List inputs, File output) { - dataMap = new HashMap(); - PrintStream o; - try { - o = new PrintStream(output); - } catch ( FileNotFoundException e) { - throw new UserException("File to be output by CountCovariates Gather function was not found"); - } - - boolean sawEOF = false; - boolean printedHeader = false; - - // Read input files - for ( File RECAL_FILE : inputs) { - try { - for ( String line : new XReadLines(RECAL_FILE) ) { - if ( EOF_MARKER.equals(line) ) { - sawEOF = true; // sanity check - } - else if(COMMENT_PATTERN.matcher(line).matches()) 
{ - ; // It doesn't make any sense to print intermediate comments, unless we merge them somehow (would require strict definition for the header) - } - else if (COVARIATE_PATTERN.matcher(line).matches()) { - if (!printedHeader) - o.println(line); - } - else { // Found a line of data - addCSVData(line); // Parse the line and add the data to the HashMap - } - } - - } catch ( FileNotFoundException e ) { - throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e); - } - - if ( !sawEOF ) { - final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted!"; - throw new UserException.MalformedFile(RECAL_FILE, errorMessage); - } - printedHeader = true; - } - - // Write output file from dataMap - for(String key : dataMap.keySet()) { - RecalDatumOptimized values = dataMap.get(key); - String v = values.getNumObservations() + "," + values.getNumMismatches() + "," + values.empiricalQualByte(); - o.println(key + v); - } - o.println("EOF"); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java deleted file mode 100755 index a99f35f45..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ /dev/null @@ -1,624 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this 
permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broad.tribble.Feature; -import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; -import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -/** - * First pass of the base quality score recalibration -- Generates recalibration table based on 
various user-specified covariates (such as reported quality score, cycle, and dinucleotide). - * - *

- * This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating - * only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative - * of poor base quality. This walker generates tables based on various user-specified covariates (such as read group, - * reported quality score, cycle, and dinucleotide). Since there is a large amount of data one can then calculate an empirical - * probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations. - * The output file is a CSV list of (the several covariate values, num observations, num mismatches, empirical quality score). - *

- * Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will be added for the user regardless of whether or not they were specified. - * - *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration - * - *

Input

- *

- * The input read data whose base quality scores need to be assessed. - *

- * A database of known polymorphic sites to skip over. - *

- * - *

Output

- *

- * A recalibration table file in CSV format that is used by the TableRecalibration walker. - * It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score. - * - * The first 20 lines of such a file is shown below. - * * The file begins with a series of comment lines describing: - * ** The number of counted loci - * ** The number of counted bases - * ** The number of skipped loci and the fraction skipped, due to presence in dbSNP or bad reference bases - * - * * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records. - * - * * After the header, data records occur one per line until the end of the file. The first several items on a line are the values of the individual covariates and will change - * depending on which covariates were specified at runtime. The last three items are the data- that is, number of observations for this combination of covariates, number of - * reference mismatches, and the raw empirical quality score calculated by phred-scaling the mismatch rate. - * - *

- * # Counted Sites    19451059
- * # Counted Bases    56582018
- * # Skipped Sites    82666
- * # Fraction Skipped 1 / 235 bp
- * ReadGroup,QualityScore,Cycle,Dinuc,nObservations,nMismatches,Qempirical
- * SRR006446,11,65,CA,9,1,10
- * SRR006446,11,48,TA,10,0,40
- * SRR006446,11,67,AA,27,0,40
- * SRR006446,11,61,GA,11,1,10
- * SRR006446,12,34,CA,47,1,17
- * SRR006446,12,30,GA,52,1,17
- * SRR006446,12,36,AA,352,1,25
- * SRR006446,12,17,TA,182,11,12
- * SRR006446,11,48,TG,2,0,40
- * SRR006446,11,67,AG,1,0,40
- * SRR006446,12,34,CG,9,0,40
- * SRR006446,12,30,GG,43,0,40
- * ERR001876,4,31,AG,1,0,40
- * ERR001876,4,31,AT,2,2,1
- * ERR001876,4,31,CA,1,0,40
- * 
- *

- * - *

Examples

- *
- * java -Xmx4g -jar GenomeAnalysisTK.jar \
- *   -R resources/Homo_sapiens_assembly18.fasta \
- *   -knownSites bundle/hg18/dbsnp_132.hg18.vcf \
- *   -knownSites another/optional/setOfSitesToMask.vcf \
- *   -I my_reads.bam \
- *   -T CountCovariates \
- *   -cov ReadGroupCovariate \
- *   -cov QualityScoreCovariate \
- *   -cov CycleCovariate \
- *   -cov DinucCovariate \
- *   -recalFile my_reads.recal_data.csv
- * 
- */ - -@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) -@By(DataSource.READS) // Only look at covered loci, not every loci of the reference file -@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) -// Filter out all reads with zero or unavailable mapping quality -@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) -// This walker requires both -I input.bam and -R reference.fasta -@PartitionBy(PartitionType.LOCUS) -public class CountCovariatesWalker extends LocusWalker implements TreeReducible { - - ///////////////////////////// - // Constants - ///////////////////////////// - private static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; //used to label GATKSAMRecords that should be skipped. - private static final String SEEN_ATTRIBUTE = "SEEN"; //used to label GATKSAMRecords as processed. - private static final String COVARS_ATTRIBUTE = "COVARS"; //used to store covariates array as a temporary attribute inside GATKSAMRecord. - - ///////////////////////////// - // Shared Arguments - ///////////////////////////// - @ArgumentCollection - private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - - ///////////////////////////// - // Command Line Arguments - ///////////////////////////// - /** - * This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference, - * so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites. This tool accepts any number of RodBindings (VCF, Bed, etc.) - * for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites. - * Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument. 
- */ - @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false) - public List> knownSites = Collections.emptyList(); - - /** - * After the header, data records occur one per line until the end of the file. The first several items on a line are the - * values of the individual covariates and will change depending on which covariates were specified at runtime. The last - * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, - * and the raw empirical quality score calculated by phred-scaling the mismatch rate. - */ - @Output(fullName = "recal_file", shortName = "recalFile", required = true, doc = "Filename for the output covariates table recalibration file") - @Gather(CountCovariatesGatherer.class) - public PrintStream RECAL_FILE; - - @Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false) - private boolean LIST_ONLY = false; - - /** - * See the -list argument to view available covariates. - */ - @Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. 
ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false) - private String[] COVARIATES = null; - @Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false) - private boolean USE_STANDARD_COVARIATES = false; - - ///////////////////////////// - // Debugging-only Arguments - ///////////////////////////// - @Argument(fullName = "dont_sort_output", shortName = "unsorted", required = false, doc = "If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.") - private boolean DONT_SORT_OUTPUT = false; - - /** - * This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option. - */ - @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. 
Very unsafe and for expert users only.") - private boolean RUN_WITHOUT_DBSNP = false; - - ///////////////////////////// - // Private Member Variables - ///////////////////////////// - private final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps - private final ArrayList requestedCovariates = new ArrayList(); // A list to hold the covariate objects that were requested - private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) - private static int DBSNP_VALIDATION_CHECK_FREQUENCY = 1000000; // how often to validate dbsnp mismatch rate (in terms of loci seen) - - public static class CountedData { - private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file - private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file - private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file - private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base - private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted. 
- - private long dbSNPCountsMM = 0, dbSNPCountsBases = 0; // mismatch/base counts for dbSNP loci - private long novelCountsMM = 0, novelCountsBases = 0; // mismatch/base counts for non-dbSNP loci - private int lociSinceLastDbsnpCheck = 0; // loci since last dbsnp validation - - /** - * Adds the values of other to this, returning this - * - * @param other - * @return this object - */ - public CountedData add(CountedData other) { - countedSites += other.countedSites; - countedBases += other.countedBases; - skippedSites += other.skippedSites; - solidInsertedReferenceBases += other.solidInsertedReferenceBases; - otherColorSpaceInconsistency += other.otherColorSpaceInconsistency; - dbSNPCountsMM += other.dbSNPCountsMM; - dbSNPCountsBases += other.dbSNPCountsBases; - novelCountsMM += other.novelCountsMM; - novelCountsBases += other.novelCountsBases; - lociSinceLastDbsnpCheck += other.lociSinceLastDbsnpCheck; - return this; - } - } - - //--------------------------------------------------------------------------------------------------------------- - // - // initialize - // - //--------------------------------------------------------------------------------------------------------------- - - /** - * Parse the -cov arguments and create a list of covariates to be used here - * Based on the covariates' estimates for initial capacity allocate the data hashmap - */ - public void initialize() { - - if (RAC.FORCE_PLATFORM != null) { - RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; - } - - // Get a list of all available covariates - final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); - final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); - final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); - - // Print and exit if that's what was requested - if (LIST_ONLY) { - logger.info("Available covariates:"); - for (Class covClass : covariateClasses) { - logger.info(covClass.getSimpleName()); - } - 
logger.info(""); - - System.exit(0); // Early exit here because user requested it - } - - // Warn the user if no dbSNP file or other variant mask was specified - if (knownSites.isEmpty() && !RUN_WITHOUT_DBSNP) { - throw new UserException.CommandLineException("This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation."); - } - - // Initialize the requested covariates by parsing the -cov argument - // First add the required covariates - if (requiredClasses.size() == 2) { // readGroup and reported quality score - requestedCovariates.add(new ReadGroupCovariate()); // Order is important here - requestedCovariates.add(new QualityScoreCovariate()); - } - else { - throw new UserException.CommandLineException("There are more required covariates than expected. The instantiation list needs to be updated with the new required covariate and in the correct order."); - } - // Next add the standard covariates if -standard was specified by the user - if (USE_STANDARD_COVARIATES) { - // We want the standard covariates to appear in a consistent order but the packageUtils method gives a random order - // A list of Classes can't be sorted, but a list of Class names can be - final List standardClassNames = new ArrayList(); - for (Class covClass : standardClasses) { - standardClassNames.add(covClass.getName()); - } - Collections.sort(standardClassNames); // Sort the list of class names - for (String className : standardClassNames) { - for (Class covClass : standardClasses) { // Find the class that matches this class name - if (covClass.getName().equals(className)) { - try { - final Covariate covariate = (Covariate) covClass.newInstance(); - requestedCovariates.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - } - } - } - // Finally parse the -cov arguments that were provided, skipping over the ones already specified - if 
(COVARIATES != null) { - for (String requestedCovariateString : COVARIATES) { - boolean foundClass = false; - for (Class covClass : covariateClasses) { - if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class - foundClass = true; - if (!requiredClasses.contains(covClass) && (!USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { - try { - // Now that we've found a matching class, try to instantiate it - final Covariate covariate = (Covariate) covClass.newInstance(); - requestedCovariates.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - } - } - - if (!foundClass) { - throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates."); - } - } - } - - logger.info("The covariates being used here: "); - for (Covariate cov : requestedCovariates) { - logger.info("\t" + cov.getClass().getSimpleName()); - cov.initialize(RAC); // Initialize any covariate member variables using the shared argument collection - } - } - - //--------------------------------------------------------------------------------------------------------------- - // - // map - // - //--------------------------------------------------------------------------------------------------------------- - - /** - * For each read at this locus get the various covariate values and increment that location in the map based on - * whether or not the base matches the reference at this particular location - * - * @param tracker The reference metadata tracker - * @param ref The reference context - * @param context The alignment context - * @return Returns 1, but this value isn't used in the reduce step - */ - public CountedData map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - // Only use data from non-dbsnp sites - // 
Assume every mismatch at a non-dbsnp site is indicative of poor quality - CountedData counter = new CountedData(); - if (tracker.getValues(knownSites).size() == 0) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed - // For each read at this locus - for (final PileupElement p : context.getBasePileup()) { - final GATKSAMRecord gatkRead = p.getRead(); - int offset = p.getOffset(); - - if (gatkRead.containsTemporaryAttribute(SKIP_RECORD_ATTRIBUTE)) { - continue; - } - - if (!gatkRead.containsTemporaryAttribute(SEEN_ATTRIBUTE)) { - gatkRead.setTemporaryAttribute(SEEN_ATTRIBUTE, true); - RecalDataManager.parseSAMRecord(gatkRead, RAC); - - // Skip over reads with no calls in the color space if the user requested it - if (!(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace(gatkRead)) { - gatkRead.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true); - continue; - } - - RecalDataManager.parseColorSpace(gatkRead); - gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates)); - } - - // Skip this position if base quality is zero - if (gatkRead.getBaseQualities()[offset] > 0) { - - byte[] bases = gatkRead.getReadBases(); - byte refBase = ref.getBase(); - - // Skip if this base is an 'N' or etc. 
- if (BaseUtils.isRegularBase(bases[offset])) { - - // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it - if (!gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING || - !RecalDataManager.isInconsistentColorSpace(gatkRead, offset)) { - - // This base finally passed all the checks for a good base, so add it to the big data hashmap - updateDataFromRead(counter, gatkRead, offset, refBase); - - } - else { // calculate SOLID reference insertion rate - if (refBase == bases[offset]) { - counter.solidInsertedReferenceBases++; - } - else { - counter.otherColorSpaceInconsistency++; - } - } - } - } - } - counter.countedSites++; - } - else { // We skipped over the dbSNP site, and we are only processing every Nth locus - counter.skippedSites++; - updateMismatchCounts(counter, context, ref.getBase()); // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable - } - - return counter; - } - - /** - * Update the mismatch / total_base counts for a given class of loci. - * - * @param counter The CountedData to be updated - * @param context The AlignmentContext which holds the reads covered by this locus - * @param refBase The reference base - */ - private static void updateMismatchCounts(CountedData counter, final AlignmentContext context, final byte refBase) { - for (PileupElement p : context.getBasePileup()) { - final byte readBase = p.getBase(); - final int readBaseIndex = BaseUtils.simpleBaseToBaseIndex(readBase); - final int refBaseIndex = BaseUtils.simpleBaseToBaseIndex(refBase); - - if (readBaseIndex != -1 && refBaseIndex != -1) { - if (readBaseIndex != refBaseIndex) { - counter.novelCountsMM++; - } - counter.novelCountsBases++; - } - } - } - - /** - * Major workhorse routine for this walker. 
- * Loop through the list of requested covariates and pick out the value from the read, offset, and reference - * Using the list of covariate values as a key, pick out the RecalDatum and increment, - * adding one to the number of observations and potentially one to the number of mismatches - * Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls - * because pulling things out of the SAMRecord is an expensive operation. - * - * @param counter Data structure which holds the counted bases - * @param gatkRead The SAMRecord holding all the data for this read - * @param offset The offset in the read for this locus - * @param refBase The reference base at this locus - */ - private void updateDataFromRead(CountedData counter, final GATKSAMRecord gatkRead, final int offset, final byte refBase) { - final Object[][] covars = (Comparable[][]) gatkRead.getTemporaryAttribute(COVARS_ATTRIBUTE); - final Object[] key = covars[offset]; - - // Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap - final NestedHashMap data = dataManager.data; //optimization - create local reference - RecalDatumOptimized datum = (RecalDatumOptimized) data.get(key); - if (datum == null) { // key doesn't exist yet in the map so make a new bucket and add it - // initialized with zeros, will be incremented at end of method - datum = (RecalDatumOptimized) data.put(new RecalDatumOptimized(), true, (Object[]) key); - } - - // Need the bases to determine whether or not we have a mismatch - final byte base = gatkRead.getReadBases()[offset]; - final long curMismatches = datum.getNumMismatches(); - - // Add one to the number of observations and potentially one to the number of mismatches - datum.incrementBaseCounts(base, refBase); - counter.countedBases++; - counter.novelCountsBases++; - counter.novelCountsMM += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dnsnp 
mismatch rate is reasonable - } - - //--------------------------------------------------------------------------------------------------------------- - // - // reduce - // - //--------------------------------------------------------------------------------------------------------------- - - /** - * Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker. - * - * @return returns A PrintStream created from the -recalFile filename argument specified to the walker - */ - public CountedData reduceInit() { - return new CountedData(); - } - - /** - * The Reduce method doesn't do anything for this walker. - * - * @param mapped Result of the map. This value is immediately ignored. - * @param sum The summing CountedData used to output the CSV data - * @return returns The sum used to output the CSV data - */ - public CountedData reduce(CountedData mapped, CountedData sum) { - // Do a dbSNP sanity check every so often - return validatingDbsnpMismatchRate(sum.add(mapped)); - } - - /** - * Validate the dbSNP reference mismatch rates. - */ - private CountedData validatingDbsnpMismatchRate(CountedData counter) { - if (++counter.lociSinceLastDbsnpCheck >= DBSNP_VALIDATION_CHECK_FREQUENCY) { - counter.lociSinceLastDbsnpCheck = 0; - - if (counter.novelCountsBases != 0L && counter.dbSNPCountsBases != 0L) { - final double fractionMM_novel = (double) counter.novelCountsMM / (double) counter.novelCountsBases; - final double fractionMM_dbsnp = (double) counter.dbSNPCountsMM / (double) counter.dbSNPCountsBases; - - if (fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel) { - Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. 
" + String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel)); - DBSNP_VALIDATION_CHECK_FREQUENCY *= 2; // Don't annoyingly output the warning message every megabase of a large file - } - } - } - - return counter; - } - - public CountedData treeReduce(CountedData sum1, CountedData sum2) { - return validatingDbsnpMismatchRate(sum1.add(sum2)); - } - - /** - * Write out the full data hashmap to disk in CSV format - * - * @param sum The CountedData to write out to RECAL_FILE - */ - public void onTraversalDone(CountedData sum) { - logger.info("Writing raw recalibration data..."); - if (sum.countedBases == 0L) { - throw new UserException.BadInput("Could not find any usable data in the input BAM file(s)."); - } - outputToCSV(sum, RECAL_FILE); - logger.info("...done!"); - } - - /** - * For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format - * - * @param recalTableStream The PrintStream to write out to - */ - private void outputToCSV(CountedData sum, final PrintStream recalTableStream) { - recalTableStream.printf("# Counted Sites %d%n", sum.countedSites); - recalTableStream.printf("# Counted Bases %d%n", sum.countedBases); - recalTableStream.printf("# Skipped Sites %d%n", sum.skippedSites); - recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double) sum.countedSites / sum.skippedSites); - - if (sum.solidInsertedReferenceBases != 0) { - recalTableStream.printf("# Fraction SOLiD inserted reference 1 / %.0f bases%n", (double) sum.countedBases / sum.solidInsertedReferenceBases); - recalTableStream.printf("# Fraction other color space inconsistencies 1 / %.0f bases%n", (double) sum.countedBases / sum.otherColorSpaceInconsistency); - } - - // Output header saying which covariates were used and in what order - for (Covariate cov : requestedCovariates) { - recalTableStream.print(cov.getClass().getSimpleName().split("Covariate")[0] + ","); - } 
- recalTableStream.println("nObservations,nMismatches,Qempirical"); - - if (DONT_SORT_OUTPUT) { - printMappings(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data); - } - else { - printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data); - } - - // print out an EOF marker - recalTableStream.println(TableRecalibrationWalker.EOF_MARKER); - } - - private void printMappingsSorted(final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) { - final ArrayList keyList = new ArrayList(); - for (Object comp : data.keySet()) { - keyList.add((Comparable) comp); - } - - Collections.sort(keyList); - - for (Comparable comp : keyList) { - key[curPos] = comp; - final Object val = data.get(comp); - if (val instanceof RecalDatumOptimized) { // We are at the end of the nested hash maps - // For each Covariate in the key - for (Object compToPrint : key) { - // Output the Covariate's value - recalTableStream.print(compToPrint + ","); - } - // Output the RecalDatum entry - recalTableStream.println(((RecalDatumOptimized) val).outputToCSV()); - } - else { // Another layer in the nested hash map - printMappingsSorted(recalTableStream, curPos + 1, key, (Map) val); - } - } - } - - private void printMappings(final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) { - for (Object comp : data.keySet()) { - key[curPos] = comp; - final Object val = data.get(comp); - if (val instanceof RecalDatumOptimized) { // We are at the end of the nested hash maps - // For each Covariate in the key - for (Object compToPrint : key) { - // Output the Covariate's value - recalTableStream.print(compToPrint + ","); - } - // Output the RecalDatum entry - recalTableStream.println(((RecalDatumOptimized) val).outputToCSV()); - } - else { // Another layer in the nested hash map - printMappings(recalTableStream, curPos + 1, key, (Map) val); - } - } - } -} - diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java deleted file mode 100755 index 9d5747023..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java +++ /dev/null @@ -1,56 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Oct 30, 2009 - * - * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read. 
- * In general most error checking and adjustments to the data are done before the call to the covariates getValue methods in order to speed up the code. - * This unfortunately muddies the code, but most of these corrections can be done per read while the covariates get called per base, resulting in a big speed up. - */ - -public interface Covariate { - public void initialize(RecalibrationArgumentCollection RAC); // Initialize any member variables using the command-line arguments passed to the walkers - - public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker - - public void getValues(GATKSAMRecord read, Comparable[] comparable); - //Takes an array of size (at least) read.getReadLength() and fills it with covariate - //values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows - //read-specific calculations to be done just once rather than for each offset. 
-} - -interface RequiredCovariate extends Covariate {} - -interface StandardCovariate extends Covariate {} - -interface ExperimentalCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java deleted file mode 100755 index b8d13ca10..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ /dev/null @@ -1,212 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.NGSPlatform; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.EnumSet; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Oct 30, 2009 - * - * The Cycle covariate. - * For Solexa the cycle is simply the position in the read (counting backwards if it is a negative strand read) - * For 454 the cycle is the TACG flow cycle, that is, each flow grabs all the TACG's in order in a single cycle - * For example, for the read: AAACCCCGAAATTTTTACTG - * the cycle would be 11111111222333333344 - * For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round - */ - -public class CycleCovariate implements StandardCovariate { - private final static EnumSet DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS); - private final static EnumSet FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT); - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - if (RAC.DEFAULT_PLATFORM != null) { - if (RAC.DEFAULT_PLATFORM.equalsIgnoreCase("SLX") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("ILLUMINA") || - RAC.DEFAULT_PLATFORM.contains("454") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("SOLID") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("ABI_SOLID")) { - // nothing to do - } - else { - throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform. 
Implemented options are illumina, 454, and solid"); - } - } - } - - // Used to pick out the covariate's value from attributes of the read - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - - //----------------------------- - // Illumina, Solid, PacBio, and Complete Genomics - //----------------------------- - - final NGSPlatform ngsPlatform = read.getNGSPlatform(); - if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) { - final int init; - final int increment; - if (!read.getReadNegativeStrandFlag()) { - // Differentiate between first and second of pair. - // The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group - // to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair. - // Therefore the cycle covariate must differentiate between first and second of pair reads. - // This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because - // the current sequential model would consider the effects independently instead of jointly. 
- if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) { - //second of pair, positive strand - init = -1; - increment = -1; - } - else { - //first of pair, positive strand - init = 1; - increment = 1; - } - - } - else { - if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) { - //second of pair, negative strand - init = -read.getReadLength(); - increment = 1; - } - else { - //first of pair, negative strand - init = read.getReadLength(); - increment = -1; - } - } - - int cycle = init; - for (int i = 0; i < read.getReadLength(); i++) { - comparable[i] = cycle; - cycle += increment; - } - } - - //----------------------------- - // 454 and Ion Torrent - //----------------------------- - else if (FLOW_CYCLE_PLATFORMS.contains(ngsPlatform)) { - - final int readLength = read.getReadLength(); - final byte[] bases = read.getReadBases(); - - // Differentiate between first and second of pair. - // The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group - // to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair. - // Therefore the cycle covariate must differentiate between first and second of pair reads. - // This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because - // the current sequential model would consider the effects independently instead of jointly. - final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag(); - - int cycle = multiplyByNegative1 ? 
-1 : 1; - - // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change - // For example, AAAAAAA was probably read in two flow cycles but here we count it as one - if (!read.getReadNegativeStrandFlag()) { // Forward direction - int iii = 0; - while (iii < readLength) { - while (iii < readLength && bases[iii] == (byte) 'T') { - comparable[iii] = cycle; - iii++; - } - while (iii < readLength && bases[iii] == (byte) 'A') { - comparable[iii] = cycle; - iii++; - } - while (iii < readLength && bases[iii] == (byte) 'C') { - comparable[iii] = cycle; - iii++; - } - while (iii < readLength && bases[iii] == (byte) 'G') { - comparable[iii] = cycle; - iii++; - } - if (iii < readLength) { - if (multiplyByNegative1) - cycle--; - else - cycle++; - } - if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) { - comparable[iii] = cycle; - iii++; - } - - } - } - else { // Negative direction - int iii = readLength - 1; - while (iii >= 0) { - while (iii >= 0 && bases[iii] == (byte) 'T') { - comparable[iii] = cycle; - iii--; - } - while (iii >= 0 && bases[iii] == (byte) 'A') { - comparable[iii] = cycle; - iii--; - } - while (iii >= 0 && bases[iii] == (byte) 'C') { - comparable[iii] = cycle; - iii--; - } - while (iii >= 0 && bases[iii] == (byte) 'G') { - comparable[iii] = cycle; - iii--; - } - if (iii >= 0) { - if (multiplyByNegative1) - cycle--; - else - cycle++; - } - if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) { - comparable[iii] = cycle; - iii--; - } - } - } - } - else { - throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. 
illumina, 454, and solid"); - } - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return Integer.parseInt(str); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Dinuc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Dinuc.java deleted file mode 100755 index 9e1c2fe1f..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Dinuc.java +++ /dev/null @@ -1,71 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. 
- * User: rpoplin - * Date: Nov 16, 2009 - */ -public class Dinuc implements Comparable{ - private byte first; - private byte second; - - public Dinuc() { - first = 0; - second = 0; - } - - public Dinuc(final byte _first, final byte _second) { - first = _first; - second = _second; - } - - public final void setValues(final byte _first, final byte _second) { - first = _first; - second = _second; - } - - public int compareTo(final Dinuc that) { - if( this.first > that.first ) { return 1; } - else if( this.first < that.first ) { return -1; } - else { //this.first equals that.first - if( this.second > that.second ) { return 1; } - else if( this.second < that.second ) { return -1; } - else { return 0; } - } - - } - - public static int hashBytes(final byte byte1, final byte byte2) { - return byte1 << 8 + byte2; - } - - public String toString() { // This method call is how the Dinuc will get written out to the table recalibration file - byte[] byteArray = {first,second}; - return new String(byteArray); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java deleted file mode 100755 index 9a401d09f..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ /dev/null @@ -1,129 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.HashMap; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, 
publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 3, 2009 - * - * The Dinucleotide covariate. This base and the one that came before it in the read, remembering to swap directions if negative strand read. - * This covariate assumes that the bases have been swapped to their complement base counterpart if this is a negative strand read. - * This assumption is made to speed up the code. 
- */ - -public class DinucCovariate implements StandardCovariate { - - private static final byte NO_CALL = (byte) 'N'; - private static final Dinuc NO_DINUC = new Dinuc(NO_CALL, NO_CALL); - - private HashMap dinucHashMap; - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - final byte[] BASES = {(byte) 'A', (byte) 'C', (byte) 'G', (byte) 'T'}; - dinucHashMap = new HashMap(); - for (byte byte1 : BASES) { - for (byte byte2 : BASES) { - dinucHashMap.put(Dinuc.hashBytes(byte1, byte2), new Dinuc(byte1, byte2)); // This might seem silly, but Strings are too slow - } - } - // Add the "no dinuc" entry too - dinucHashMap.put(Dinuc.hashBytes(NO_CALL, NO_CALL), NO_DINUC); - } - - /** - * Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read. - */ - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - final HashMap dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap - final int readLength = read.getReadLength(); - final boolean negativeStrand = read.getReadNegativeStrandFlag(); - byte[] bases = read.getReadBases(); - byte base; - byte prevBase; - int offset = 0; - // If this is a negative strand read then we need to reverse the direction for our previous base - - if (negativeStrand) { - bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place - } - comparable[0] = NO_DINUC; // No dinuc at the beginning of the read - - prevBase = bases[0]; - offset++; - while (offset < readLength) { - // Note: We are using the previous base in the read, not the - // previous base in the reference. This is done in part to be consistent with unmapped reads. 
- base = bases[offset]; - if (BaseUtils.isRegularBase(prevBase)) { - comparable[offset] = dinucHashMapRef.get(Dinuc.hashBytes(prevBase, base)); - } - else { - comparable[offset] = NO_DINUC; - } - - offset++; - prevBase = base; - } - if (negativeStrand) { - reverse(comparable); - } - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - byte[] bytes = str.getBytes(); - final Dinuc returnDinuc = dinucHashMap.get(Dinuc.hashBytes(bytes[0], bytes[1])); - if (returnDinuc.compareTo(NO_DINUC) == 0) { - return null; - } - return returnDinuc; - } - - /** - * Reverses the given array in place. - * - * @param array any array - */ - private static void reverse(final Comparable[] array) { - final int arrayLength = array.length; - for (int l = 0, r = arrayLength - 1; l < r; l++, r--) { - final Comparable temp = array[l]; - array[l] = array[r]; - array[r] = temp; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java deleted file mode 100755 index e9bfa3513..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java +++ /dev/null @@ -1,55 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * 
included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: carneiro - * Date: Mar 22, 2012 - * - * Object that holds the empirical quality and estimated reported quality values for on-the-fly recalibration. This is a simplification of the RecalDatum object - */ - -public class EmpiricalQual { - - private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations - private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) - - private EmpiricalQual() {} - - public EmpiricalQual(final double estimatedQReported, final double empiricalQuality) { - this.estimatedQReported = estimatedQReported; - this.empiricalQuality = empiricalQuality; - } - - public final double getEstimatedQReported() { - return estimatedQReported; - } - - public final double getEmpiricalQuality() { - return empiricalQuality; - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java deleted file mode 100755 index 14ffd35a4..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java +++ /dev/null @@ -1,96 +0,0 @@ -package 
org.broadinstitute.sting.gatk.walkers.recalibration; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Jan 29, 2010 - * - * The number of previous N bases (along the direction of the read) that contain G's and C's. The goal is to correct for dye slippage. - * Only valid for Illumina reads. Otherwise return -1. 
- */ - -public class GCContentCovariate implements ExperimentalCovariate { - - private int numBack = 7; - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - numBack = RAC.HOMOPOLYMER_NBACK; - } - - // Used to pick out the covariate's value from attributes of the read - private Comparable getValue(final SAMRecord read, final int offset) { - - // ATTGCCCCGTAAAAAAAGAGAA - // 0000123456654321001122 - - if (read.getReadGroup().getPlatform().equalsIgnoreCase("ILLUMINA") || read.getReadGroup().getPlatform().equalsIgnoreCase("SLX")) { - int numGC = 0; - int startPos; - int stopPos; - final byte[] bases = read.getReadBases(); - if (!read.getReadNegativeStrandFlag()) { // Forward direction - startPos = Math.max(offset - numBack, 0); - stopPos = Math.max(offset - 1, 0); - } - else { // Negative direction - startPos = Math.min(offset + 2, bases.length); - stopPos = Math.min(offset + numBack + 1, bases.length); - } - - for (int iii = startPos; iii < stopPos; iii++) { - if (bases[iii] == (byte) 'G' || bases[iii] == (byte) 'C') { - numGC++; - } - } - - return numGC; - } - else { // This effect is specific to the Illumina platform - return -1; - } - } - - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - for (int iii = 0; iii < read.getReadLength(); iii++) { - comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized - } - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return Integer.parseInt(str); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java deleted file mode 100755 index 004fb0bdb..000000000 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java +++ /dev/null @@ -1,109 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Dec 4, 2009 - * - * The Homopolymer Run Covariate. This is the number of consecutive bases in the previous N that match the current base. 
- * For example, if N = 10: - * ATTGCCCCGTAAAAAAAAATA - * 001001230001234567800 - */ - -public class HomopolymerCovariate implements ExperimentalCovariate { - - private int numBack; - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - numBack = RAC.HOMOPOLYMER_NBACK; - } - - // Used to pick out the covariate's value from attributes of the read - private Comparable getValue(final SAMRecord read, final int offset) { - - // This block of code is for if you don't want to only count consecutive bases - // ATTGCCCCGTAAAAAAAAATA - // 001001231211234567819 - /* - int numAgree = 0; // The number of bases that agree with you in the previous numBack bases of the read - int startPos = 0; - int stopPos = 0; - byte[] bases = read.getReadBases(); - byte thisBase = bases[offset]; - if( !read.getReadNegativeStrandFlag() ) { // Forward direction - startPos = Math.max(offset - numBack, 0); - stopPos = Math.max(offset - 1, 0); - } else { // Negative direction - startPos = Math.min(offset + 2, bases.length); - stopPos = Math.min(offset + numBack + 1, bases.length); - } - - for( int iii = startPos; iii < stopPos; iii++ ) { - if( bases[iii] == thisBase ) { numAgree++; } - } - */ - - int numAgree = 0; // The number of consecutive bases that agree with you in the previous numBack bases of the read - final byte[] bases = read.getReadBases(); - int iii = offset; - if (!read.getReadNegativeStrandFlag()) { // Forward direction - while (iii <= bases.length - 2 && bases[iii] == bases[iii + 1] && numAgree < numBack) { - numAgree++; - iii++; - } - } - else { // Negative direction - while (iii >= 1 && bases[iii] == bases[iii - 1] && numAgree < numBack) { - numAgree++; - iii--; - } - } - - return numAgree; - } - - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - for (int iii = 0; iii < read.getReadLength(); iii++) { - 
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized - } - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return Integer.parseInt(str); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java deleted file mode 100755 index 54fa18106..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java +++ /dev/null @@ -1,63 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 4, 2009 - * - * The Mapping Quality covariate. - */ - -public class MappingQualityCovariate implements ExperimentalCovariate { - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - } - - // Used to pick out the covariate's value from attributes of the read - private Comparable getValue(final GATKSAMRecord read) { - return read.getMappingQuality(); - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return Integer.parseInt(str); - } - - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - for (int iii = 0; iii < read.getReadLength(); iii++) { - comparable[iii] = getValue(read); // BUGBUG: this can be optimized - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java deleted file mode 100755 index ecaa55006..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java +++ /dev/null @@ -1,79 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a 
copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 4, 2009 - * - * The Minimum Neighborhood Quality Score covariate, originally described by Chris Hartl. - * This covariate is the minimum base quality score in the read in a small window around the current base. 
- */ - -public class MinimumNQSCovariate implements ExperimentalCovariate { - - private int windowReach; // How far in each direction from the current base to look - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - windowReach = RAC.WINDOW_SIZE / 2; // integer division - } - - // Used to pick out the covariate's value from attributes of the read - private Comparable getValue(final SAMRecord read, final int offset) { - - // Loop over the list of base quality scores in the window and find the minimum - final byte[] quals = read.getBaseQualities(); - int minQual = quals[offset]; - final int minIndex = Math.max(offset - windowReach, 0); - final int maxIndex = Math.min(offset + windowReach, quals.length - 1); - for (int iii = minIndex; iii < maxIndex; iii++) { - if (quals[iii] < minQual) { - minQual = quals[iii]; - } - } - return minQual; - } - - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - for (int iii = 0; iii < read.getReadLength(); iii++) { - comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized - } - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return Integer.parseInt(str); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java deleted file mode 100755 index fd720697f..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java +++ /dev/null @@ -1,69 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import 
org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 18, 2009 - * - * The Position covariate. It is a lot like the Cycle covariate except it always returns the offset regardless of which platform the read came from. - * This is the Solexa definition of machine cycle and the covariate that was always being used in the original version of the recalibrator. 
- */ - -public class PositionCovariate implements ExperimentalCovariate { - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - } - - // Used to pick out the covariate's value from attributes of the read - private Comparable getValue(final SAMRecord read, final int offset) { - int cycle = offset; - if (read.getReadNegativeStrandFlag()) { - cycle = read.getReadLength() - (offset + 1); - } - return cycle; - } - - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - for (int iii = 0; iii < read.getReadLength(); iii++) { - comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized - } - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return Integer.parseInt(str); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java deleted file mode 100755 index d6bdea5bf..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies 
of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 13, 2009 - * - * The Primer Round covariate. - * For Solexa and 454 this is the same value of the length of the read. - * For SOLiD this is different for each position according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf - */ - -public class PrimerRoundCovariate implements ExperimentalCovariate { - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - } - - // Used to pick out the covariate's value from attributes of the read - private Comparable getValue(final SAMRecord read, final int offset) { - if (read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") || read.getReadGroup().getPlatform().equalsIgnoreCase("ABI_SOLID")) { - int pos = offset; - if (read.getReadNegativeStrandFlag()) { - pos = read.getReadLength() - (offset + 1); - } - return pos % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf - } - else { - return 1; // nothing to do here because it is always the same 
- } - - } - - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - for (int iii = 0; iii < read.getReadLength(); iii++) { - comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized - } - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return Integer.parseInt(str); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java deleted file mode 100755 index a29a0530c..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java +++ /dev/null @@ -1,61 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.Arrays; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 3, 2009 - * - * The Reported Quality Score covariate. - */ - -public class QualityScoreCovariate implements RequiredCovariate { - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - } - - @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { - byte[] baseQualities = read.getBaseQualities(); - for (int i = 0; i < read.getReadLength(); i++) { - comparable[i] = (int) baseQualities[i]; - } - } - - // Used to get the covariate's value from input csv file in TableRecalibrationWalker - @Override - public final Comparable getValue(final String str) { - return Integer.parseInt(str); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java deleted file mode 100644 index 1a6b8cfcb..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ /dev/null @@ -1,697 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * 
The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import net.sf.samtools.SAMUtils; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 6, 2009 - * - * This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions. - * It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias. - * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration. 
- */ - -public class RecalDataManager { - - public final NestedHashMap data; // The full dataset - private final NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed - private final NestedHashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed - private final ArrayList dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed - - public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores - public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams - public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams - public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color - private static boolean warnUserNullPlatform = false; - - public enum SOLID_RECAL_MODE { - /** - * Treat reference inserted bases as reference matching bases. Very unsafe! - */ - DO_NOTHING, - /** - * Set reference inserted bases and the previous base (because of color space alignment details) to Q0. This is the default option. - */ - SET_Q_ZERO, - /** - * In addition to setting the quality scores to zero, also set the base itself to 'N'. This is useful to visualize in IGV. - */ - SET_Q_ZERO_BASE_N, - /** - * Look at the color quality scores and probabilistically decide to change the reference inserted base to be the base which is implied by the original color space instead of the reference. - */ - REMOVE_REF_BIAS - } - - public enum SOLID_NOCALL_STRATEGY { - /** - * When a no call is detected throw an exception to alert the user that recalibrating this SOLiD data is unsafe. This is the default option. 
- */ - THROW_EXCEPTION, - /** - * Leave the read in the output bam completely untouched. This mode is only okay if the no calls are very rare. - */ - LEAVE_READ_UNRECALIBRATED, - /** - * Mark these reads as failing vendor quality checks so they can be filtered out by downstream analyses. - */ - PURGE_READ - } - - public RecalDataManager() { - data = new NestedHashMap(); - dataCollapsedReadGroup = null; - dataCollapsedQualityScore = null; - dataCollapsedByCovariate = null; - } - - public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) { - if (createCollapsedTables) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker - data = null; - dataCollapsedReadGroup = new NestedHashMap(); - dataCollapsedQualityScore = new NestedHashMap(); - dataCollapsedByCovariate = new ArrayList(); - for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate - dataCollapsedByCovariate.add(new NestedHashMap()); - } - } - else { - data = new NestedHashMap(); - dataCollapsedReadGroup = null; - dataCollapsedQualityScore = null; - dataCollapsedByCovariate = null; - } - } - - /** - * Add the given mapping to all of the collapsed hash tables - * - * @param key The list of comparables that is the key for this mapping - * @param fullDatum The RecalDatum which is the data for this mapping - * @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table - */ - public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN) { - - // The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around - //data.put(key, thisDatum); // add the mapping to the main table - - final int qualityScore = Integer.parseInt(key[1].toString()); - final Object[] readGroupCollapsedKey = new Object[1]; - final Object[] 
qualityScoreCollapsedKey = new Object[2]; - final Object[] covariateCollapsedKey = new Object[3]; - RecalDatum collapsedDatum; - - // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed - if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) { - readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group - collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(readGroupCollapsedKey); - if (collapsedDatum == null) { - dataCollapsedReadGroup.put(new RecalDatum(fullDatum), readGroupCollapsedKey); - } - else { - collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported - } - } - - // Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed - qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ... - qualityScoreCollapsedKey[1] = key[1]; // and quality score - collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(qualityScoreCollapsedKey); - if (collapsedDatum == null) { - dataCollapsedQualityScore.put(new RecalDatum(fullDatum), qualityScoreCollapsedKey); - } - else { - collapsedDatum.increment(fullDatum); - } - - // Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed - for (int iii = 0; iii < dataCollapsedByCovariate.size(); iii++) { - covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ... - covariateCollapsedKey[1] = key[1]; // and quality score ... 
- final Object theCovariateElement = key[iii + 2]; // and the given covariate - if (theCovariateElement != null) { - covariateCollapsedKey[2] = theCovariateElement; - collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get(covariateCollapsedKey); - if (collapsedDatum == null) { - dataCollapsedByCovariate.get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey); - } - else { - collapsedDatum.increment(fullDatum); - } - } - } - } - - /** - * Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score - * that will be used in the sequential calculation in TableRecalibrationWalker - * - * @param smoothing The smoothing parameter that goes into empirical quality score calculation - * @param maxQual At which value to cap the quality scores - */ - public final void generateEmpiricalQualities(final int smoothing, final int maxQual) { - - recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.data, smoothing, maxQual); - recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual); - for (NestedHashMap map : dataCollapsedByCovariate) { - recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual); - checkForSingletons(map.data); - } - } - - private void recursivelyGenerateEmpiricalQualities(final Map data, final int smoothing, final int maxQual) { - - for (Object comp : data.keySet()) { - final Object val = data.get(comp); - if (val instanceof RecalDatum) { // We are at the end of the nested hash maps - ((RecalDatum) val).calcCombinedEmpiricalQuality(smoothing, maxQual); - } - else { // Another layer in the nested hash map - recursivelyGenerateEmpiricalQualities((Map) val, smoothing, maxQual); - } - } - } - - private void checkForSingletons(final Map data) { - // todo -- this looks like it's better just as a data.valueSet() call? 
- for (Object comp : data.keySet()) { - final Object val = data.get(comp); - if (val instanceof RecalDatum) { // We are at the end of the nested hash maps - if (data.keySet().size() == 1) { - data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ... - // in a previous step of the sequential calculation model - } - } - else { // Another layer in the nested hash map - checkForSingletons((Map) val); - } - } - } - - /** - * Get the appropriate collapsed table out of the set of all the tables held by this Object - * - * @param covariate Which covariate indexes the desired collapsed HashMap - * @return The desired collapsed HashMap - */ - public final NestedHashMap getCollapsedTable(final int covariate) { - if (covariate == 0) { - return dataCollapsedReadGroup; // Table where everything except read group has been collapsed - } - else if (covariate == 1) { - return dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed - } - else { - return dataCollapsedByCovariate.get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed - } - } - - /** - * Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string - * - * @param read The read to adjust - * @param RAC The list of shared command line arguments - */ - public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) { - GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup(); - - if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) { - readGroup.setPlatform(RAC.FORCE_PLATFORM); - } - - if (readGroup.getPlatform() == null) { - if (RAC.DEFAULT_PLATFORM != null) { - if (!warnUserNullPlatform) { - 
Utils.warnUser("The input .bam file contains reads with no platform information. " + - "Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " + - "First observed at read with name = " + read.getReadName()); - warnUserNullPlatform = true; - } - readGroup.setPlatform(RAC.DEFAULT_PLATFORM); - } - else { - throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName()); - } - } - } - - /** - * Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space - * - * @param read The SAMRecord to parse - */ - public static void parseColorSpace(final GATKSAMRecord read) { - - // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base - if (ReadUtils.isSOLiDRead(read)) { - if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); - if (attr != null) { - byte[] colorSpace; - if (attr instanceof String) { - colorSpace = ((String) attr).getBytes(); - } - else { - throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); - } - - // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read - byte[] readBases = read.getReadBases(); - if (read.getReadNegativeStrandFlag()) { - readBases = BaseUtils.simpleReverseComplement(read.getReadBases()); - } - final byte[] inconsistency = new byte[readBases.length]; - int iii; - byte prevBase = colorSpace[0]; // The sentinel - for (iii = 0; iii < readBases.length; iii++) { - final byte thisBase = getNextBaseFromColor(read, prevBase, 
colorSpace[iii + 1]); - inconsistency[iii] = (byte) (thisBase == readBases[iii] ? 0 : 1); - prevBase = readBases[iii]; - } - read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); - - } - else { - throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + - " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); - } - } - } - } - - /** - * Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases - * This method doesn't add the inconsistent tag to the read like parseColorSpace does - * - * @param read The SAMRecord to parse - * @param originalQualScores The array of original quality scores to modify during the correction - * @param solidRecalMode Which mode of solid recalibration to apply - * @param refBases The reference for this read - * @return A new array of quality scores that have been ref bias corrected - */ - public static byte[] calcColorSpace(final GATKSAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases) { - - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); - if (attr != null) { - byte[] colorSpace; - if (attr instanceof String) { - colorSpace = ((String) attr).getBytes(); - } - else { - throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); - } - - // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read - byte[] readBases = read.getReadBases(); - final byte[] colorImpliedBases = readBases.clone(); - byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray(read.getCigar(), read.getReadBases(), refBases); //BUGBUG: This needs to change when read walkers are 
changed to give the aligned refBases - if (read.getReadNegativeStrandFlag()) { - readBases = BaseUtils.simpleReverseComplement(read.getReadBases()); - refBasesDirRead = BaseUtils.simpleReverseComplement(refBasesDirRead.clone()); - } - final int[] inconsistency = new int[readBases.length]; - byte prevBase = colorSpace[0]; // The sentinel - for (int iii = 0; iii < readBases.length; iii++) { - final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]); - colorImpliedBases[iii] = thisBase; - inconsistency[iii] = (thisBase == readBases[iii] ? 0 : 1); - prevBase = readBases[iii]; - } - - // Now that we have the inconsistency array apply the desired correction to the inconsistent bases - if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0 - final boolean setBaseN = false; - originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN); - } - else if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N) { - final boolean setBaseN = true; - originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN); - } - else if (solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases - solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead); - } - - } - else { - throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. 
First observed at read with name = " + read.getReadName() + - " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); - } - - return originalQualScores; - } - - public static boolean checkNoCallColorSpace(final GATKSAMRecord read) { - if (ReadUtils.isSOLiDRead(read)) { - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); - if (attr != null) { - byte[] colorSpace; - if (attr instanceof String) { - colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel - } - else { - throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); - } - - for (byte color : colorSpace) { - if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') { - return true; // There is a bad color in this SOLiD read and the user wants to skip over it - } - } - - } - else { - throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + - " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); - } - } - - return false; // There aren't any color no calls in this SOLiD read - } - - /** - * Perform the SET_Q_ZERO solid recalibration. 
Inconsistent color space bases and their previous base are set to quality zero - * - * @param read The SAMRecord to recalibrate - * @param readBases The bases in the read which have been RC'd if necessary - * @param inconsistency The array of 1/0 that says if this base is inconsistent with its color - * @param originalQualScores The array of original quality scores to set to zero if needed - * @param refBases The reference which has been RC'd if necessary - * @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar - * @return The byte array of original quality scores some of which might have been set to zero - */ - private static byte[] solidRecalSetToQZero(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores, final byte[] refBases, final boolean setBaseN) { - - final boolean negStrand = read.getReadNegativeStrandFlag(); - for (int iii = 1; iii < originalQualScores.length; iii++) { - if (inconsistency[iii] == 1) { - if (readBases[iii] == refBases[iii]) { - if (negStrand) { - originalQualScores[originalQualScores.length - (iii + 1)] = (byte) 0; - } - else { - originalQualScores[iii] = (byte) 0; - } - if (setBaseN) { - readBases[iii] = (byte) 'N'; - } - } - // Set the prev base to Q0 as well - if (readBases[iii - 1] == refBases[iii - 1]) { - if (negStrand) { - originalQualScores[originalQualScores.length - iii] = (byte) 0; - } - else { - originalQualScores[iii - 1] = (byte) 0; - } - if (setBaseN) { - readBases[iii - 1] = (byte) 'N'; - } - } - } - } - if (negStrand) { - readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read - } - read.setReadBases(readBases); - - return originalQualScores; - } - - /** - * Peform the REMOVE_REF_BIAS solid recalibration. 
Look at the color space qualities and probabilistically decide if the base should be change to match the color or left as reference - * - * @param read The SAMRecord to recalibrate - * @param readBases The bases in the read which have been RC'd if necessary - * @param inconsistency The array of 1/0 that says if this base is inconsistent with its color - * @param colorImpliedBases The bases implied by the color space, RC'd if necessary - * @param refBases The reference which has been RC'd if necessary - */ - private static void solidRecalRemoveRefBias(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases, final byte[] refBases) { - - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG); - if (attr != null) { - byte[] colorSpaceQuals; - if (attr instanceof String) { - String x = (String) attr; - colorSpaceQuals = x.getBytes(); - SAMUtils.fastqToPhred(colorSpaceQuals); - } - else { - throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName())); - } - - for (int iii = 1; iii < inconsistency.length - 1; iii++) { - if (inconsistency[iii] == 1) { - for (int jjj = iii - 1; jjj <= iii; jjj++) { // Correct this base and the one before it along the direction of the read - if (jjj == iii || inconsistency[jjj] == 0) { // Don't want to correct the previous base a second time if it was already corrected in the previous step - if (readBases[jjj] == refBases[jjj]) { - if (colorSpaceQuals[jjj] == colorSpaceQuals[jjj + 1]) { // Equal evidence for the color implied base and the reference base, so flip a coin - final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt(2); - if (rand == 0) { // The color implied base won the coin flip - readBases[jjj] = colorImpliedBases[jjj]; - } - } - else { - final int maxQuality = Math.max((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]); - final int 
minQuality = Math.min((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]); - int diffInQuality = maxQuality - minQuality; - int numLow = minQuality; - if (numLow == 0) { - numLow++; - diffInQuality++; - } - final int numHigh = Math.round(numLow * (float) Math.pow(10.0f, (float) diffInQuality / 10.0f)); // The color with higher quality is exponentially more likely - final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt(numLow + numHigh); - if (rand >= numLow) { // higher q score won - if (maxQuality == (int) colorSpaceQuals[jjj]) { - readBases[jjj] = colorImpliedBases[jjj]; - } // else ref color had higher q score, and won out, so nothing to do here - } - else { // lower q score won - if (minQuality == (int) colorSpaceQuals[jjj]) { - readBases[jjj] = colorImpliedBases[jjj]; - } // else ref color had lower q score, and won out, so nothing to do here - } - } - } - } - } - } - } - - if (read.getReadNegativeStrandFlag()) { - readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read - } - read.setReadBases(readBases); - } - else { // No color space quality tag in file - throw new UserException.MalformedBAM(read, "REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName()); - } - } - - /** - * Given the base and the color calculate the next base in the sequence - * - * @param prevBase The base - * @param color The color - * @return The next base in the sequence - */ - private static byte getNextBaseFromColor(GATKSAMRecord read, final byte prevBase, final byte color) { - switch (color) { - case '0': - return prevBase; - case '1': - return performColorOne(prevBase); - case '2': - return performColorTwo(prevBase); - case '3': - return performColorThree(prevBase); - default: - throw new UserException.MalformedBAM(read, "Unrecognized color space in SOLID read, color = " + (char) color + - " Unfortunately this bam file can 
not be recalibrated without full color space information because of potential reference bias."); - } - } - - /** - * Check if this base is inconsistent with its color space. If it is then SOLID inserted the reference here and we should reduce the quality - * - * @param read The read which contains the color space to check against - * @param offset The offset in the read at which to check - * @return Returns true if the base was inconsistent with the color space - */ - public static boolean isInconsistentColorSpace(final GATKSAMRecord read, final int offset) { - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG); - if (attr != null) { - final byte[] inconsistency = (byte[]) attr; - // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference! - if (read.getReadNegativeStrandFlag()) { // Negative direction - return inconsistency[inconsistency.length - offset - 1] != (byte) 0; - } - else { // Forward direction - return inconsistency[offset] != (byte) 0; - } - - // This block of code is for if you want to check both the offset and the next base for color space inconsistency - //if( read.getReadNegativeStrandFlag() ) { // Negative direction - // if( offset == 0 ) { - // return inconsistency[0] != 0; - // } else { - // return (inconsistency[inconsistency.length - offset - 1] != 0) || (inconsistency[inconsistency.length - offset] != 0); - // } - //} else { // Forward direction - // if( offset == inconsistency.length - 1 ) { - // return inconsistency[inconsistency.length - 1] != 0; - // } else { - // return (inconsistency[offset] != 0) || (inconsistency[offset + 1] != 0); - // } - //} - - } - else { // No inconsistency array, so nothing is inconsistent - return false; - } - } - - /** - * Computes all requested covariates for every offset in the given read - * by calling covariate.getValues(..). - * - * @param gatkRead The read for which to compute covariate values. 
- * @param requestedCovariates The list of requested covariates. - * @return An array of covariate values where result[i][j] is the covariate - * value for the ith position in the read and the jth covariate in - * reqeustedCovariates list. - */ - public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List requestedCovariates) { - //compute all covariates for this read - final int numRequestedCovariates = requestedCovariates.size(); - final int readLength = gatkRead.getReadLength(); - - final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates]; - final Comparable[] tempCovariateValuesHolder = new Comparable[readLength]; - - for (int i = 0; i < numRequestedCovariates; i++) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read - requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder); - for (int j = 0; j < readLength; j++) - covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j]; // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types. - } - - return covariateValues_offset_x_covar; - } - - /** - * Perform a certain transversion (A <-> C or G <-> T) on the base. - * - * @param base the base [AaCcGgTt] - * @return the transversion of the base, or the input base if it's not one of the understood ones - */ - private static byte performColorOne(byte base) { - switch (base) { - case 'A': - case 'a': - return 'C'; - case 'C': - case 'c': - return 'A'; - case 'G': - case 'g': - return 'T'; - case 'T': - case 't': - return 'G'; - default: - return base; - } - } - - /** - * Perform a transition (A <-> G or C <-> T) on the base. 
- * - * @param base the base [AaCcGgTt] - * @return the transition of the base, or the input base if it's not one of the understood ones - */ - private static byte performColorTwo(byte base) { - switch (base) { - case 'A': - case 'a': - return 'G'; - case 'C': - case 'c': - return 'T'; - case 'G': - case 'g': - return 'A'; - case 'T': - case 't': - return 'C'; - default: - return base; - } - } - - /** - * Return the complement (A <-> T or C <-> G) of a base. - * - * @param base the base [AaCcGgTt] - * @return the complementary base, or the input base if it's not one of the understood ones - */ - private static byte performColorThree(byte base) { - switch (base) { - case 'A': - case 'a': - return 'T'; - case 'C': - case 'c': - return 'G'; - case 'G': - case 'g': - return 'C'; - case 'T': - case 't': - return 'A'; - default: - return base; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatum.java deleted file mode 100755 index aa9098549..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatum.java +++ /dev/null @@ -1,118 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 3, 2009 - * - * An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. - */ - -public class RecalDatum extends RecalDatumOptimized { - - private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations - private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) - - //--------------------------------------------------------------------------------------------------------------- - // - // constructors - // - //--------------------------------------------------------------------------------------------------------------- - - public RecalDatum() { - numObservations = 0L; - numMismatches = 0L; - estimatedQReported = 0.0; - empiricalQuality = 0.0; - } - - public RecalDatum( final long _numObservations, final long _numMismatches, final double _estimatedQReported, final double _empiricalQuality ) { - numObservations = _numObservations; - numMismatches = _numMismatches; - estimatedQReported = _estimatedQReported; - empiricalQuality = _empiricalQuality; - } - - public RecalDatum( final RecalDatum copy ) { - this.numObservations = copy.numObservations; - this.numMismatches = copy.numMismatches; - this.estimatedQReported = 
copy.estimatedQReported; - this.empiricalQuality = copy.empiricalQuality; - } - - //--------------------------------------------------------------------------------------------------------------- - // - // increment methods - // - //--------------------------------------------------------------------------------------------------------------- - - public final void combine( final RecalDatum other ) { - final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors(); - this.increment( other.numObservations, other.numMismatches ); - this.estimatedQReported = -10 * Math.log10(sumErrors / (double)this.numObservations); - //if( this.estimatedQReported > QualityUtils.MAX_REASONABLE_Q_SCORE ) { this.estimatedQReported = QualityUtils.MAX_REASONABLE_Q_SCORE; } - } - - //--------------------------------------------------------------------------------------------------------------- - // - // methods to derive empirical quality score - // - //--------------------------------------------------------------------------------------------------------------- - - public final void calcCombinedEmpiricalQuality( final int smoothing, final int maxQual ) { - this.empiricalQuality = empiricalQualDouble(smoothing, maxQual); // cache the value so we don't call log over and over again - } - - //--------------------------------------------------------------------------------------------------------------- - // - // misc. 
methods - // - //--------------------------------------------------------------------------------------------------------------- - - public final double getEstimatedQReported() { - return estimatedQReported; - } - - public final double getEmpiricalQuality() { - return empiricalQuality; - } - - private double calcExpectedErrors() { - return (double)this.numObservations * qualToErrorProb( estimatedQReported ); - } - - private double qualToErrorProb( final double qual ) { - return Math.pow(10.0, qual / -10.0); - } - - - @Override - public String toString() { - return String.format("%d,%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()), (byte) Math.floor(getEstimatedQReported())); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java deleted file mode 100755 index f04989fa5..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java +++ /dev/null @@ -1,135 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.QualityUtils; - -import java.util.List; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Jan 6, 2010 - * - * An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed. - * Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. - */ - -public class RecalDatumOptimized { - - protected long numObservations; // number of bases seen in total - protected long numMismatches; // number of bases seen that didn't match the reference - - //--------------------------------------------------------------------------------------------------------------- - // - // constructors - // - //--------------------------------------------------------------------------------------------------------------- - - public RecalDatumOptimized() { - numObservations = 0L; - numMismatches = 0L; - } - - public RecalDatumOptimized( final long _numObservations, final long _numMismatches) { - numObservations = _numObservations; - numMismatches = _numMismatches; - } - - public RecalDatumOptimized( final RecalDatumOptimized copy ) { - this.numObservations = copy.numObservations; - this.numMismatches = copy.numMismatches; - } - - //--------------------------------------------------------------------------------------------------------------- - // - // increment methods - // - //--------------------------------------------------------------------------------------------------------------- - - 
public synchronized final void increment( final long incObservations, final long incMismatches ) { - numObservations += incObservations; - numMismatches += incMismatches; - } - - public synchronized final void increment( final RecalDatumOptimized other ) { - increment( other.numObservations, other.numMismatches ); - } - - public synchronized final void increment( final List data ) { - for ( RecalDatumOptimized other : data ) { - this.increment( other ); - } - } - - public synchronized final void incrementBaseCounts( final byte curBase, final byte refBase ) { - increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(refBase) ? 0 : 1 ); // increment takes num observations, then num mismatches - } - - //--------------------------------------------------------------------------------------------------------------- - // - // methods to derive empirical quality score - // - //--------------------------------------------------------------------------------------------------------------- - - public final double empiricalQualDouble( final int smoothing, final double maxQual ) { - final double doubleMismatches = (double) ( numMismatches + smoothing ); - final double doubleObservations = (double) ( numObservations + smoothing ); - double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); - if (empiricalQual > maxQual) { empiricalQual = maxQual; } - return empiricalQual; - } - public final double empiricalQualDouble() { return empiricalQualDouble( 0, QualityUtils.MAX_REASONABLE_Q_SCORE ); } // 'default' behavior is to use smoothing value of zero - - public final byte empiricalQualByte( final int smoothing ) { - final double doubleMismatches = (double) ( numMismatches + smoothing ); - final double doubleObservations = (double) ( numObservations + smoothing ); - return QualityUtils.probToQual( 1.0 - doubleMismatches / doubleObservations ); // This is capped at Q40 - } - public final byte empiricalQualByte() { return 
empiricalQualByte( 0 ); } // 'default' behavior is to use smoothing value of zero - - //--------------------------------------------------------------------------------------------------------------- - // - // misc. methods - // - //--------------------------------------------------------------------------------------------------------------- - - public final long getNumObservations() { - return numObservations; - } - - public final long getNumMismatches() { - return numMismatches; - } - - public final String outputToCSV( ) { - return String.format( "%d,%d,%d", numObservations, numMismatches, (int)empiricalQualByte() ); - } - public final String outputToCSV( final int smoothing ) { - return String.format( "%d,%d,%d", numObservations, numMismatches, (int)empiricalQualByte(smoothing) ); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java deleted file mode 100755 index 9752b1dee..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 27, 2009 - * - * A collection of the arguments that are common to both CovariateCounterWalker and TableRecalibrationWalker. - * This set of arguments will also be passed to the constructor of every Covariate when it is instantiated. - */ - -public class RecalibrationArgumentCollection { - - ////////////////////////////////// - // Shared Command Line Arguments - ////////////////////////////////// - @Hidden - @Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.") - public String DEFAULT_PLATFORM = null; - @Hidden - @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. 
Valid options are illumina, 454, and solid.") - public String FORCE_PLATFORM = null; - @Hidden - @Argument(fullName = "window_size_nqs", shortName = "nqs", doc = "The window size used by MinimumNQSCovariate for its calculation", required = false) - public int WINDOW_SIZE = 5; - - /** - * CountCovariates and TableRecalibration accept a --solid_recal_mode flag which governs how the recalibrator handles the - * reads which have had the reference inserted because of color space inconsistencies. - */ - @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS") - public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO; - - /** - * CountCovariates and TableRecalibration accept a --solid_nocall_strategy flag which governs how the recalibrator handles - * no calls in the color space tag. Unfortunately because of the reference inserted bases mentioned above, reads with no calls in - * their color space tag can not be recalibrated. - */ - @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false) - public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; - - /** - * The context covariate will use a context of this size to calculate it's covariate value - */ - @Argument(fullName = "context_size", shortName = "cs", doc = "size of the k-mer context to be used", required = false) - public int CONTEXT_SIZE = 8; - - /** - * This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score. 
- */ - @Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false) - public int HOMOPOLYMER_NBACK = 7; - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java deleted file mode 100644 index 8eaa0198a..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ /dev/null @@ -1,584 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import net.sf.samtools.*; -import net.sf.samtools.util.SequenceUtil; -import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; -import org.broadinstitute.sting.utils.text.XReadLines; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.List; -import java.util.MissingResourceException; -import java.util.ResourceBundle; -import java.util.regex.Pattern; - -/** - * Second pass of the base quality score recalibration -- Uses the table generated by CountCovariates to update the base quality scores of the input bam file using a sequential table calculation making the base quality scores more accurately reflect the actual quality of the bases as measured by reference mismatch rate. - * - *

- * This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal. For each - * base in each read this walker calculates various user-specified covariates (such as read group, reported quality score, - * cycle, and dinuc). Using these values as a key in a large hashmap the walker calculates an empirical base quality score - * and overwrites the quality score currently in the read. This walker then outputs a new bam file with these updated (recalibrated) reads. - * - *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration - * - *

Input

- *

- * The input read data whose base quality scores need to be recalibrated. - *

- * The recalibration table file in CSV format that was generated by the CountCovariates walker. - *

- * - *

Output

- *

- * A bam file in which the quality scores in each read have been recalibrated. - *

- * - *

Examples

- *
- * java -Xmx4g -jar GenomeAnalysisTK.jar \
- *   -R resources/Homo_sapiens_assembly18.fasta \
- *   -I my_reads.bam \
- *   -T TableRecalibration \
- *   -o my_reads.recal.bam \
- *   -recalFile my_reads.recal_data.csv
- * 
- */ - -@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) -@WalkerName("TableRecalibration") -@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) -// This walker requires -I input.bam, it also requires -R reference.fasta -public class TableRecalibrationWalker extends ReadWalker { - - public static final String PROGRAM_RECORD_NAME = "GATK TableRecalibration"; - - ///////////////////////////// - // Shared Arguments - ///////////////////////////// - @ArgumentCollection - private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - - ///////////////////////////// - // Command Line Arguments - ///////////////////////////// - /** - * After the header, data records occur one per line until the end of the file. The first several items on a line are the - * values of the individual covariates and will change depending on which covariates were specified at runtime. The last - * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, - * and the raw empirical quality score calculated by phred-scaling the mismatch rate. - */ - @Input(fullName = "recal_file", shortName = "recalFile", required = true, doc = "Filename for the input covariates table recalibration .csv file") - public File RECAL_FILE = null; - /** - * A new bam file in which the quality scores in each read have been recalibrated. The alignment of the reads is left untouched. - */ - @Output(doc = "The output recalibrated BAM file", required = true) - private StingSAMFileWriter OUTPUT_BAM = null; - - /** - * TableRacalibration accepts a --preserve_qscores_less_than / -pQ flag that instructs TableRecalibration to not modify - * quality scores less than but rather just write them out unmodified in the recalibrated BAM file. This is useful - * because Solexa writes Q2 and Q3 bases when the machine has really gone wrong. 
This would be fine in and of itself, - * but when you select a subset of these reads based on their ability to align to the reference and their dinucleotide effect, - * your Q2 and Q3 bins can be elevated to Q8 or Q10, leading to issues downstream. With the default value of 5, all Q0-Q4 bases - * are unmodified during recalibration, so they don't get inappropriately evaluated. - */ - @Argument(fullName = "preserve_qscores_less_than", shortName = "pQ", doc = "Bases with quality scores less than this threshold won't be recalibrated. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required = false) - private int PRESERVE_QSCORES_LESS_THAN = 5; - - /** - * By default TableRecalibration applies a Yates' correction to account for overfitting when it calculates the empirical - * quality score, in particular, ( # mismatches + 1 ) / ( # observations + 1 ). TableRecalibration accepts a --smoothing / -sm - * argument which sets how many unobserved counts to add to every bin. Use --smoothing 0 to turn off all smoothing or, for example, - * --smoothing 15 for a large amount of smoothing. - */ - @Argument(fullName = "smoothing", shortName = "sm", required = false, doc = "Number of imaginary counts to add to each bin in order to smooth out bins with few data points") - private int SMOOTHING = 1; - - /** - * Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation - * by capping at the specified value. We've found that Q40 is too low when using a more completely database of known variation like dbSNP build 132 or later. 
- */ - @Argument(fullName = "max_quality_score", shortName = "maxQ", required = false, doc = "The integer value at which to cap the quality scores") - private int MAX_QUALITY_SCORE = 50; - - /** - * By default TableRecalibration emits the OQ field -- so you can go back and look at the original quality scores, rerun - * the system using the OQ flags, etc, on the output BAM files; to turn off emission of the OQ field use this flag. - */ - @Argument(fullName = "doNotWriteOriginalQuals", shortName = "noOQs", required = false, doc = "If true, we will not write the original quality (OQ) tag for each read") - private boolean DO_NOT_WRITE_OQ = false; - - ///////////////////////////// - // Debugging-only Arguments - ///////////////////////////// - @Hidden - @Argument(fullName = "no_pg_tag", shortName = "noPG", required = false, doc = "Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.") - private boolean NO_PG_TAG = false; - @Hidden - @Argument(fullName = "fail_with_no_eof_marker", shortName = "requireEOF", required = false, doc = "If no EOF marker is present in the covariates file, exit the program with an exception.") - private boolean REQUIRE_EOF = false; - @Hidden - @Argument(fullName = "skipUQUpdate", shortName = "skipUQUpdate", required = false, doc = "If true, we will skip the UQ updating step for each read, speeding up the calculations") - private boolean skipUQUpdate = false; - - ///////////////////////////// - // Private Member Variables - ///////////////////////////// - private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps - private final ArrayList requestedCovariates = new ArrayList(); // List of covariates to be used in this calculation - public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); - public static final Pattern OLD_RECALIBRATOR_HEADER = 
Pattern.compile("^rg,.*"); - public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*"); - public static final String EOF_MARKER = "EOF"; - private long numReadsWithMalformedColorSpace = 0; - - ///////////////////////////// - // Optimization - ///////////////////////////// - private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values. - - //--------------------------------------------------------------------------------------------------------------- - // - // initialize - // - //--------------------------------------------------------------------------------------------------------------- - - /** - * Read in the recalibration table input file. - * Parse the list of covariate classes used during CovariateCounterWalker. - * Parse the CSV data and populate the hashmap. - */ - public void initialize() { - - if (RAC.FORCE_PLATFORM != null) { - RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; - } - - // Get a list of all available covariates - final List> classes = new PluginManager(Covariate.class).getPlugins(); - - int lineNumber = 0; - boolean foundAllCovariates = false; - - // Read in the data from the csv file and populate the data map and covariates list - logger.info("Reading in the data from input csv file..."); - - boolean sawEOF = false; - try { - for (String line : new XReadLines(RECAL_FILE)) { - lineNumber++; - if (EOF_MARKER.equals(line)) { - sawEOF = true; - } - else if (COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) { - ; // Skip over the comment lines, (which start with '#') - } - // Read in the covariates that were used from the input file - else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data - if (foundAllCovariates) { - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input 
recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE); - } - else { // Found the covariate list in input file, loop through all of them and instantiate them - String[] vals = line.split(","); - for (int iii = 0; iii < vals.length - 3; iii++) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical - boolean foundClass = false; - for (Class covClass : classes) { - if ((vals[iii] + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) { - foundClass = true; - try { - Covariate covariate = (Covariate) covClass.newInstance(); - requestedCovariates.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - - } - } - - if (!foundClass) { - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option."); - } - } - } - - } - else { // Found a line of data - if (!foundAllCovariates) { - foundAllCovariates = true; - - // At this point all the covariates should have been found and initialized - if (requestedCovariates.size() < 2) { - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. 
Covariate names can't be found in file: " + RECAL_FILE); - } - - final boolean createCollapsedTables = true; - - // Initialize any covariate member variables using the shared argument collection - for (Covariate cov : requestedCovariates) { - cov.initialize(RAC); - } - // Initialize the data hashMaps - dataManager = new RecalDataManager(createCollapsedTables, requestedCovariates.size()); - - } - addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap - } - } - - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e); - } catch (NumberFormatException e) { - throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker."); - } - logger.info("...done!"); - - if (!sawEOF) { - final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool."; - if (REQUIRE_EOF) - throw new UserException.MalformedFile(RECAL_FILE, errorMessage); - logger.warn(errorMessage); - } - - logger.info("The covariates being used here: "); - for (Covariate cov : requestedCovariates) { - logger.info("\t" + cov.getClass().getSimpleName()); - } - - if (dataManager == null) { - throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. 
Perhaps the recal csv file contains no data?"); - } - - // Create the tables of empirical quality scores that will be used in the sequential calculation - logger.info("Generating tables of empirical qualities for use in sequential calculation..."); - dataManager.generateEmpiricalQualities(SMOOTHING, MAX_QUALITY_SCORE); - logger.info("...done!"); - - // Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used - final SAMFileHeader header = getToolkit().getSAMFileHeader().clone(); - if (!NO_PG_TAG) { - final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME); - final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText"); - try { - final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version"); - programRecord.setProgramVersion(version); - } catch (MissingResourceException e) { - } - - StringBuffer sb = new StringBuffer(); - sb.append(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this)); - sb.append(" Covariates=["); - for (Covariate cov : requestedCovariates) { - sb.append(cov.getClass().getSimpleName()); - sb.append(", "); - } - sb.setCharAt(sb.length() - 2, ']'); - sb.setCharAt(sb.length() - 1, ' '); - programRecord.setCommandLine(sb.toString()); - - List oldRecords = header.getProgramRecords(); - List newRecords = new ArrayList(oldRecords.size() + 1); - for (SAMProgramRecord record : oldRecords) { - if (!record.getId().startsWith(PROGRAM_RECORD_NAME)) - newRecords.add(record); - } - newRecords.add(programRecord); - header.setProgramRecords(newRecords); - - // Write out the new header - OUTPUT_BAM.writeHeader(header); - } - } - - /** - * For each covariate read in a value and parse it. 
Associate those values with the data itself (num observation and num mismatches) - * - * @param line A line of CSV data read from the recalibration table data file - */ - private void addCSVData(final File file, final String line) { - final String[] vals = line.split(","); - - // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly - if (vals.length != requestedCovariates.size() + 3) { // +3 because of nObservations, nMismatch, and Qempirical - throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line + - " --Perhaps the read group string contains a comma and isn't being parsed correctly."); - } - - final Object[] key = new Object[requestedCovariates.size()]; - Covariate cov; - int iii; - for (iii = 0; iii < requestedCovariates.size(); iii++) { - cov = requestedCovariates.get(iii); - key[iii] = cov.getValue(vals[iii]); - } - - // Create a new datum using the number of observations, number of mismatches, and reported quality score - final RecalDatum datum = new RecalDatum(Long.parseLong(vals[iii]), Long.parseLong(vals[iii + 1]), Double.parseDouble(vals[1]), 0.0); - // Add that datum to all the collapsed tables which will be used in the sequential calculation - dataManager.addToAllTables(key, datum, PRESERVE_QSCORES_LESS_THAN); - } - - //--------------------------------------------------------------------------------------------------------------- - // - // map - // - //--------------------------------------------------------------------------------------------------------------- - - /** - * For each base in the read calculate a new recalibrated quality score and replace the quality scores in the read - * - * @param refBases References bases over the length of the read - * @param read The read to be recalibrated - * @return The read with quality scores replaced - */ - public SAMRecord map(ReferenceContext refBases, 
GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { - - if (read.getReadLength() == 0) { // Some reads have '*' as the SEQ field and samtools returns length zero. We don't touch these reads. - return read; - } - - RecalDataManager.parseSAMRecord(read, RAC); - - byte[] originalQuals = read.getBaseQualities(); - final byte[] recalQuals = originalQuals.clone(); - - final String platform = read.getReadGroup().getPlatform(); - if (platform.toUpperCase().contains("SOLID") && !(RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING)) { - if (!(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION)) { - final boolean badColor = RecalDataManager.checkNoCallColorSpace(read); - if (badColor) { - numReadsWithMalformedColorSpace++; - if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) { - return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them - } - else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) { - read.setReadFailsVendorQualityCheckFlag(true); - return read; - } - } - } - originalQuals = RecalDataManager.calcColorSpace(read, originalQuals, RAC.SOLID_RECAL_MODE, refBases == null ? 
null : refBases.getBases()); - } - - //compute all covariate values for this read - final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates); - - // For each base in the read - for (int offset = 0; offset < read.getReadLength(); offset++) { - - final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset]; - - Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey); - if (qualityScore == null) { - qualityScore = performSequentialQualityCalculation(fullCovariateKey); - qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey); - } - - recalQuals[offset] = qualityScore; - } - - preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low - - read.setBaseQualities(recalQuals); // Overwrite old qualities with new recalibrated qualities - if (!DO_NOT_WRITE_OQ && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null) { // Save the old qualities if the tag isn't already taken in the read - try { - read.setAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, SAMUtils.phredToFastq(originalQuals)); - } catch (IllegalArgumentException e) { - throw new UserException.MalformedBAM(read, "illegal base quality encountered; " + e.getMessage()); - } - } - - if (!skipUQUpdate && refBases != null && read.getAttribute(SAMTag.UQ.name()) != null) { - read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, refBases.getBases(), read.getAlignmentStart() - 1, false)); - } - - if (RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N && refBases != null && read.getAttribute(SAMTag.NM.name()) != null) { - read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, refBases.getBases(), read.getAlignmentStart() - 1, false)); - } - - return read; - } - - /** - * Implements a serial recalibration of the reads using the combinational table. 
- * First, we perform a positional recalibration, and then a subsequent dinuc correction. - * - * Given the full recalibration table, we perform the following preprocessing steps: - * - * - calculate the global quality score shift across all data [DeltaQ] - * - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift - * -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual - * - The final shift equation is: - * - * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... ) - * - * @param key The list of Comparables that were calculated from the covariates - * @return A recalibrated quality score as a byte - */ - private byte performSequentialQualityCalculation(final Object... key) { - - final byte qualFromRead = (byte) Integer.parseInt(key[1].toString()); - final Object[] readGroupCollapsedKey = new Object[1]; - final Object[] qualityScoreCollapsedKey = new Object[2]; - final Object[] covariateCollapsedKey = new Object[3]; - - // The global quality shift (over the read group only) - readGroupCollapsedKey[0] = key[0]; - final RecalDatum globalRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(0).get(readGroupCollapsedKey)); - double globalDeltaQ = 0.0; - if (globalRecalDatum != null) { - final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality(); - final double aggregrateQReported = globalRecalDatum.getEstimatedQReported(); - globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported; - } - - // The shift in quality between reported and empirical - qualityScoreCollapsedKey[0] = key[0]; - qualityScoreCollapsedKey[1] = key[1]; - final RecalDatum qReportedRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(1).get(qualityScoreCollapsedKey)); - double deltaQReported = 0.0; - if (qReportedRecalDatum != null) { - final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality(); - 
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; - } - - // The shift in quality due to each covariate by itself in turn - double deltaQCovariates = 0.0; - double deltaQCovariateEmpirical; - covariateCollapsedKey[0] = key[0]; - covariateCollapsedKey[1] = key[1]; - for (int iii = 2; iii < key.length; iii++) { - covariateCollapsedKey[2] = key[iii]; // The given covariate - final RecalDatum covariateRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(iii).get(covariateCollapsedKey)); - if (covariateRecalDatum != null) { - deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality(); - deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); - } - } - - final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; - return QualityUtils.boundQual((int) Math.round(newQuality), (byte) MAX_QUALITY_SCORE); - - // Verbose printouts used to validate with old recalibrator - //if(key.contains(null)) { - // System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d", - // qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte)); - //} - //else { - // System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d", - // key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) ); - //} - - //return newQualityByte; - } - - /** - * Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold - * - * @param originalQuals The list of original base quality scores - * @param recalQuals A list of the new recalibrated quality scores - */ - private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) { - for (int iii = 0; iii < recalQuals.length; iii++) { - if (originalQuals[iii] < 
PRESERVE_QSCORES_LESS_THAN) { - recalQuals[iii] = originalQuals[iii]; - } - } - } - - //--------------------------------------------------------------------------------------------------------------- - // - // reduce - // - //--------------------------------------------------------------------------------------------------------------- - - /** - * Start the reduce with a handle to the output bam file - * - * @return A FileWriter pointing to a new bam file - */ - public SAMFileWriter reduceInit() { - return OUTPUT_BAM; - } - - /** - * Output each read to disk - * - * @param read The read to output - * @param output The FileWriter to write the read to - * @return The FileWriter - */ - public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) { - if (output != null) { - output.addAlignment(read); - } - return output; - } - - /** - * Do nothing - * - * @param output The SAMFileWriter that outputs the bam file - */ - public void onTraversalDone(SAMFileWriter output) { - if (numReadsWithMalformedColorSpace != 0) { - if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) { - Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " + - "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " + - "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " + - "These reads remain in the output bam file but haven't been corrected for reference bias. !!! 
USE AT YOUR OWN RISK !!!"); - } - else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) { - Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " + - "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " + - "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " + - "These reads were completely removed from the output bam file."); - - } - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java index 2e3fc26f6..5beeeb3e6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.validation; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -38,10 +39,11 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import 
org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.util.Map; import java.util.Set; @@ -188,14 +190,12 @@ import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel; * @since ${DATE} */ +@DocumentedGATKFeature( groupName = "Validation Utilities", extraDocs = {CommandLineGATK.class} ) @Requires(value={DataSource.READS, DataSource.REFERENCE}) @Allows(value={DataSource.READS, DataSource.REFERENCE}) - @By(DataSource.REFERENCE) @Reference(window=@Window(start=-200,stop=200)) - - -public class GenotypeAndValidateWalker extends RodWalker implements TreeReducible { +public class GenotypeAndValidate extends RodWalker implements TreeReducible { /** * The optional output file that will have all the variants used in the Genotype and Validation essay. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 3d281ef6c..9d96dedef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,11 +19,13 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -88,6 +91,7 @@ import java.util.List; * @author chartl * @since July 2011 */ +@DocumentedGATKFeature( groupName = "Validation Utilities", extraDocs = {CommandLineGATK.class} ) @Requires(value={DataSource.REFERENCE}) public class ValidationAmplicons extends RodWalker { /** @@ -259,20 +263,33 @@ public class ValidationAmplicons extends RodWalker { sequenceInvalid = true; invReason.add("SITE_IS_FILTERED"); } + + String refString = validate.getReference().getDisplayString(); + String altString = validate.getAlternateAllele(0).getDisplayString(); + if ( validate.isIndel() 
) { sequence.append(Character.toUpperCase((char)ref.getBase())); rawSequence.append(Character.toUpperCase((char)ref.getBase())); + final byte[] refAllele = validate.getReference().getBases(); + refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length)); + if ( refString.isEmpty() ) + refString = "-"; + final byte[] altAllele = validate.getAlternateAllele(0).getBases(); + altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length)); + if ( altString.isEmpty() ) + altString = "-"; } + sequence.append('['); - sequence.append(validate.getAlternateAllele(0).toString()); + sequence.append(altString); sequence.append('/'); - sequence.append(validate.getReference().toString()); + sequence.append(refString); sequence.append(']'); // do this to the raw sequence to -- the indeces will line up that way rawSequence.append('['); - rawSequence.append(validate.getAlternateAllele(0).getBaseString()); + rawSequence.append(altString); rawSequence.append('/'); - rawSequence.append(validate.getReference().getBaseString()); + rawSequence.append(refString); rawSequence.append(']'); allelePos = ref.getLocus(); if ( indelCounter > 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java index e54dc6388..3e48520a7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java @@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector { return true; // want to include a site in the given samples if it is *likely* to be variant (via the EXACT model) // first subset to the samples - VariantContext subContext = vc.subContextFromSamples(samples, true); + VariantContext 
subContext = vc.subContextFromSamples(samples); // now check to see (using EXACT model) whether this should be variant // do we want to apply a prior? maybe user-spec? diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java index 0f55524a6..de832b108 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java @@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{ if ( samples == null || samples.isEmpty() ) return true; - VariantContext subContext = vc.subContextFromSamples(samples, false); + VariantContext subContext = vc.subContextFromSamples(samples); if ( subContext.isPolymorphicInSamples() ) { return true; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java index af6a52002..67ddc47ff 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -40,14 +39,11 @@ public 
class GenomeEvent implements Comparable { final protected GenomeLoc loc; /** A set of the alleles segregating in this context */ final protected List alleles; - final protected Byte refBase; // final protected HashMap attributes; - public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List alleles, HashMap attributes, - byte base) { + public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List alleles, HashMap attributes) { this.loc = parser.createGenomeLoc(contig, start, stop); this.alleles = alleles; - this.refBase = base; // this.attributes = attributes; } @@ -68,7 +64,7 @@ public class GenomeEvent implements Comparable { public VariantContext createVariantContextFromEvent() { return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles) - .log10PError(0.0).referenceBaseForIndel(refBase).make(); + .log10PError(0.0).make(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java index 4b68eed2e..7c1d63f02 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java @@ -115,7 +115,7 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector { // create bare-bones event and log in corresponding bin // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes - GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel()); + GenomeEvent event = new 
GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes); binnedEventArray[binIndex].add(event); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java index eda75d647..4019c5631 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java @@ -65,7 +65,7 @@ public class UniformSamplingFrequencySelector extends FrequencyModeSelector { } // create bare-bones event and log in corresponding bin // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes - GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel()); + GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes); binnedEventArray.add(event); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelectorWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java index 78fe37fa3..9e5fd25a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelectorWalker.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java @@ -24,16 +24,20 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.io.File; import java.util.*; @@ -94,7 +98,8 @@ import java.util.*; * * */ -public class ValidationSiteSelectorWalker extends RodWalker { +@DocumentedGATKFeature( groupName = "Validation Utilities", extraDocs = {CommandLineGATK.class} ) +public class ValidationSiteSelector extends RodWalker { public enum AF_COMPUTATION_MODE { KEEP_AF_SPECTRUM, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java rename to 
public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java index b9e768274..58cd14737 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java @@ -7,6 +7,7 @@ import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -26,6 +27,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -86,9 +88,10 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-50, stop=50)) @PartitionBy(PartitionType.NONE) -public class VariantEvalWalker extends RodWalker implements TreeReducible { +public class VariantEval extends RodWalker implements TreeReducible { public static final String IS_SINGLETON_KEY = "ISSINGLETON"; @Output @@ -497,7 +500,10 @@ public class VariantEvalWalker extends RodWalker implements Tr @Requires({"eval != null", "comp != null"}) private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) { - // 
find all of the matching comps + if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION ) + // if either of these are NO_VARIATION they are LENIENT matches + return EvalCompMatchType.LENIENT; + if ( comp.getType() != eval.getType() ) return EvalCompMatchType.NO_MATCH; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java index 2a759f2f5..97814075c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java @@ -183,13 +183,13 @@ public class VariantEvalReportWriter { throw new ReviewedStingException("Datamap is empty for analysis " + scanner.getAnalysis()); // add DataPoint's for each field marked as such - for (final Field field : datamap.keySet()) { + for (final Map.Entry field : datamap.entrySet()) { try { - field.setAccessible(true); + field.getKey().setAccessible(true); // this is an atomic value, add a column for it - final String format = datamap.get(field).format(); - table.addColumn(field.getName(), format); + final String format = field.getValue().format(); + table.addColumn(field.getKey().getName(), format); } catch (SecurityException e) { throw new StingException("SecurityException: " + e); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index ff3bf66f7..97a8b4dda 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -4,7 +4,7 @@ import 
org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.samples.Sample; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.MendelianViolation; @@ -104,7 +104,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator { MendelianViolation mv; Map> families; - public void initialize(VariantEvalWalker walker) { + public void initialize(VariantEval walker) { super.initialize(walker); mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false); families = walker.getSampleDB().getFamilies(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index 88bf3aef9..a509294ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -56,7 +56,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator { //increment stats for pairwise mismatches for (Allele allele : genotype.getAlleles()) { - if (allele.isNonNull() && allele.isCalled()) { + if (allele.isCalled()) { String alleleString = allele.toString(); alleleCounts.putIfAbsent(alleleString, 0); alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index 6c4fcd26d..fe2437976 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv } } - public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (vc1 != null) updateTiTv(vc1, false); - if (vc2 != null) updateTiTv(vc2, true); + @Override + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (eval != null) + updateTiTv(eval, false); + if (comp != null) + updateTiTv(comp, true); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java index e36c2f294..a2bcdaf1d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java @@ -11,6 +11,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Collection; +import java.util.Set; /** * The Broad Institute @@ -102,6 +103,10 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { nDifferentAlleleSites++; else { SiteStatus evalStatus = calcSiteStatus(eval); + final Set evalSamples = getWalker().getSampleNamesForEvaluation(); + if ( comp.hasGenotypes() && ! 
evalSamples.isEmpty() && comp.hasGenotypes(evalSamples) ) + // if we have genotypes in both eval and comp, subset comp down just the samples in eval + comp = comp.subContextFromSamples(evalSamples, false); SiteStatus compStatus = calcSiteStatus(comp); counts[compStatus.ordinal()][evalStatus.ordinal()]++; } @@ -111,7 +116,7 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { // // helper routines // - public SiteStatus calcSiteStatus(VariantContext vc) { + private SiteStatus calcSiteStatus(VariantContext vc) { if ( vc == null ) return SiteStatus.NO_CALL; if ( vc.isFiltered() ) return SiteStatus.FILTERED; if ( vc.isMonomorphicInSamples() ) return SiteStatus.MONO; @@ -121,24 +126,18 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { int ac = 0; if ( vc.getNAlleles() > 2 ) { return SiteStatus.POLY; -//// System.out.printf("multiple alleles %s = %s%n", vc.getAlleles(), vc.getExtendedAttribute(VCFConstants.ALLELE_COUNT_KEY)); -// // todo -- omg this is painful. We need a better approach to dealing with multi-valued attributes -// for ( String v : (List)vc.getExtendedAttribute(VCFConstants.ALLELE_COUNT_KEY) ) -// ac += Integer.valueOf(v); -//// System.out.printf(" ac = %d%n", ac); } else ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0); return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO; } else { return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? 
SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do - //return SiteStatus.NO_CALL; // we can't figure out what to do } } - public boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) { + private boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) { Collection evalAlts = eval.getAlternateAlleles(); Collection compAlts = comp.getAlternateAlleles(); if ( evalAlts.size() != compAlts.size() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index df4c3e860..c08ff379b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -3,23 +3,23 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; public abstract class VariantEvaluator implements Comparable { - private VariantEvalWalker walker; + private VariantEval walker; private final String simpleName; protected VariantEvaluator() { this.simpleName = getClass().getSimpleName(); } - public void initialize(VariantEvalWalker walker) { + public void initialize(VariantEval walker) { this.walker = walker; } - public VariantEvalWalker getWalker() { + public VariantEval getWalker() { return walker; } @@ -57,7 +57,7 @@ public abstract class VariantEvaluator 
implements Comparable { * @return true if eval was originally a singleton site */ protected static boolean variantWasSingleton(final VariantContext eval) { - return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false); + return eval.getAttributeAsBoolean(VariantEval.IS_SINGLETON_KEY, false); } public final String getSimpleName() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java index 014381bea..71ea6af98 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java @@ -29,7 +29,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.GenomeLoc; @@ -150,7 +150,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { } - public void initialize(VariantEvalWalker walker) { + public void initialize(VariantEval walker) { super.initialize(walker); nSamples = walker.getSampleNamesForEvaluation().size(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index 693bdf198..2ad08d806 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification final Collection knownComps = tracker.getValues(knowns, ref.getLocus()); for ( final VariantContext c : knownComps ) { // loop over sites, looking for something that matches the type eval - if ( eval.getType() == c.getType() ) { + if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) { return KNOWN_STATES; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index 07ba424a2..90b6230ca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.Stratifier; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -13,7 +13,7 @@ import java.util.List; import java.util.Set; public abstract class VariantStratifier implements Comparable, Stratifier { - private VariantEvalWalker variantEvalWalker; + private VariantEval variantEvalWalker; final private String name; 
final protected ArrayList states = new ArrayList(); @@ -40,7 +40,7 @@ public abstract class VariantStratifier implements Comparable /** * @return a reference to the parent VariantEvalWalker running this stratification */ - public final VariantEvalWalker getVariantEvalWalker() { + public final VariantEval getVariantEvalWalker() { return variantEvalWalker; } @@ -48,7 +48,7 @@ public abstract class VariantStratifier implements Comparable * Should only be called by VariantEvalWalker itself * @param variantEvalWalker */ - public final void setVariantEvalWalker(VariantEvalWalker variantEvalWalker) { + public final void setVariantEvalWalker(VariantEval variantEvalWalker) { this.variantEvalWalker = variantEvalWalker; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java index 390682837..c98c05a9a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java @@ -3,27 +3,27 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import 
org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.ArrayList; +import java.util.Set; +import java.util.TreeSet; public final class EvaluationContext { // NOTE: must be hashset to avoid O(log n) cost of iteration in the very frequently called apply function - final VariantEvalWalker walker; + final VariantEval walker; private final ArrayList evaluationInstances; private final Set> evaluationClasses; - public EvaluationContext(final VariantEvalWalker walker, final Set> evaluationClasses) { + public EvaluationContext(final VariantEval walker, final Set> evaluationClasses) { this(walker, evaluationClasses, true); } - private EvaluationContext(final VariantEvalWalker walker, final Set> evaluationClasses, final boolean doInitialize) { + private EvaluationContext(final VariantEval walker, final Set> evaluationClasses, final boolean doInitialize) { this.walker = walker; this.evaluationClasses = evaluationClasses; this.evaluationInstances = new ArrayList(evaluationClasses.size()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 3b28747fb..e84b0b10e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -28,7 +28,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval; 
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.RequiredStratification; @@ -45,10 +45,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; public class VariantEvalUtils { - private final VariantEvalWalker variantEvalWalker; + private final VariantEval variantEvalWalker; Logger logger; - public VariantEvalUtils(VariantEvalWalker variantEvalWalker) { + public VariantEvalUtils(VariantEval variantEvalWalker) { this.variantEvalWalker = variantEvalWalker; this.logger = variantEvalWalker.getLogger(); } @@ -197,7 +197,9 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested samples */ public VariantContext getSubsetOfVariantContext(VariantContext vc, Set sampleNames) { - return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false)); + // if we want to preserve AC0 sites as polymorphic we need to not rederive alleles + final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites(); + return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles)); } public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) { @@ -216,7 +218,7 @@ public class VariantEvalUtils { VariantContextBuilder builder = new VariantContextBuilder(vcsub); if ( isSingleton ) - builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true); + builder.attribute(VariantEval.IS_SINGLETON_KEY, true); if ( ! 
hasChrCountAnnotations ) VariantContextUtils.calculateChromosomeCounts(builder, true); @@ -262,15 +264,11 @@ public class VariantEvalUtils { // First, filter the VariantContext to represent only the samples for evaluation VariantContext vcsub = vc; - if (subsetBySample && vc.hasGenotypes()) { - if ( variantEvalWalker.isSubsettingToSpecificSamples() ) - vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation()); - else - vcsub = ensureAnnotations(vc, vc); - } + if (subsetBySample && vc.hasGenotypes()) + vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation()); if ((byFilter || !vcsub.isFiltered())) { - addMapping(mapping, VariantEvalWalker.getAllSampleName(), vcsub); + addMapping(mapping, VariantEval.getAllSampleName(), vcsub); } // Now, if stratifying, split the subsetted vc per sample and add each as a new context diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index f95e5647c..011f3471c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -38,6 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import 
org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -56,14 +58,11 @@ import java.util.*; * to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a * slightly lower quality level. * - *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * *

Input

*

* The input raw variants to be recalibrated. *

- * The recalibration table file in CSV format that was generated by the VariantRecalibrator walker. + * The recalibration table file in VCF format that was generated by the VariantRecalibrator walker. *

* The tranches file that was generated by the VariantRecalibrator walker. * @@ -80,11 +79,13 @@ import java.util.*; * --ts_filter_level 99.0 \ * -tranchesFile path/to/output.tranches \ * -recalFile path/to/output.recal \ + * -mode SNP \ * -o path/to/output.recalibrated.filtered.vcf * * */ +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.LOCUS) public class ApplyRecalibration extends RodWalker implements TreeReducible { @@ -172,7 +173,7 @@ public class ApplyRecalibration extends RodWalker implements T vcfWriter.writeHeader(vcfHeader); } - public static final void addVQSRStandardHeaderLines(final Set hInfo) { + public static void addVQSRStandardHeaderLines(final Set hInfo) { hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY)); hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model")); hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out")); @@ -195,7 +196,7 @@ public class ApplyRecalibration extends RodWalker implements T for( final VariantContext vc : VCs ) { - if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { + if( VariantDataManager.checkVariationClass( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { final VariantContext recalDatum = getMatchingRecalVC(vc, recals); if( recalDatum == null ) { @@ -236,7 +237,9 @@ public class ApplyRecalibration extends RodWalker implements T filterString = tranches.get(0).name+"+"; } - if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) { + if( filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) 
{ + builder.passFilters(); + } else { builder.filters(filterString); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index d6df4ff1b..e88505f99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -234,7 +235,7 @@ public class VariantDataManager { double value; try { - value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); + value = vc.getAttributeAsDouble( annotationKey, Double.NaN ); if( Double.isInfinite(value) ) { value = Double.NaN; } if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); @@ -273,11 +274,37 @@ public class VariantDataManager { } private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) { - return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && - ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) && + return trainVC != null && trainVC.isNotFiltered() && 
trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples()); } + protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) { + switch( trainVC.getType() ) { + case SNP: + case MNP: + return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.SNP ); + case INDEL: + case MIXED: + case SYMBOLIC: + return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL ); + default: + return false; + } + } + + protected static boolean checkVariationClass( final VariantContext evalVC, final VariantRecalibratorArgumentCollection.Mode mode ) { + switch( mode ) { + case SNP: + return evalVC.isSNP() || evalVC.isMNP(); + case INDEL: + return evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic(); + case BOTH: + return true; + default: + throw new ReviewedStingException( "Encountered unknown recal mode: " + mode ); + } + } + public void writeOutRecalibrationTable( final VariantContextWriter recalWriter ) { // we need to sort in coordinate order in order to produce a valid VCF Collections.sort( data, new Comparator() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java index 32350f0fa..a85129d78 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java @@ -52,6 +52,7 @@ public class VariantDatum implements Comparable { public int worstAnnotation; public MultivariateGaussian assignment; // used in K-means implementation + @Override public int compareTo( final VariantDatum other ) { return Double.compare(this.lod, other.lod); } diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 244c5d109..c670ad2fd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -37,16 +38,15 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.io.Resource; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.io.FileNotFoundException; @@ -63,7 +63,7 @@ import java.util.*; * The purpose of the variant recalibrator is to assign a well-calibrated probability to each 
variant call in a call set. * One can then create highly accurate call sets by filtering based on this single estimate for the accuracy of each call. * The approach taken by variant quality score recalibration is to develop a continuous, covarying estimate of the relationship - * between SNP call annotations (QD, SB, HaplotypeScore, HRun, for example) and the the probability that a SNP is a true genetic + * between SNP call annotations (QD, MQ, HaplotypeScore, and ReadPosRankSum, for example) and the the probability that a SNP is a true genetic * variant versus a sequencing or data processing artifact. This model is determined adaptively based on "true sites" provided * as input, typically HapMap 3 sites and those sites found to be polymorphic on the Omni 2.5M SNP chip array. This adaptive * error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the @@ -71,15 +71,9 @@ import java.util.*; * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model. * *

- * NOTE: Please see our best practices wiki page for our recommendations on which annotations to use for specific project designs. - * - *

* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version). * See http://www.r-project.org for more info on how to download and install R. * - *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * *

Input

*

* The input raw variants to be recalibrated. @@ -88,7 +82,7 @@ import java.util.*; * *

Output

*

- * A recalibration table file in CSV format that is used by the ApplyRecalibration walker. + * A recalibration table file in VCF format that is used by the ApplyRecalibration walker. *

* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data. * @@ -100,8 +94,9 @@ import java.util.*; * -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \ * -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \ * -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \ - * -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \ - * -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \ + * -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \ + * -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \ + * -mode SNP \ * -recalFile path/to/output.recal \ * -tranchesFile path/to/output.tranches \ * -rscriptFile path/to/output.plots.R @@ -109,6 +104,7 @@ import java.util.*; * */ +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.NONE) public class VariantRecalibrator extends RodWalker, ExpandingArrayList> implements TreeReducible> { @@ -184,9 +180,6 @@ public class VariantRecalibrator extends RodWalker * */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-50,stop=50)) public class CombineVariants extends RodWalker { /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 43816b0fa..f89bcb2a7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import 
org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -33,18 +34,18 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Arrays; -import java.util.Collection; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * Filters a lifted-over VCF file for ref bases that have been changed. */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=0,stop=100)) public class FilterLiftedVariants extends RodWalker { @@ -63,7 +64,7 @@ public class FilterLiftedVariants extends RodWalker { Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : null, samples); + final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? 
vcfHeaders.get(trackName).getMetaDataInSortedOrder() : Collections.emptySet(), samples); writer.writeHeader(vcfHeader); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index c1755aa00..9fe499a03 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -41,6 +42,7 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; @@ -76,6 +78,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-200,stop=200)) public class LeftAlignVariants extends RodWalker { @@ -136,11 +139,11 @@ public class LeftAlignVariants extends RodWalker { final byte[] refSeq = ref.getBases(); // get the indel length - int indelLength; + final int indelLength; if ( 
vc.isSimpleDeletion() ) - indelLength = vc.getReference().length(); + indelLength = vc.getReference().length() - 1; else - indelLength = vc.getAlternateAllele(0).length(); + indelLength = vc.getAlternateAllele(0).length() - 1; if ( indelLength > 200 ) { writer.add(vc); @@ -148,7 +151,7 @@ public class LeftAlignVariants extends RodWalker { } // create an indel haplotype - int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; + final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength); // create a CIGAR string to represent the event @@ -167,11 +170,12 @@ public class LeftAlignVariants extends RodWalker { VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make(); //System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference)); - int indelIndex = originalIndex-difference; - byte[] newBases = new byte[indelLength]; - System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength); - Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion()); - newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]); + final int indelIndex = originalIndex-difference; + final byte[] newBases = new byte[indelLength + 1]; + newBases[0] = refSeq[indelIndex-1]; + System.arraycopy((vc.isSimpleDeletion() ? 
refSeq : originalIndel), indelIndex, newBases, 1, indelLength); + final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion()); + newVC = updateAllele(newVC, newAllele); writer.add(newVC); return 1; @@ -192,7 +196,7 @@ public class LeftAlignVariants extends RodWalker { if ( vc.isSimpleDeletion() ) { indexOfRef += indelLength; } else { - System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength); + System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength); currentPos += indelLength; } @@ -202,14 +206,14 @@ public class LeftAlignVariants extends RodWalker { return hap; } - public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) { + public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) { // create a mapping from original allele to new allele HashMap alleleMap = new HashMap(vc.getAlleles().size()); if ( newAllele.isReference() ) { alleleMap.put(vc.getReference(), newAllele); - alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0)); + alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false)); } else { - alleleMap.put(vc.getReference(), vc.getReference()); + alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true)); alleleMap.put(vc.getAlternateAllele(0), newAllele); } @@ -226,6 +230,6 @@ public class LeftAlignVariants extends RodWalker { newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make()); } - return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make(); + return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 
60d41abd5..63209e98c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -31,6 +31,7 @@ import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -39,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -51,6 +53,7 @@ import java.util.*; /** * Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted. 
*/ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) public class LiftoverVariants extends RodWalker { @ArgumentCollection @@ -116,7 +119,6 @@ public class LiftoverVariants extends RodWalker { if ( toInterval != null ) { // check whether the strand flips, and if so reverse complement everything - // TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size) if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) { vc = VariantContextUtils.reverseComplement(vc); } @@ -129,11 +131,10 @@ public class LiftoverVariants extends RodWalker { .attribute("OriginalStart", fromInterval.getStart()).make(); } - VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); - if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) { + if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) { logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s", - originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(), - originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0))); + originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(), + originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0))); } writer.add(vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index 484f7f052..2ab5136a7 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -38,6 +39,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; @@ -48,6 +50,7 @@ import java.util.*; /** * Takes a VCF file, randomly splits variants into two different sets, and outputs 2 new VCFs with the results. 
*/ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) public class RandomlySplitVariants extends RodWalker { @ArgumentCollection diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java index 378da05d3..f14f6c2a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.apache.commons.io.FilenameUtils; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.text.ListFileUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -98,6 +100,7 @@ import java.util.*; * */ @SuppressWarnings("unused") +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) public class SelectHeaders extends RodWalker implements TreeReducible { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new 
StandardVariantContextInputArgumentCollection(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index fbffd620a..cf528de09 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -41,6 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; @@ -186,6 +188,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) public class SelectVariants extends RodWalker implements TreeReducible { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @@ -326,7 +329,7 @@ public class SelectVariants extends RodWalker implements TreeR /* Private class used to store the intermediate variants in the integer random selection process */ - private class RandomVariantStructure { + 
private static class RandomVariantStructure { private VariantContext vc; RandomVariantStructure(VariantContext vcP) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 530258fe0..c92551a73 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.TribbleException; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -66,6 +68,7 @@ import java.util.Set; * * */ +@DocumentedGATKFeature( groupName = "Validation Utilities", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=0,stop=100)) public class ValidateVariants extends RodWalker { @@ -127,35 +130,16 @@ public class ValidateVariants extends RodWalker { return; // get the true reference allele - Allele reportedRefAllele = vc.getReference(); - Allele observedRefAllele = null; - // insertions - if ( vc.isSimpleInsertion() ) { - observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING); + 
final Allele reportedRefAllele = vc.getReference(); + final int refLength = reportedRefAllele.length(); + if ( refLength > 100 ) { + logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart())); + return; } - // deletions - else if ( vc.isSimpleDeletion() || vc.isMNP() ) { - // we can't validate arbitrarily long deletions - if ( reportedRefAllele.length() > 100 ) { - logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart())); - return; - } - // deletions are associated with the (position of) the last (preceding) non-deleted base; - // hence to get actually deleted bases we need offset = 1 - int offset = vc.isMNP() ? 0 : 1; - byte[] refBytes = ref.getBases(); - byte[] trueRef = new byte[reportedRefAllele.length()]; - for (int i = 0; i < reportedRefAllele.length(); i++) - trueRef[i] = refBytes[i+offset]; - observedRefAllele = Allele.create(trueRef, true); - } - // SNPs, etc. 
but not mixed types because they are too difficult - else if ( !vc.isMixed() ) { - byte[] refByte = new byte[1]; - refByte[0] = ref.getBase(); - observedRefAllele = Allele.create(refByte, true); - } + final byte[] observedRefBases = new byte[refLength]; + System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength); + final Allele observedRefAllele = Allele.create(observedRefBases); // get the RS IDs Set rsIDs = null; @@ -168,10 +152,10 @@ public class ValidateVariants extends RodWalker { try { switch( type ) { case ALL: - vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs); + vc.extraStrictValidation(reportedRefAllele, observedRefAllele, rsIDs); break; case REF: - vc.validateReferenceBases(observedRefAllele, ref.getBase()); + vc.validateReferenceBases(reportedRefAllele, observedRefAllele); break; case IDS: vc.validateRSIDs(rsIDs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 3fca2d28a..a301867fc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import 
org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -81,6 +83,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Validation Utilities", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=0,stop=40)) public class VariantValidationAssessor extends RodWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java index 1ffb78124..7e82fc454 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -13,6 +14,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -27,6 +29,7 @@ import java.util.*; * work efficiently on large VCFs (or at least give a progress bar). 
This * produces a binary ped file in individual major mode. */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) public class VariantsToBinaryPed extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 996ac75e7..b73a498bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -25,10 +25,12 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -97,6 +99,7 @@ import java.util.*; * @author Mark DePristo * @since 2010 */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) public class VariantsToTable extends RodWalker { /** * Variants from this VCF file are used by this tool as input. 
@@ -378,7 +381,7 @@ public class VariantsToTable extends RodWalker { getters.put("REF", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); - x.append(vc.getAlleleStringWithRefPadding(vc.getReference())); + x.append(vc.getReference().getDisplayString()); return x.toString(); } }); @@ -390,7 +393,7 @@ public class VariantsToTable extends RodWalker { for ( int i = 0; i < n; i++ ) { if ( i != 0 ) x.append(","); - x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i))); + x.append(vc.getAlternateAllele(i)); } return x.toString(); } @@ -432,11 +435,8 @@ public class VariantsToTable extends RodWalker { private static Object splitAltAlleles(VariantContext vc) { final int numAltAlleles = vc.getAlternateAlleles().size(); if ( numAltAlleles == 1 ) - return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0)); + return vc.getAlternateAllele(0); - final List alleles = new ArrayList(numAltAlleles); - for ( Allele allele : vc.getAlternateAlleles() ) - alleles.add(vc.getAlleleStringWithRefPadding(allele)); - return alleles; + return vc.getAlternateAlleles(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index e8c6794f2..78c9c4a1c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -44,6 
+45,7 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; @@ -78,6 +80,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-40,stop=40)) public class VariantsToVCF extends RodWalker { @@ -100,12 +103,6 @@ public class VariantsToVCF extends RodWalker { @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false) protected String sampleName = null; - /** - * This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF). 
- */ - @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false) - protected boolean fixReferenceBase = false; - private Set allowedGenotypeFormatStrings = new HashSet(); private boolean wroteHeader = false; private Set samples; @@ -137,10 +134,6 @@ public class VariantsToVCF extends RodWalker { builder.genotypes(g); } - if ( fixReferenceBase ) { - builder.referenceBaseForIndel(ref.getBase()); - } - writeRecord(builder.make(), tracker, ref.getLocus()); } @@ -166,8 +159,8 @@ public class VariantsToVCF extends RodWalker { continue; Map alleleMap = new HashMap(2); - alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion())); - alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); + alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion())); + alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); hapmap.setActualAlleles(alleleMap); // also, use the correct positioning for insertions diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index dcdef5aab..883436582 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -195,7 +195,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram { private static final List gatkPackages = Arrays.asList( "org.broadinstitute.sting.gatk", "org.broadinstitute.sting.pipeline", - "org.broadinstitute.sting.analyzecovariates", "org.broadinstitute.sting.gatk.datasources.reads.utilities"); /** @@ 
-252,7 +251,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram { */ private void writeFilter(String className, List argumentFields, Set> dependents) throws IOException { String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction", - className, "", false, String.format(" + \" --read_filter %s\"", className), argumentFields, dependents); + className, "", false, String.format(" + required(\"--read_filter\", \"%s\")", className), argumentFields, dependents); writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content); } diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 393dd5735..0065f9258 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -431,6 +431,37 @@ public class BaseUtils { return new String(simpleComplement(bases.getBytes())); } + /** + * Returns the uppercased version of the bases + * + * @param bases the bases + * @return the upper cased version + */ + static public byte[] convertToUpperCase(final byte[] bases) { + for ( int i = 0; i < bases.length; i++ ) { + if ( (char)bases[i] >= 'a' ) + bases[i] = toUpperCaseBase(bases[i]); + } + return bases; + } + + static public byte toUpperCaseBase(final byte base) { + switch (base) { + case 'a': + return 'A'; + case 'c': + return 'C'; + case 'g': + return 'G'; + case 't': + return 'T'; + case 'n': + return 'N'; + default: + return base; + } + } + /** * Returns the index of the most common base in the basecounts array. To be used with * pileup.getBaseCounts. 
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index b3f4542af..4f2b5b2eb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -304,11 +304,11 @@ public final class GenomeLocParser { return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, contigInfo.getNSequences())); if ( mustBeOnReference ) { - if (start < 0) - return vglHelper(exceptOnError, String.format("The start position %d is less than 0", start)); + if (start < 1) + return vglHelper(exceptOnError, String.format("The start position %d is less than 1", start)); - if (stop < 0) - return vglHelper(exceptOnError, String.format("The stop position %d is less than 0", stop)); + if (stop < 1) + return vglHelper(exceptOnError, String.format("The stop position %d is less than 1", stop)); int contigSize = contigInfo.getSequence(contigIndex).getSequenceLength(); if (start > contigSize || stop > contigSize) diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index d14d0deee..fcde1f419 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.Cigar; +import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -40,10 +41,13 @@ public class Haplotype { protected final double[] quals; private GenomeLoc genomeLocation = null; private HashMap 
readLikelihoodsPerSample = null; + private HashMap readCountsPerSample = null; private HashMap eventMap = null; private boolean isRef = false; private Cigar cigar; private int alignmentStartHapwrtRef; + public int leftBreakPoint = 0; + public int rightBreakPoint = 0; /** * Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual @@ -81,18 +85,27 @@ public class Haplotype { return Arrays.hashCode(bases); } - public void addReadLikelihoods( final String sample, final double[] readLikelihoods ) { + public void addReadLikelihoods( final String sample, final double[] readLikelihoods, final int[] readCounts ) { if( readLikelihoodsPerSample == null ) { readLikelihoodsPerSample = new HashMap(); } readLikelihoodsPerSample.put(sample, readLikelihoods); + if( readCountsPerSample == null ) { + readCountsPerSample = new HashMap(); + } + readCountsPerSample.put(sample, readCounts); } @Ensures({"result != null"}) public double[] getReadLikelihoods( final String sample ) { return readLikelihoodsPerSample.get(sample); } - + + @Ensures({"result != null"}) + public int[] getReadCounts( final String sample ) { + return readCountsPerSample.get(sample); + } + public Set getSampleKeySet() { return readLikelihoodsPerSample.keySet(); } @@ -158,52 +171,24 @@ public class Haplotype { } @Requires({"refInsertLocation >= 0"}) - public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) { - - if( refAllele.length() != altAllele.length() ) { refInsertLocation++; } + public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) { + // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true); - if( haplotypeInsertLocation == -1 ) { // desired change falls inside 
deletion so don't bother creating a new haplotype - return new Haplotype(bases.clone()); + if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype + return null; } - byte[] newHaplotype; - - try { - if( refAllele.length() == altAllele.length() ) { // SNP or MNP - newHaplotype = bases.clone(); - for( int iii = 0; iii < altAllele.length(); iii++ ) { - newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; - } - } else if( refAllele.length() < altAllele.length() ) { // insertion - final int altAlleleLength = altAllele.length(); - newHaplotype = new byte[bases.length + altAlleleLength]; - for( int iii = 0; iii < bases.length; iii++ ) { - newHaplotype[iii] = bases[iii]; - } - for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) { - newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; - } - for( int iii = 0; iii < altAlleleLength; iii++ ) { - newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; - } - } else { // deletion - final int shift = refAllele.length() - altAllele.length(); - newHaplotype = new byte[bases.length - shift]; - for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) { - newHaplotype[iii] = bases[iii]; - } - for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) { - newHaplotype[iii] = bases[iii+shift]; - } - } - } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding - return new Haplotype(bases.clone()); - } - - return new Haplotype(newHaplotype); + byte[] newHaplotypeBases = new byte[]{}; + newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant + newHaplotypeBases = 
ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant + newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant + return new Haplotype(newHaplotypeBases); } - public static LinkedHashMap makeHaplotypeListFromAlleles(List alleleList, int startPos, ReferenceContext ref, - final int haplotypeSize, final int numPrefBases) { + public static LinkedHashMap makeHaplotypeListFromAlleles(final List alleleList, + final int startPos, + final ReferenceContext ref, + final int haplotypeSize, + final int numPrefBases) { LinkedHashMap haplotypeMap = new LinkedHashMap(); @@ -214,7 +199,6 @@ public class Haplotype { refAllele = a; break; } - } if (refAllele == null) @@ -222,19 +206,12 @@ public class Haplotype { byte[] refBases = ref.getBases(); + final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart(); + final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases)); - int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart()); - //int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event - - - byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases); - int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length; // protect against long events that overrun available reference context - if (startAfter > refBases.length) - startAfter = refBases.length; - byte[] basesAfterVariant = Arrays.copyOfRange(refBases, - startAfter, refBases.length); - + final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length); + final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length)); // Create location for 
all haplotypes final int startLoc = ref.getWindow().getStart() + startIdxInReference; @@ -242,16 +219,14 @@ public class Haplotype { final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc); - for (final Allele a : alleleList) { - byte[] alleleBases = a.getBases(); + final byte[] alleleBases = a.getBases(); // use string concatenation - String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant); + String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant; haplotypeString = haplotypeString.substring(0,haplotypeSize); - haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus)); - + haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus)); } return haplotypeMap; diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index e024253c9..96704f0b8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -210,6 +210,13 @@ public class MathUtils { return total; } + public static int sum(byte[] x) { + int total = 0; + for (byte v : x) + total += (int)v; + return total; + } + /** * Calculates the log10 cumulative sum of an array with log10 probabilities * @@ -1596,7 +1603,17 @@ public class MathUtils { result += v1[k].doubleValue() * v2[k].doubleValue(); return result; + } + public static double dotProduct(double[] v1, double[] v2) { + if (v1.length != v2.length) + throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()"); + + double result = 0.0; + for (int k = 0; k < v1.length; k++) + result += v1[k] * v2[k]; + + return result; } public static double[] vectorLog10(double v1[]) { diff --git a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java 
b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java index 9fcb97a4d..15f7a7869 100644 --- a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java @@ -36,7 +36,7 @@ import java.util.*; */ public class PairHMM { - private static final int MAX_CACHED_QUAL = (int)Byte.MAX_VALUE; + private static final Byte MAX_CACHED_QUAL = Byte.MAX_VALUE; private static final byte DEFAULT_GOP = (byte) 45; private static final byte DEFAULT_GCP = (byte) 10; private static final double BANDING_TOLERANCE = 22.0; diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 4acc0e2c3..1242e5b00 100755 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -69,9 +69,14 @@ public class QualityUtils { * @return a probability (0.0 - 1.0) */ static private double qualToErrorProbRaw(int qual) { + return qualToErrorProb((double) qual); + } + + public static double qualToErrorProb(final double qual) { return Math.pow(10.0, ((double) qual)/-10.0); } + static public double qualToErrorProb(byte qual) { return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc. 
} diff --git a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java index 92d73a5ce..bc1158a4d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java @@ -713,11 +713,11 @@ public class SWPairwiseAlignment { System.err.println("Only one "+argname+" argument is allowed"); System.exit(1); } - if ( l.get(0).equals("true") ) return new Boolean(true); - if ( l.get(0).equals("false") ) return new Boolean(false); + if ( l.get(0).equals("true") ) return Boolean.valueOf(true); + if ( l.get(0).equals("false") ) return Boolean.valueOf(false); System.err.println("Can not parse value provided for "+argname+" argument ("+l.get(0)+"); true/false are allowed"); System.exit(1); - return null; + return Boolean.valueOf(false); // This value isn't used because it is preceded by System.exit(1) } /* ############################################## diff --git a/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java b/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java index d7a390692..9e10fd670 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java @@ -329,7 +329,7 @@ public class SequenceDictionaryUtils { */ private static class CompareSequenceRecordsByIndex implements Comparator { public int compare(SAMSequenceRecord x, SAMSequenceRecord y) { - return new Integer(x.getSequenceIndex()).compareTo(y.getSequenceIndex()); + return Integer.valueOf(x.getSequenceIndex()).compareTo(y.getSequenceIndex()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 18276f932..8e660350f 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -90,11 +90,11 @@ public class ActiveRegion implements HasGenomeLocation, Comparable public void removeAll( final ArrayList readsToRemove ) { reads.removeAll( readsToRemove ); } public boolean equalExceptReads(final ActiveRegion other) { - if ( ! activeRegionLoc.equals(other.activeRegionLoc) ) return false; + if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false; if ( isActive != other.isActive ) return false; if ( genomeLocParser != other.genomeLocParser ) return false; if ( extension != other.extension ) return false; - if ( ! extendedLoc.equals(other.extendedLoc) ) return false; + if ( extendedLoc.compareTo(other.extendedLoc) != 0 ) return false; return true; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 4333e471e..73f3cc487 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -46,15 +45,14 @@ public class ActivityProfile { final GenomeLocParser parser; final boolean presetRegions; GenomeLoc regionStartLoc = null; - final List isActiveList; - private GenomeLoc lastLoc = null; - private static final int FILTER_SIZE = 65; - private static final Double[] GaussianKernel; + final List isActiveList; + private static final int FILTER_SIZE = 80; + private static final double[] GaussianKernel; static { - GaussianKernel = new Double[2*FILTER_SIZE + 1]; 
+ GaussianKernel = new double[2*FILTER_SIZE + 1]; for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { - GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii); + GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); } } @@ -63,22 +61,20 @@ public class ActivityProfile { // todo -- add unit tests // TODO -- own preset regions public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) { - this(parser, presetRegions, new ArrayList(), null); + this(parser, presetRegions, new ArrayList(), null); } - protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List isActiveList, final GenomeLoc regionStartLoc) { + protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List isActiveList, final GenomeLoc regionStartLoc) { this.parser = parser; this.presetRegions = presetRegions; this.isActiveList = isActiveList; this.regionStartLoc = regionStartLoc; } - public void add(final GenomeLoc loc, final double score) { + public void add(final GenomeLoc loc, final ActivityProfileResult result) { if ( loc.size() != 1 ) throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" ); - if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 ) - throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc); - isActiveList.add(score); + isActiveList.add(result); if( regionStartLoc == null ) { regionStartLoc = loc; } @@ -93,22 +89,43 @@ public class ActivityProfile { * @return a new ActivityProfile that's the band-pass filtered version of this profile */ public ActivityProfile bandPassFilter() { - final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]); - final Double[] filteredProbArray = new Double[activeProbArray.length]; + final double[] activeProbArray = new double[isActiveList.size()]; + int iii = 0; + for( final 
ActivityProfileResult result : isActiveList ) { + activeProbArray[iii++] = result.isActiveProb; + } + iii = 0; + for( final ActivityProfileResult result : isActiveList ) { + if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups + final int numHQClips = result.resultValue.intValue(); + for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) { + activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]); + } + } + iii++; + } + final double[] filteredProbArray = new double[activeProbArray.length]; if( !presetRegions ) { - for( int iii = 0; iii < activeProbArray.length; iii++ ) { - final Double[] kernel = (Double[]) ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); - final Double[] activeProbSubArray = (Double[]) ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); + for( iii = 0; iii < activeProbArray.length; iii++ ) { + final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); + final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); } } - return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc); + iii = 0; + for( final double prob : filteredProbArray ) { + final ActivityProfileResult result = isActiveList.get(iii++); + result.isActiveProb = prob; + result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE; + result.resultValue = null; + } 
+ return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc); } /** * Partition this profile into active regions - * @param activeRegionExtension - * @return + * @param activeRegionExtension the amount of margin overlap in the active region + * @return the list of active regions */ public List createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) { final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author @@ -119,14 +136,14 @@ public class ActivityProfile { return Collections.emptyList(); } else if( isActiveList.size() == 1 ) { // there's a single element, it's either active or inactive - boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD; + boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize)); } else { // there are 2+ elements, divide these up into regions - boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD; + boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; int curStart = 0; for(int iii = 1; iii < isActiveList.size(); iii++ ) { - final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD; + final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD; if( isActive != thisStatus ) { returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize)); isActive = thisStatus; @@ -143,7 +160,7 @@ public class ActivityProfile { * @param isActive should the region be active? 
* @param curStart offset (0-based) from the start of this region * @param curEnd offset (0-based) from the start of this region - * @param activeRegionExtension + * @param activeRegionExtension the amount of margin overlap in the active region * @return a fully initialized ActiveRegion with the above properties */ private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { @@ -160,8 +177,8 @@ public class ActivityProfile { int cutPoint = -1; final int size = curEnd - curStart + 1; - for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) { - if( isActiveList.get(iii) < minProb ) { minProb = isActiveList.get(iii); cutPoint = iii; } + for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) { + if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; } } final List leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList()); final List rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java new file mode 100644 index 000000000..8dc29aa3c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.utils.activeregion; + +/** + * Created with IntelliJ IDEA. 
+ * User: rpoplin + * Date: 7/27/12 + */ + +public class ActivityProfileResult { + public double isActiveProb; + public ActivityProfileResultState resultState; + public Number resultValue; + + public enum ActivityProfileResultState { + NONE, + HIGH_QUALITY_SOFT_CLIPS + } + + public ActivityProfileResult( final double isActiveProb ) { + this.isActiveProb = isActiveProb; + this.resultState = ActivityProfileResultState.NONE; + this.resultValue = null; + } + + public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { + this.isActiveProb = isActiveProb; + this.resultState = resultState; + this.resultValue = resultValue; + } + +} diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/GATKLiteUtils.java b/public/java/src/org/broadinstitute/sting/utils/classloader/GATKLiteUtils.java new file mode 100755 index 000000000..2ab7d0618 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/GATKLiteUtils.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.classloader; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: ebanks + * + * A set of static utility methods for working with the full vs. Lite GATK build + */ +public class GATKLiteUtils { + /** + * Constructor access disallowed...static utility methods only! + */ + private GATKLiteUtils() { } + + + private static Set fullVersionGATKWalkers = new HashSet(); + static { + fullVersionGATKWalkers.add("HaplotypeCaller"); + fullVersionGATKWalkers.add("ReduceReads"); + } + /** + * Utility method to check whether a given walker is only available in the full GATK release + * + * @param walkerName the walker class name (not the package) to check + */ + public static boolean isAvailableOnlyInFullGATK(final String walkerName) { + return fullVersionGATKWalkers.contains(walkerName); + } + + /** + * Utility method to determine whether this is the lite version of the GATK + */ + public static boolean isGATKLite() { + if ( isLiteVersion == null ) { + try { + Class.forName(DummyProtectedClassName); + isLiteVersion = false; + } catch ( ClassNotFoundException e) { + isLiteVersion = true; + } + } + return isLiteVersion; + } + private static final String DummyProtectedClassName = "org.broadinstitute.sting.gatk.DummyProtectedClass"; + private static Boolean isLiteVersion = null; + + + /** + * Utility method to pull out a protected subclass if possible, otherwise it falls back to a public subclass. + * Important note: the protected classes MUST implement ProtectedPackageSource! 
+ * + * @param interfaceClass the interface class which the target classes implement + */ + public static Class getProtectedClassIfAvailable(final Class interfaceClass) { + List> classes = new PluginManager(interfaceClass).getPlugins(); + if ( classes.isEmpty() ) + throw new ReviewedStingException("No classes implementing the interface class " + interfaceClass.getSimpleName() + " were found"); + + Class result = null; + for ( Class c : classes ) { + if ( ProtectedPackageSource.class.isAssignableFrom(c) ) { + result = c; + break; + } + } + if ( result == null ) + result = classes.get(0); + + return result; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java index f24bbb636..9a2cb68db 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java @@ -168,6 +168,28 @@ public class PluginManager { String pluginName = getName(pluginClass); pluginsByName.put(pluginName, pluginClass); } + + // sort the plugins so the order of elements is deterministic + sortPlugins(plugins); + sortPlugins(interfaces); + } + + /** + * Sorts, in place, the list of plugins according to getName() on each element + * + * @param unsortedPlugins + */ + private final void sortPlugins(final List> unsortedPlugins) { + Collections.sort(unsortedPlugins, new ComparePluginsByName()); + } + + private final class ComparePluginsByName implements Comparator> { + @Override + public int compare(final Class aClass, final Class aClass1) { + String pluginName1 = getName(aClass); + String pluginName2 = getName(aClass1); + return pluginName1.compareTo(pluginName2); + } } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java b/public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java new file mode 
100755 index 000000000..c1da5fb02 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java @@ -0,0 +1,3 @@ +package org.broadinstitute.sting.utils.classloader; + +public interface ProtectedPackageSource {} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java new file mode 100755 index 000000000..9e2b33aae --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java @@ -0,0 +1,3 @@ +package org.broadinstitute.sting.utils.classloader; + +public interface PublicPackageSource {} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index a4383c3ae..08c50b982 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -4,7 +4,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -538,7 +538,7 @@ public class ClippingOp { return 0; } - private class CigarShift { + private static class CigarShift { private Cigar cigar; private int shiftFromStart; private int shiftFromEnd; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index d438de900..6392ce4ce 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -110,37 +110,32 @@ public class ReadClipper { } /** - * Creates a new read that's been clipped according to ops and the chosen algorithm. - * The original read is unmodified. + * Clips a read according to ops and the chosen algorithm. * * @param algorithm What mode of clipping do you want to apply for the stacked operations. - * @return a new read with the clipping applied. + * @return the read with the clipping applied. 
*/ public GATKSAMRecord clipRead(ClippingRepresentation algorithm) { if (ops == null) return getRead(); - else { - try { - GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone(); - for (ClippingOp op : getOps()) { - //check if the clipped read can still be clipped in the range requested - if (op.start < clippedRead.getReadLength()) { - ClippingOp fixedOperation = op; - if (op.stop >= clippedRead.getReadLength()) - fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1); - clippedRead = fixedOperation.apply(algorithm, clippedRead); - } - } - wasClipped = true; - ops.clear(); - if ( clippedRead.isEmpty() ) - return GATKSAMRecord.emptyRead(clippedRead); - return clippedRead; - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); // this should never happen + GATKSAMRecord clippedRead = read; + for (ClippingOp op : getOps()) { + final int readLength = clippedRead.getReadLength(); + //check if the clipped read can still be clipped in the range requested + if (op.start < readLength) { + ClippingOp fixedOperation = op; + if (op.stop >= readLength) + fixedOperation = new ClippingOp(op.start, readLength - 1); + + clippedRead = fixedOperation.apply(algorithm, clippedRead); } } + wasClipped = true; + ops.clear(); + if ( clippedRead.isEmpty() ) + return GATKSAMRecord.emptyRead(clippedRead); + return clippedRead; } @@ -241,20 +236,21 @@ public class ReadClipper { if (read.isEmpty()) return read; - byte [] quals = read.getBaseQualities(); + final byte [] quals = read.getBaseQualities(); + final int readLength = read.getReadLength(); int leftClipIndex = 0; - int rightClipIndex = read.getReadLength() - 1; + int rightClipIndex = readLength - 1; // check how far we can clip both sides while (rightClipIndex >= 0 && quals[rightClipIndex] <= lowQual) rightClipIndex--; - while (leftClipIndex < read.getReadLength() && quals[leftClipIndex] <= lowQual) leftClipIndex++; + while (leftClipIndex < readLength && quals[leftClipIndex] <= lowQual) 
leftClipIndex++; // if the entire read should be clipped, then return an empty read. if (leftClipIndex > rightClipIndex) return GATKSAMRecord.emptyRead(read); - if (rightClipIndex < read.getReadLength() - 1) { - this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1)); + if (rightClipIndex < readLength - 1) { + this.addOp(new ClippingOp(rightClipIndex + 1, readLength - 1)); } if (leftClipIndex > 0 ) { this.addOp(new ClippingOp(0, leftClipIndex - 1)); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 18b4d0b6c..570ca7c1c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -26,15 +26,12 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodecHeader; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.PositionalBufferedStream; -import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -44,15 +41,23 @@ import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * Decode BCF2 files */ -public final class BCF2Codec implements FeatureCodec, ReferenceDependentFeatureCodec 
{ +public final class BCF2Codec implements FeatureCodec { final protected static Logger logger = Logger.getLogger(BCF2Codec.class); private final static boolean FORBID_SYMBOLICS = false; + private final static int ALLOWED_MAJOR_VERSION = 2; + private final static int MIN_MINOR_VERSION = 1; + + private BCFVersion bcfVersion = null; + private VCFHeader header = null; /** @@ -108,18 +113,22 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD @Override public VariantContext decode( final PositionalBufferedStream inputStream ) { - recordNo++; - final VariantContextBuilder builder = new VariantContextBuilder(); + try { + recordNo++; + final VariantContextBuilder builder = new VariantContextBuilder(); - final int sitesBlockSize = decoder.readBlockSize(inputStream); - final int genotypeBlockSize = decoder.readBlockSize(inputStream); - decoder.readNextBlock(sitesBlockSize, inputStream); - decodeSiteLoc(builder); - final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder); + final int sitesBlockSize = decoder.readBlockSize(inputStream); + final int genotypeBlockSize = decoder.readBlockSize(inputStream); + decoder.readNextBlock(sitesBlockSize, inputStream); + decodeSiteLoc(builder); + final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder); - decoder.readNextBlock(genotypeBlockSize, inputStream); - createLazyGenotypesDecoder(info, builder); - return builder.fullyDecoded(true).make(); + decoder.readNextBlock(genotypeBlockSize, inputStream); + createLazyGenotypesDecoder(info, builder); + return builder.fullyDecoded(true).make(); + } catch ( IOException e ) { + throw new UserException.CouldNotReadInputFile("Failed to read BCF file", e); + } } @Override @@ -131,10 +140,18 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) { try { // note that this reads the magic as well, and so does double duty - if ( ! 
BCF2Utils.startsWithBCF2Magic(inputStream) ) - error("Input stream does not begin with BCF2 magic"); + bcfVersion = BCFVersion.readBCFVersion(inputStream); + if ( bcfVersion == null ) + error("Input stream does not contain a BCF encoded file; BCF magic header info not found"); - final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream); + if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION ) + error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion()); + if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION ) + error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion()); + + logger.info("BCF version " + bcfVersion); + + final int headerSizeInBytes = BCF2Type.INT32.read(inputStream); if ( headerSizeInBytes <= 0 || headerSizeInBytes > MAX_HEADER_SIZE) // no bigger than 8 MB error("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < "+ MAX_HEADER_SIZE); @@ -154,7 +171,6 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD // create the config offsets if ( ! 
header.getContigLines().isEmpty() ) { - logger.info("Found contig lines in BCF2 file, using those"); contigNames.clear(); for ( final VCFContigHeaderLine contig : header.getContigLines()) { if ( contig.getID() == null || contig.getID().equals("") ) @@ -162,7 +178,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD contigNames.add(contig.getID()); } } else { - logger.info("Didn't find any contig lines in BCF2 file, falling back (dangerously) to GATK reference dictionary"); + throw new UserException.MalformedBCF2("Didn't find any contig lines in BCF2 file header"); } // create the string dictionary @@ -187,7 +203,8 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD FileInputStream fis = null; try { fis = new FileInputStream(path); - return BCF2Utils.startsWithBCF2Magic(fis); + final BCFVersion version = BCFVersion.readBCFVersion(fis); + return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION; } catch ( FileNotFoundException e ) { return false; } catch ( IOException e ) { @@ -196,24 +213,11 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD try { if ( fis != null ) fis.close(); } catch ( IOException e ) { - ; // do nothing + // do nothing } } } - // -------------------------------------------------------------------------------- - // - // Reference dependence - // - // -------------------------------------------------------------------------------- - - @Override - public void setGenomeLocParser(final GenomeLocParser genomeLocParser) { - // initialize contigNames to standard ones in reference - for ( final SAMSequenceRecord contig : genomeLocParser.getContigs().getSequences() ) - contigNames.add(contig.getSequenceName()); - } - // -------------------------------------------------------------------------------- // // implicit block @@ -234,7 +238,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD * @return */ @Requires({"builder != null"}) - private final void decodeSiteLoc(final 
VariantContextBuilder builder) { + private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOException { final int contigOffset = decoder.decodeInt(BCF2Type.INT32); final String contig = lookupContigName(contigOffset); builder.chr(contig); @@ -253,7 +257,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD */ @Requires({"builder != null", "decoder != null"}) @Ensures({"result != null", "result.isValid()"}) - private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) { + private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException { final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT); if ( qual != null ) { builder.log10PError(((Double)qual) / -10.0); @@ -309,7 +313,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD * Decode the id field in this BCF2 file and store it in the builder * @param builder */ - private void decodeID( final VariantContextBuilder builder ) { + private void decodeID( final VariantContextBuilder builder ) throws IOException { final String id = (String)decoder.decodeTypedValue(); if ( id == null ) @@ -318,27 +322,6 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD builder.id(id); } - /** - * Annoying routine that deals with allele clipping from the BCF2 encoding to the standard - * GATK encoding. - * - * @param position - * @param ref - * @param unclippedAlleles - * @return - */ - @Requires({"position > 0", "ref != null && ref.length() > 0", "! 
unclippedAlleles.isEmpty()"}) - @Ensures("result.size() == unclippedAlleles.size()") - protected List clipAllelesIfNecessary(final int position, - final String ref, - final List unclippedAlleles) { - // the last argument of 1 allows us to safely ignore the end, because we are - // ultimately going to use the end in the record itself - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(position, ref, unclippedAlleles, 1); - if ( clipped.getError() != null ) error(clipped.getError()); - return clipped.getClippedAlleles(); - } - /** * Decode the alleles from this BCF2 file and put the results in builder * @param builder @@ -347,7 +330,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD * @return the alleles */ @Requires("nAlleles > 0") - private List decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) { + private List decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) throws IOException { // TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes List alleles = new ArrayList(nAlleles); String ref = null; @@ -366,11 +349,9 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD } assert ref != null; - alleles = clipAllelesIfNecessary(pos, ref, alleles); builder.alleles(alleles); assert ref.length() > 0; - builder.referenceBaseForIndel(ref.getBytes()[0]); return alleles; } @@ -379,7 +360,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD * Decode the filter field of this BCF2 file and store the result in the builder * @param builder */ - private void decodeFilter( final VariantContextBuilder builder ) { + private void decodeFilter( final VariantContextBuilder builder ) throws IOException { final Object value = decoder.decodeTypedValue(); if ( value == null ) @@ -406,7 +387,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD * @param numInfoFields */ 
@Requires("numInfoFields >= 0") - private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) { + private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException { if ( numInfoFields == 0 ) // fast path, don't bother doing any work if there are no fields return; @@ -466,7 +447,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD } @Ensures("result != null") - private final String getDictionaryString() { + private final String getDictionaryString() throws IOException { return getDictionaryString((Integer) decoder.decodeTypedValue()); } @@ -515,6 +496,6 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD } private final void error(final String message) throws RuntimeException { - throw new UserException.MalformedBCF2(String.format("At record %d with position %d:", recordNo, pos, message)); + throw new UserException.MalformedBCF2(String.format("%s, at record %d with position %d:", message, recordNo, pos)); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index 7a6d96131..d7f59632c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -129,39 +129,40 @@ public final class BCF2Decoder { // // ---------------------------------------------------------------------- - public final Object decodeTypedValue() { + public final Object decodeTypedValue() throws IOException { final byte typeDescriptor = readTypeDescriptor(); return decodeTypedValue(typeDescriptor); } - public final Object decodeTypedValue(final byte typeDescriptor) { + public final Object decodeTypedValue(final byte typeDescriptor) throws IOException { final int size = decodeNumberOfElements(typeDescriptor); return decodeTypedValue(typeDescriptor, size); } - public final 
Object decodeTypedValue(final byte typeDescriptor, final int size) { - final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); - - assert size >= 0; - + @Requires("size >= 0") + public final Object decodeTypedValue(final byte typeDescriptor, final int size) throws IOException { if ( size == 0 ) { + // missing value => null in java return null; - } else if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency - return decodeLiteralString(size); - } else if ( size == 1 ) { - return decodeSingleValue(type); } else { - final ArrayList ints = new ArrayList(size); - for ( int i = 0; i < size; i++ ) { - final Object val = decodeSingleValue(type); - if ( val == null ) continue; // auto-pruning. We remove trailing nulls - ints.add(val); + final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); + if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency + return decodeLiteralString(size); + } else if ( size == 1 ) { + return decodeSingleValue(type); + } else { + final ArrayList ints = new ArrayList(size); + for ( int i = 0; i < size; i++ ) { + final Object val = decodeSingleValue(type); + if ( val == null ) continue; // auto-pruning. We remove trailing nulls + ints.add(val); + } + return ints.isEmpty() ? null : ints; // return null when all of the values are null } - return ints.isEmpty() ? null : ints; // return null when all of the values are null } } - public final Object decodeSingleValue(final BCF2Type type) { + public final Object decodeSingleValue(final BCF2Type type) throws IOException { // TODO -- decodeTypedValue should integrate this routine final int value = decodeInt(type); @@ -201,7 +202,7 @@ public final class BCF2Decoder { return null; else { final String s = new String(bytes, 0, goodLength); - return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s; + return BCF2Utils.isCollapsedString(s) ? 
BCF2Utils.explodeStringList(s) : s; } } catch ( IOException e ) { throw new ReviewedStingException("readByte failure", e); @@ -209,7 +210,7 @@ public final class BCF2Decoder { } @Ensures("result >= 0") - public final int decodeNumberOfElements(final byte typeDescriptor) { + public final int decodeNumberOfElements(final byte typeDescriptor) throws IOException { if ( BCF2Utils.sizeIsOverflow(typeDescriptor) ) // -1 ensures we explode immediately with a bad size if the result is missing return decodeInt(readTypeDescriptor(), -1); @@ -227,15 +228,15 @@ public final class BCF2Decoder { * @return */ @Requires("BCF2Utils.decodeSize(typeDescriptor) == 1") - public final int decodeInt(final byte typeDescriptor, final int missingValue) { + public final int decodeInt(final byte typeDescriptor, final int missingValue) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); final int i = decodeInt(type); return i == type.getMissingBytes() ? missingValue : i; } @Requires("type != null") - public final int decodeInt(final BCF2Type type) { - return BCF2Utils.readInt(type.getSizeInBytes(), recordStream); + public final int decodeInt(final BCF2Type type) throws IOException { + return type.read(recordStream); } /** @@ -256,8 +257,8 @@ public final class BCF2Decoder { * int elements are still forced to do a fresh allocation as well. 
* @return see description */ - @Requires({"BCF2Type.INTEGERS.contains(type)", "size >= 0", "type != null"}) - public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) { + @Requires({"type != null", "type.isIntegerType()", "size >= 0"}) + public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) throws IOException { if ( size == 0 ) { return null; } else { @@ -289,12 +290,12 @@ public final class BCF2Decoder { } } - public final int[] decodeIntArray(final byte typeDescriptor, final int size) { + public final int[] decodeIntArray(final byte typeDescriptor, final int size) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); return decodeIntArray(size, type, null); } - public final double rawFloatToFloat(final int rawFloat) { + private double rawFloatToFloat(final int rawFloat) { return (double)Float.intBitsToFloat(rawFloat); } @@ -310,8 +311,8 @@ public final class BCF2Decoder { * @param inputStream * @return */ - public final int readBlockSize(final InputStream inputStream) { - return BCF2Utils.readInt(4, inputStream); + public final int readBlockSize(final InputStream inputStream) throws IOException { + return BCF2Type.INT32.read(inputStream); } /** @@ -344,7 +345,7 @@ public final class BCF2Decoder { } } - public final byte readTypeDescriptor() { + public final byte readTypeDescriptor() throws IOException { return BCF2Utils.readByte(recordStream); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java index 0dadc49f9..e4ae96262 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import 
org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; +import java.io.IOException; import java.util.*; /** @@ -105,12 +106,12 @@ public class BCF2GenotypeFieldDecoders { final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, - final GenotypeBuilder[] gbs); + final GenotypeBuilder[] gbs) throws IOException; } private class GTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES ) fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs); else { @@ -135,7 +136,7 @@ public class BCF2GenotypeFieldDecoders { private final void fastBiallelicDiploidDecode(final List siteAlleles, final BCF2Decoder decoder, final byte typeDescriptor, - final GenotypeBuilder[] gbs) { + final GenotypeBuilder[] gbs) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); final int nPossibleGenotypes = 3 * 3; @@ -177,7 +178,7 @@ public class BCF2GenotypeFieldDecoders { final int ploidy, final BCF2Decoder decoder, final byte typeDescriptor, - final GenotypeBuilder[] gbs) { + final GenotypeBuilder[] gbs) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); // a single cache for the encoded genotypes, since we don't actually need this vector @@ -216,7 +217,7 @@ public class BCF2GenotypeFieldDecoders { private class DPDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final 
int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { // the -1 is for missing gb.DP(decoder.decodeInt(typeDescriptor, -1)); @@ -226,7 +227,7 @@ public class BCF2GenotypeFieldDecoders { private class GQDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { // the -1 is for missing gb.GQ(decoder.decodeInt(typeDescriptor, -1)); @@ -236,7 +237,7 @@ public class BCF2GenotypeFieldDecoders { private class ADDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { gb.AD(decoder.decodeIntArray(typeDescriptor, numElements)); } @@ -245,7 +246,7 @@ public class BCF2GenotypeFieldDecoders { private class PLDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws 
IOException { for ( final GenotypeBuilder gb : gbs ) { gb.PL(decoder.decodeIntArray(typeDescriptor, numElements)); } @@ -254,7 +255,7 @@ public class BCF2GenotypeFieldDecoders { private class GenericDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { Object value = decoder.decodeTypedValue(typeDescriptor, numElements); if ( value != null ) { // don't add missing values @@ -273,7 +274,7 @@ public class BCF2GenotypeFieldDecoders { private class FTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { Object value = decoder.decodeTypedValue(typeDescriptor, numElements); assert value == null || value instanceof String; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java index 35fb2e97a..cf34a8b48 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java @@ -26,9 +26,11 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import com.google.java.contract.Requires; import org.apache.log4j.Logger; +import 
org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; +import java.io.IOException; import java.util.*; /** @@ -64,33 +66,38 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser { if ( logger.isDebugEnabled() ) logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each"); - // load our byte[] data into the decoder - final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes); + try { - for ( int i = 0; i < nSamples; i++ ) - builders[i].reset(true); + // load our byte[] data into the decoder + final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes); - for ( int i = 0; i < nFields; i++ ) { - // get the field name - final int offset = (Integer) decoder.decodeTypedValue(); - final String field = codec.getDictionaryString(offset); + for ( int i = 0; i < nSamples; i++ ) + builders[i].reset(true); - // the type of each element - final byte typeDescriptor = decoder.readTypeDescriptor(); - final int numElements = decoder.decodeNumberOfElements(typeDescriptor); - final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field); - try { - fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders); - } catch ( ClassCastException e ) { - throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field - + " inconsistent with the value observed in the decoded value"); + for ( int i = 0; i < nFields; i++ ) { + // get the field name + final int offset = (Integer) decoder.decodeTypedValue(); + final String field = codec.getDictionaryString(offset); + + // the type of each element + final byte typeDescriptor = decoder.readTypeDescriptor(); + final int numElements = decoder.decodeNumberOfElements(typeDescriptor); + final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = 
codec.getGenotypeFieldDecoder(field); + try { + fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders); + } catch ( ClassCastException e ) { + throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field + + " inconsistent with the value observed in the decoded value"); + } } + + final ArrayList genotypes = new ArrayList(nSamples); + for ( final GenotypeBuilder gb : builders ) + genotypes.add(gb.make()); + + return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); + } catch ( IOException e ) { + throw new ReviewedStingException("Unexpected IOException parsing already read genotypes data block", e); } - - final ArrayList genotypes = new ArrayList(nSamples); - for ( final GenotypeBuilder gb : builders ) - genotypes.add(gb.make()); - - return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java index f874b14cd..6fd698ff6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java @@ -26,6 +26,9 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import com.google.java.contract.Requires; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.EnumSet; /** @@ -35,11 +38,87 @@ import java.util.EnumSet; * @since 05/12 */ public enum BCF2Type { - INT8 (1, 1, 0xFFFFFF80, -127, 127), // todo -- confirm range - INT16(2, 2, 0xFFFF8000, -32767, 32767), - INT32(3, 4, 0x80000000, -2147483647, 2147483647), - FLOAT(5, 4, 0x7F800001), - CHAR (7, 1, 0x00000000); + // the actual values themselves + MISSING(0, 0, 0x00) { + @Override public int read(final 
InputStream in) throws IOException { + throw new IllegalArgumentException("Cannot read MISSING type"); + } + @Override public void write(final int value, final OutputStream out) throws IOException { + throw new IllegalArgumentException("Cannot write MISSING type"); + } + }, + + // todo -- confirm range + INT8 (1, 1, 0xFFFFFF80, -127, 127) { + @Override + public int read(final InputStream in) throws IOException { + return BCF2Utils.readByte(in); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + out.write(0xFF & value); // TODO -- do we need this operation? + } + }, + + INT16(2, 2, 0xFFFF8000, -32767, 32767) { + @Override + public int read(final InputStream in) throws IOException { + final int b2 = BCF2Utils.readByte(in) & 0xFF; + final int b1 = BCF2Utils.readByte(in) & 0xFF; + return (short)((b1 << 8) | b2); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + // TODO -- optimization -- should we put this in a local buffer? 
+ out.write((0x00FF & value)); + out.write((0xFF00 & value) >> 8); + } + }, + + INT32(3, 4, 0x80000000, -2147483647, 2147483647) { + @Override + public int read(final InputStream in) throws IOException { + final int b4 = BCF2Utils.readByte(in) & 0xFF; + final int b3 = BCF2Utils.readByte(in) & 0xFF; + final int b2 = BCF2Utils.readByte(in) & 0xFF; + final int b1 = BCF2Utils.readByte(in) & 0xFF; + return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + out.write((0x000000FF & value)); + out.write((0x0000FF00 & value) >> 8); + out.write((0x00FF0000 & value) >> 16); + out.write((0xFF000000 & value) >> 24); + } + }, + + FLOAT(5, 4, 0x7F800001) { + @Override + public int read(final InputStream in) throws IOException { + return INT32.read(in); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + INT32.write(value, out); + } + }, + + CHAR (7, 1, 0x00000000) { + @Override + public int read(final InputStream in) throws IOException { + return INT8.read(in); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + INT8.write(value, out); + } + }; private final int id; private final Object missingJavaValue; @@ -47,10 +126,6 @@ public enum BCF2Type { private final int sizeInBytes; private final long minValue, maxValue; - BCF2Type(final int id) { - this(id, -1, 0, 0, 0); - } - BCF2Type(final int id, final int sizeInBytes, final int missingBytes) { this(id, sizeInBytes, missingBytes, 0, 0); } @@ -86,7 +161,7 @@ public enum BCF2Type { * @param v * @return */ - @Requires("INTEGERS.contains(this)") + @Requires("this.isIntegerType()") public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; } /** @@ -108,9 +183,37 @@ public enum BCF2Type { /** * An enum set of the types that might represent Integer values */ - public final static EnumSet INTEGERS = EnumSet.of(INT8, 
INT16, INT32); + private final static EnumSet INTEGERS = EnumSet.of(INT8, INT16, INT32); + + /** + * @return true if this BCF2Type corresponds to the magic "MISSING" type (0x00) + */ + public boolean isMissingType() { + return this == MISSING; + } public boolean isIntegerType() { return INTEGERS.contains(this); } + + /** + * Read a value from in stream of this BCF2 type as an int [32 bit] collection of bits + * + * For intX and char values this is just the int / byte value of the underlying data represented as a 32 bit int + * For a char the result must be converted to a char by (char)(byte)(0x0F & value) + * For doubles it's necessary to convert subsequently this value to a double via Double.bitsToDouble() + * + * @param in + * @return + * @throws IOException + */ + @Requires("in != null") + public int read(final InputStream in) throws IOException { + throw new IllegalArgumentException("Not implemented"); + } + + @Requires("out != null") + public void write(final int value, final OutputStream out) throws IOException { + throw new IllegalArgumentException("Not implemented"); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 143ab52df..e6e78d89d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -41,8 +41,6 @@ import java.util.*; * @since 5/12 */ public final class BCF2Utils { - public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes(); - public static final int MAX_ALLELES_IN_GENOTYPES = 127; public static final int OVERFLOW_ELEMENT_MARKER = 15; @@ -75,74 +73,54 @@ public final class BCF2Utils { */ @Requires("header != null") @Ensures({"result != null", "new HashSet(result).size() == result.size()"}) - public final static ArrayList makeDictionary(final VCFHeader header) { + public static ArrayList makeDictionary(final 
VCFHeader header) { final Set seen = new HashSet(); final ArrayList dict = new ArrayList(); - boolean sawPASS = false; + // special case the special PASS field which doesn't show up in the FILTER field definitions + seen.add(VCFConstants.PASSES_FILTERS_v4); + dict.add(VCFConstants.PASSES_FILTERS_v4); + // set up the strings dictionary for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) { final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line; if ( ! seen.contains(idLine.getID())) { - sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4); dict.add(idLine.getID()); seen.add(idLine.getID()); } } } - - if ( ! sawPASS ) - dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field - return dict; } - @Requires({"nElements >= 0", "type != null"}) - public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { - int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER); - byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F)); - return typeByte; + @Requires({"nElements >= 0", "nElements <= OVERFLOW_ELEMENT_MARKER", "type != null"}) + public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { + return (byte)((0x0F & nElements) << 4 | (type.getID() & 0x0F)); } @Ensures("result >= 0") - public final static int decodeSize(final byte typeDescriptor) { + public static int decodeSize(final byte typeDescriptor) { return (0xF0 & typeDescriptor) >> 4; } @Ensures("result >= 0") - public final static int decodeTypeID(final byte typeDescriptor) { + public static int decodeTypeID(final byte typeDescriptor) { return typeDescriptor & 0x0F; } @Ensures("result != null") - public final static BCF2Type decodeType(final byte typeDescriptor) { + public static BCF2Type decodeType(final byte typeDescriptor) { return ID_TO_ENUM[decodeTypeID(typeDescriptor)]; } - public final static boolean 
sizeIsOverflow(final byte typeDescriptor) { + public static boolean sizeIsOverflow(final byte typeDescriptor) { return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER; } - @Requires("nElements >= 0") - public final static boolean willOverflow(final long nElements) { - return nElements > MAX_INLINE_ELEMENTS; - } - - public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException { - final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length]; - stream.read(magicBytes); - return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE); - } - - public final static byte readByte(final InputStream stream) { - // TODO -- shouldn't be capturing error here - try { - return (byte)(stream.read() & 0xFF); - } catch ( IOException e ) { - throw new ReviewedStingException("readByte failure", e); - } + public static byte readByte(final InputStream stream) throws IOException { + return (byte)(stream.read() & 0xFF); } /** @@ -155,7 +133,7 @@ public final class BCF2Utils { */ @Requires({"strings != null", "strings.size() > 1"}) @Ensures("result != null") - public static final String collapseStringList(final List strings) { + public static String collapseStringList(final List strings) { final StringBuilder b = new StringBuilder(); for ( final String s : strings ) { if ( s != null ) { @@ -177,14 +155,14 @@ public final class BCF2Utils { */ @Requires({"collapsed != null", "isCollapsedString(collapsed)"}) @Ensures("result != null") - public static final List exploreStringList(final String collapsed) { + public static List explodeStringList(final String collapsed) { assert isCollapsedString(collapsed); final String[] exploded = collapsed.substring(1).split(","); return Arrays.asList(exploded); } @Requires("s != null") - public static final boolean isCollapsedString(final String s) { + public static boolean isCollapsedString(final String s) { return s.charAt(0) == ','; } @@ -225,8 +203,8 @@ public final class BCF2Utils { } } - 
@Ensures("BCF2Type.INTEGERS.contains(result)") - public final static BCF2Type determineIntegerType(final int value) { + @Ensures("result.isIntegerType()") + public static BCF2Type determineIntegerType(final int value) { for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) { if ( potentialType.withinRange(value) ) return potentialType; @@ -235,20 +213,20 @@ public final class BCF2Utils { throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value); } - @Ensures("BCF2Type.INTEGERS.contains(result)") - public final static BCF2Type determineIntegerType(final int[] values) { - // literally a copy of the code below, but there's no general way to unify lists and arrays in java - BCF2Type maxType = BCF2Type.INT8; - for ( final int value : values ) { - final BCF2Type type1 = determineIntegerType(value); - switch ( type1 ) { - case INT8: break; - case INT16: maxType = BCF2Type.INT16; break; - case INT32: return BCF2Type.INT32; // fast path for largest possible value - default: throw new ReviewedStingException("Unexpected integer type " + type1 ); - } + @Ensures("result.isIntegerType()") + public static BCF2Type determineIntegerType(final int[] values) { + // find the min and max values in the array + int max = 0, min = 0; + for ( final int v : values ) { + if ( v > max ) max = v; + if ( v < min ) min = v; } - return maxType; + + final BCF2Type maxType = determineIntegerType(max); + final BCF2Type minType = determineIntegerType(min); + + // INT8 < INT16 < INT32 so this returns the larger of the two + return maxType.compareTo(minType) >= 0 ? 
maxType : minType; } /** @@ -260,9 +238,9 @@ public final class BCF2Utils { * @param t2 * @return */ - @Requires({"BCF2Type.INTEGERS.contains(t1)","BCF2Type.INTEGERS.contains(t2)"}) - @Ensures("BCF2Type.INTEGERS.contains(result)") - public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) { + @Requires({"t1.isIntegerType()","t2.isIntegerType()"}) + @Ensures("result.isIntegerType()") + public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) { switch ( t1 ) { case INT8: return t2; case INT16: return t2 == BCF2Type.INT32 ? t2 : t1; @@ -271,8 +249,8 @@ public final class BCF2Utils { } } - @Ensures("BCF2Type.INTEGERS.contains(result)") - public final static BCF2Type determineIntegerType(final List values) { + @Ensures("result.isIntegerType()") + public static BCF2Type determineIntegerType(final List values) { BCF2Type maxType = BCF2Type.INT8; for ( final int value : values ) { final BCF2Type type1 = determineIntegerType(value); @@ -297,56 +275,9 @@ public final class BCF2Utils { * @param o * @return */ - public final static List toList(final Object o) { + public static List toList(final Object o) { if ( o == null ) return Collections.emptyList(); else if ( o instanceof List ) return (List)o; else return Collections.singletonList(o); } - - - @Requires({"stream != null", "bytesForEachInt > 0"}) - public final static int readInt(int bytesForEachInt, final InputStream stream) { - switch ( bytesForEachInt ) { - case 1: { - return (byte)(readByte(stream)); - } case 2: { - final int b2 = readByte(stream) & 0xFF; - final int b1 = readByte(stream) & 0xFF; - return (short)((b1 << 8) | b2); - } case 4: { - final int b4 = readByte(stream) & 0xFF; - final int b3 = readByte(stream) & 0xFF; - final int b2 = readByte(stream) & 0xFF; - final int b1 = readByte(stream) & 0xFF; - return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4); - } default: throw new ReviewedStingException("Unexpected size during decoding"); - } - } - - public final static void 
encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException { - switch ( type.getSizeInBytes() ) { - case 1: - encodeStream.write(0xFF & value); - break; - case 2: - encodeStream.write((0x00FF & value)); - encodeStream.write((0xFF00 & value) >> 8); - break; - case 4: - encodeStream.write((0x000000FF & value)); - encodeStream.write((0x0000FF00 & value) >> 8); - encodeStream.write((0x00FF0000 & value) >> 16); - encodeStream.write((0xFF000000 & value) >> 24); - break; - default: - throw new ReviewedStingException("BUG: unexpected type size " + type); - } -// general case for reference -// for ( int i = type.getSizeInBytes() - 1; i >= 0; i-- ) { -// final int shift = i * 8; -// int mask = 0xFF << shift; -// int byteValue = (mask & value) >> shift; -// encodeStream.write(byteValue); -// } - } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java new file mode 100644 index 000000000..742da7c0c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java @@ -0,0 +1,80 @@ +package org.broadinstitute.sting.utils.codecs.bcf2; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; + +/** + * Simple holder for BCF version information + * + * User: depristo + * Date: 8/2/12 + * Time: 2:16 PM + */ +public class BCFVersion { + /** + * BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2) + * and m is the minor version, currently 1 + */ + public static final byte[] MAGIC_HEADER_START = "BCF".getBytes(); + + final int majorVersion; + final int minorVersion; + + public BCFVersion(int majorVersion, int minorVersion) { + this.majorVersion = majorVersion; + this.minorVersion = minorVersion; + } + + /** + * @return the major version number of this BCF file + */ + public int getMajorVersion() { + return 
majorVersion; + } + + /** + * @return the minor version number of this BCF file + */ + public int getMinorVersion() { + return minorVersion; + } + + /** + * Return a new BCFVersion object describing the major and minor version of the BCF file in stream + * + * Note that stream must be at the very start of the file. + * + * @param stream + * @return a BCFVersion object, or null if stream doesn't contain a BCF file + * @throws IOException + */ + public static BCFVersion readBCFVersion(final InputStream stream) throws IOException { + final byte[] magicBytes = new byte[MAGIC_HEADER_START.length]; + stream.read(magicBytes); + if ( Arrays.equals(magicBytes, MAGIC_HEADER_START) ) { + // we're a BCF file + final int majorByte = stream.read(); + final int minorByte = stream.read(); + return new BCFVersion( majorByte, minorByte ); + } else + return null; + } + + /** + * Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions + * @param out + * @throws IOException + */ + public void write(final OutputStream out) throws IOException { + out.write(MAGIC_HEADER_START); + out.write(getMajorVersion() & 0xFF); + out.write(getMinorVersion() & 0xFF); + } + + @Override + public String toString() { + return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java index 3f72359fa..656b95e7a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java @@ -38,25 +38,41 @@ import java.util.*; import java.util.regex.Pattern; /** - * TODO GUILLERMO DEL ANGEL + * Codec for Beagle imputation engine * *

- * Codec Description + * Reads in tabular files with site markers and genotype posteriors, genotypes and phasing that Beagle produced *

* *

- * See also: @see VCF specification
+ * See also: @see BEAGLE home page
*

*

* - *

File format example

+ *

File format example for phased genotypes file

*
- *     line 1
- *     line 2
- *     line 3
+ *     dummy header
+ *      20:60251 T T T T T T
+ *      20:60321 G G G G G G
+ *      20:60467 G G G G G G
  * 
* + *

File format example for genotype posteriors

+ *
+ *     marker alleleA alleleB NA07056 NA07056 NA07056
+ *     20:60251 T C 0.9962 0.0038 0 0.99245 0.00755 0 0.99245 0.00755 0
+ *     20:60321 G T 0.98747 0.01253 0 0.99922 0.00078 0 0.99368 0.00632 0
+ *     20:60467 G C 0.97475 0.02525 0 0.98718 0.01282 0 0.98718 0.01282 0
+ * 
+ * + *

File format example for r2 file + *
+ *      20:60251        0.747
+ *      20:60321        0.763
+ *      20:60467        0.524
+ * 
+ *

* @author Mark DePristo * @since 2010 */ diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index b3420514b..043e5e185 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -237,7 +237,12 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec // parse out the required fields final String chr = getCachedString(parts[0]); builder.chr(chr); - int pos = Integer.valueOf(parts[1]); + int pos = -1; + try { + pos = Integer.valueOf(parts[1]); + } catch (NumberFormatException e) { + generateException(parts[1] + " is not a valid start position in the VCF format"); + } builder.start(pos); if ( parts[2].length() == 0 ) @@ -256,9 +261,20 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec final Map attrs = parseInfo(parts[7]); builder.attributes(attrs); + if ( attrs.containsKey(VCFConstants.END_KEY) ) { + // update stop with the end key if provided + try { + builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString())); + } catch (Exception e) { + generateException("the END value in the INFO field is not valid"); + } + } else { + builder.stop(pos + ref.length() - 1); + } + // get our alleles, filters, and setup an attribute map - final List rawAlleles = parseAlleles(ref, alts, lineNo); - final List alleles = updateBuilderAllelesAndStop(builder, ref, pos, rawAlleles, attrs); + final List alleles = parseAlleles(ref, alts, lineNo); + builder.alleles(alleles); // do we have genotyping data if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) { @@ -275,7 +291,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec VariantContext vc = null; try { - builder.referenceBaseForIndel(ref.getBytes()[0]); vc = builder.make(); } catch (Exception e) { generateException(e.getMessage()); 
@@ -284,31 +299,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec return vc; } - private final List updateBuilderAllelesAndStop(final VariantContextBuilder builder, - final String ref, - final int pos, - final List rawAlleles, - final Map attrs) { - int endForSymbolicAlleles = pos; // by default we use the pos - if ( attrs.containsKey(VCFConstants.END_KEY) ) { - // update stop with the end key if provided - try { - endForSymbolicAlleles = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()); - } catch (Exception e) { - generateException("the END value in the INFO field is not valid"); - } - } - - // find out our current location, and clip the alleles down to their minimum length - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(pos, ref, rawAlleles, endForSymbolicAlleles); - if ( clipped.getError() != null ) - generateException(clipped.getError(), lineNo); - - builder.stop(clipped.getStop()); - builder.alleles(clipped.getClippedAlleles()); - return clipped.getClippedAlleles(); - } - /** * get the name of this codec * @return our set name diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java deleted file mode 100644 index 40ba23d9d..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright 
notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.codecs.vcf; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; -import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.*; - -import java.util.*; - -/** - * All of the gross allele clipping and padding routines in one place - * - * Having attempted to understand / fix / document this code myself - * I can only conclude that this entire approach needs to be rethought. This - * code just doesn't work robustly with symbolic alleles, with multiple alleles, - * requires a special "reference base for indels" stored in the VariantContext - * whose correctness isn't enforced, and overall has strange special cases - * all over the place. - * - * The reason this code is so complex is due to symbolics and multi-alleleic - * variation, which frequently occur when combining variants from multiple - * VCF files. 
- * - * TODO rethink this class, make it clean, and make it easy to create, mix, and write out alleles - * TODO this code doesn't work with reverse clipped alleles (ATA / GTTA -> AT / GT) - * - * @author Mark DePristo - * @since 6/12 - */ -public final class VCFAlleleClipper { - private VCFAlleleClipper() { } - - /** - * Determine whether we should clip off the first base of all unclippped alleles or not - * - * Returns true if all of the alleles in unclippedAlleles share a common first base with - * ref0. Ref0 should be the first base of the reference allele UnclippedAlleles may - * contain the reference allele itself, or just the alternate alleles, it doesn't matter. - * - * The algorithm returns true if the first base should be clipped off, or false otherwise - * - * This algorithm works even in the presence of symbolic alleles, logically ignoring these - * values. It - * - * @param unclippedAlleles list of unclipped alleles to assay - * @param ref0 the first base of the reference allele - * @return true if we should clip the first base of unclippedAlleles - */ - @Requires("unclippedAlleles != null") - public static boolean shouldClipFirstBaseP(final List unclippedAlleles, - final byte ref0) { - boolean allSymbolicAlt = true; - - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) { - continue; - } - - // already know we aren't symbolic, so we only need to decide if we have only seen a ref - if ( ! a.isReference() ) - allSymbolicAlt = false; - - if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { - return false; - } - } - - // to reach here all alleles are consistent with clipping the first base matching ref0 - // but we don't clip if all ALT alleles are symbolic - return ! 
allSymbolicAlt; - } - - public static int computeReverseClipping(final List unclippedAlleles, - final byte[] ref, - final int forwardClipping, - final boolean allowFullClip) { - int clipping = 0; - boolean stillClipping = true; - - while ( stillClipping ) { - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) - continue; - - // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong - // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). - if ( a.length() - clipping == 0 ) - return clipping - (allowFullClip ? 0 : 1); - - if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { - stillClipping = false; - } - else if ( ref.length == clipping ) { - if ( allowFullClip ) - stillClipping = false; - else - return -1; - } - else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { - stillClipping = false; - } - } - if ( stillClipping ) - clipping++; - } - - return clipping; - } - - /** - * Are the alleles describing a polymorphism substitution one base for another? - * - * @param alleles a list of alleles, must not be empty - * @return Return true if the length of any allele in alleles isn't 1 - */ - @Requires("!alleles.isEmpty()") - private static boolean isSingleNucleotideEvent(final List alleles) { - for ( final Allele a : alleles ) { - if ( a.length() != 1 ) - return false; - } - return true; - } - - /** - * clip the alleles, based on the reference, returning a ClippedAlleles object describing what happened - * - * The ClippedAlleles object contains the implied stop position of the alleles, given the provided start - * position, after clipping. It also contains the list of alleles, in the same order as the provided - * unclipped ones, that are the fully clipped version of the input alleles. 
If an error occurs - * during this option the getError() function returns a string describing the problem (for use in parsers). - * - * The basic operation are: - * - * single allele - * => stop == start and clipped == unclipped - * any number of single nucleotide events - * => stop == start and clipped == unclipped - * two alleles, second being symbolic - * => stop == start and clipped == unclipped - * Note in this case that the STOP should be computed by other means (from END in VCF, for example) - * Note that if there's more than two alleles and the second is a symbolic the code produces an error - * Any other case: - * The alleles are trimmed of any sequence shared at the end of the alleles. If N bases - * are common then the alleles will all be at least N bases shorter. - * The stop position returned is the start position + the length of the - * reverse trimmed only reference allele - 1. - * If the alleles all share a single common starting sequence (just one base is considered) - * then the alleles have this leading common base removed as well. 
- * - * TODO This code is gross and brittle and needs to be rethought from scratch - * - * @param start the unadjusted start position (pre-clipping) - * @param ref the reference string - * @param unclippedAlleles the list of unclipped alleles, including the reference allele - * @return the new reference end position of this event - */ - @Requires({"start > 0", "ref != null && ref.length() > 0", "!unclippedAlleles.isEmpty()"}) - @Ensures("result != null") - public static ClippedAlleles clipAlleles(final int start, - final String ref, - final List unclippedAlleles, - final int endForSymbolicAllele ) { - // no variation or single nucleotide events are by definition fully clipped - if ( unclippedAlleles.size() == 1 || isSingleNucleotideEvent(unclippedAlleles) ) - return new ClippedAlleles(start, unclippedAlleles, null); - - // we've got to sort out the clipping by looking at the alleles themselves - final byte firstRefBase = (byte) ref.charAt(0); - final boolean firstBaseIsClipped = shouldClipFirstBaseP(unclippedAlleles, firstRefBase); - final int forwardClipping = firstBaseIsClipped ? 1 : 0; - final int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false); - final boolean needsClipping = forwardClipping > 0 || reverseClipping > 0; - - if ( reverseClipping == -1 ) - return new ClippedAlleles("computeReverseClipping failed due to bad alleles"); - - boolean sawSymbolic = false; - List clippedAlleles; - if ( ! 
needsClipping ) { - // there's nothing to clip, so clippedAlleles are the original alleles - clippedAlleles = unclippedAlleles; - } else { - clippedAlleles = new ArrayList(unclippedAlleles.size()); - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) { - sawSymbolic = true; - clippedAlleles.add(a); - } else { - final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length - reverseClipping); - if ( !Allele.acceptableAlleleBases(allele) ) - return new ClippedAlleles("Unparsable vcf record with bad allele [" + allele + "]"); - clippedAlleles.add(Allele.create(allele, a.isReference())); - } - } - } - - int stop = VariantContextUtils.computeEndFromAlleles(clippedAlleles, start, endForSymbolicAllele); - - // TODO - // TODO - // TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1 - // TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED - // TODO - // TODO - if ( needsClipping && ! sawSymbolic && ! clippedAlleles.get(0).isNull() ) stop++; - // TODO - // TODO - // TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1 - // TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED - // TODO - // TODO - - final Byte refBaseForIndel = firstBaseIsClipped ? firstRefBase : null; - return new ClippedAlleles(stop, clippedAlleles, refBaseForIndel); - } - - /** - * Returns true if the alleles in inputVC should have reference bases added for padding - * - * We need to pad a VC with a common base if the length of the reference allele is - * less than the length of the VariantContext. This happens because the position of - * e.g. an indel is always one before the actual event (as per VCF convention). 
- * - * @param inputVC the VC to evaluate, cannot be null - * @return true if - */ - public static boolean needsPadding(final VariantContext inputVC) { - // biallelic sites with only symbolic never need padding - if ( inputVC.isBiallelic() && inputVC.getAlternateAllele(0).isSymbolic() ) - return false; - - final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1; - final int referenceLength = inputVC.getReference().length(); - - if ( referenceLength == recordLength ) - return false; - else if ( referenceLength == recordLength - 1 ) - return true; - else if ( !inputVC.hasSymbolicAlleles() ) - throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + - " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); - else if ( inputVC.isMixed() && inputVC.hasSymbolicAlleles() ) - throw new IllegalArgumentException("GATK infrastructure limitation prevents needsPadding from working properly with VariantContexts containing a mixture of symbolic and concrete alleles at " + inputVC); - return false; - } - - public static Allele padAllele(final VariantContext vc, final Allele allele) { - assert needsPadding(vc); - - if ( allele.isSymbolic() ) - return allele; - else { - // get bases for current allele and create a new one with trimmed bases - final StringBuilder sb = new StringBuilder(); - sb.append((char)vc.getReferenceBaseForIndel().byteValue()); - sb.append(allele.getDisplayString()); - final String newBases = sb.toString(); - return Allele.create(newBases, allele.isReference()); - } - } - - public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) { - final boolean padVC = needsPadding(inputVC); - - // nothing to do if we don't need to pad bases - if ( padVC ) { - if ( !inputVC.hasReferenceBaseForIndel() ) - throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + 
inputVC.getStart() + "; no padded reference base is available."); - - final ArrayList alleles = new ArrayList(inputVC.getNAlleles()); - final Map unpaddedToPadded = inputVC.hasGenotypes() ? new HashMap(inputVC.getNAlleles()) : null; - - boolean paddedAtLeastOne = false; - for (final Allele a : inputVC.getAlleles()) { - final Allele padded = padAllele(inputVC, a); - paddedAtLeastOne = paddedAtLeastOne || padded != a; - alleles.add(padded); - if ( unpaddedToPadded != null ) unpaddedToPadded.put(a, padded); // conditional to avoid making unnecessary make - } - - if ( ! paddedAtLeastOne ) - throw new ReviewedStingException("VC was supposed to need padding but no allele was actually changed at location " + inputVC.getChr() + ":" + inputVC.getStart() + " with allele " + inputVC.getAlleles()); - - final VariantContextBuilder vcb = new VariantContextBuilder(inputVC); - vcb.alleles(alleles); - - // the position of the inputVC is one further, if it doesn't contain symbolic alleles - vcb.computeEndFromAlleles(alleles, inputVC.getStart(), inputVC.getEnd()); - - if ( inputVC.hasGenotypes() ) { - assert unpaddedToPadded != null; - - // now we can recreate new genotypes with trimmed alleles - final GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples()); - for (final Genotype g : inputVC.getGenotypes() ) { - final List newGenotypeAlleles = new ArrayList(g.getAlleles().size()); - for (final Allele a : g.getAlleles()) { - newGenotypeAlleles.add( a.isCalled() ? 
unpaddedToPadded.get(a) : Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make()); - } - vcb.genotypes(genotypes); - } - - return vcb.make(); - } - else - return inputVC; - - } - - public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { - // see if we need to trim common reference base from all alleles - - final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true); - if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) - return inputVC; - - final List alleles = new ArrayList(); - final GenotypesContext genotypes = GenotypesContext.create(); - final Map originalToTrimmedAlleleMap = new HashMap(); - - for (final Allele a : inputVC.getAlleles()) { - if (a.isSymbolic()) { - alleles.add(a); - originalToTrimmedAlleleMap.put(a, a); - } else { - // get bases for current allele and create a new one with trimmed bases - final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); - final Allele trimmedAllele = Allele.create(newBases, a.isReference()); - alleles.add(trimmedAllele); - originalToTrimmedAlleleMap.put(a, trimmedAllele); - } - } - - // now we can recreate new genotypes with trimmed alleles - for ( final Genotype genotype : inputVC.getGenotypes() ) { - final List originalAlleles = genotype.getAlleles(); - final List trimmedAlleles = new ArrayList(); - for ( final Allele a : originalAlleles ) { - if ( a.isCalled() ) - trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); - else - trimmedAlleles.add(Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); - } - - return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? 
-1 : 0)).alleles(alleles).genotypes(genotypes).make(); - } - - @Invariant("stop != -1 || error != null") // we're either an error or a meaningful result but not both - public static class ClippedAlleles { - private final int stop; - private final List clippedAlleles; - private final Byte refBaseForIndel; - private final String error; - - @Requires({"stop > 0", "clippedAlleles != null"}) - private ClippedAlleles(final int stop, final List clippedAlleles, final Byte refBaseForIndel) { - this.stop = stop; - this.clippedAlleles = clippedAlleles; - this.error = null; - this.refBaseForIndel = refBaseForIndel; - } - - @Requires("error != null") - private ClippedAlleles(final String error) { - this.stop = -1; - this.clippedAlleles = null; - this.refBaseForIndel = null; - this.error = error; - } - - /** - * Get an error if it occurred - * @return the error message, or null if no error occurred - */ - public String getError() { - return error; - } - - /** - * Get the stop position to use after the clipping as been applied, given the - * provided position to clipAlleles - * @return - */ - public int getStop() { - return stop; - } - - /** - * Get the clipped alleles themselves - * @return the clipped alleles in the order of the input unclipped alleles - */ - public List getClippedAlleles() { - return clippedAlleles; - } - - /** - * Returns the reference base we should use for indels, or null if none is appropriate - * @return - */ - public Byte getRefBaseForIndel() { - return refBaseForIndel; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java index f76939ca9..dac58eb10 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java @@ -36,6 +36,8 @@ public final class VCFConstants { public static final String MLE_ALLELE_COUNT_KEY = "MLEAC"; public 
static final String ALLELE_FREQUENCY_KEY = "AF"; public static final String MLE_ALLELE_FREQUENCY_KEY = "MLEAF"; + public static final String MLE_PER_SAMPLE_ALLELE_COUNT_KEY = "MLPSAC"; + public static final String MLE_PER_SAMPLE_ALLELE_FRACTION_KEY = "MLPSAF"; public static final String ALLELE_NUMBER_KEY = "AN"; public static final String RMS_BASE_QUALITY_KEY = "BQ"; public static final String CIGAR_KEY = "CIGAR"; @@ -119,4 +121,5 @@ public final class VCFConstants { public static final int MAX_GENOTYPE_QUAL = 99; public static final Double VCF_ENCODING_EPSILON = 0.00005; // when we consider fields equal(), used in the Qual compare + public static final String REFSAMPLE_DEPTH_KEY = "REFDEPTH"; } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 76cfe0018..bda03f675 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.exceptions; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -78,6 +79,12 @@ public class UserException extends ReviewedStingException { } } + public static class NotSupportedInGATKLite extends UserException { + public NotSupportedInGATKLite(String message) { + super(String.format("GATK Lite does not support all of the features of the full version: %s", message)); + } + } + // todo -- fix up exception cause passing public static class MissingArgument extends CommandLineException { public MissingArgument(String arg, String message) 
{ @@ -310,9 +317,9 @@ public class UserException extends ReviewedStingException { - public static class MissingWalker extends UserException { - public MissingWalker(String walkerName, String message) { - super(String.format("Walker %s is not available: %s", walkerName, message)); + public static class DeprecatedWalker extends UserException { + public DeprecatedWalker(String walkerName, String version) { + super(String.format("Walker %s is no longer available in the GATK; it has been deprecated since version %s", walkerName, version)); } } @@ -345,8 +352,8 @@ public class UserException extends ReviewedStingException { public static class UnreadableKeyException extends UserException { public UnreadableKeyException ( File f, Exception e ) { super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", - f.getAbsolutePath(), getMessage(e))); + "Please see %s for help.", + f.getAbsolutePath(), getMessage(e), GATKRunReport.PHONE_HOME_DOCS_URL)); } public UnreadableKeyException ( String message, Exception e ) { @@ -355,8 +362,8 @@ public class UserException extends ReviewedStingException { public UnreadableKeyException ( String message ) { super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", - message)); + "Please see %s for help.", + message, GATKRunReport.PHONE_HOME_DOCS_URL)); } } @@ -364,9 +371,8 @@ public class UserException extends ReviewedStingException { public KeySignatureVerificationException ( File f ) { super(String.format("The signature in key file %s failed cryptographic verification. " + "If this key was valid in the past, it's likely been revoked. 
" + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + - "for help.", - f.getAbsolutePath())); + "Please see %s for help.", + f.getAbsolutePath(), GATKRunReport.PHONE_HOME_DOCS_URL)); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index c6eec24f1..2f31c154c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -4,7 +4,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -134,17 +134,36 @@ public class FragmentUtils { GATKSAMRecord firstRead = overlappingPair.get(0); GATKSAMRecord secondRead = overlappingPair.get(1); - if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) { + /* + System.out.println("read 0 unclipped start:"+overlappingPair.get(0).getUnclippedStart()); + System.out.println("read 0 unclipped end:"+overlappingPair.get(0).getUnclippedEnd()); + System.out.println("read 1 unclipped start:"+overlappingPair.get(1).getUnclippedStart()); + System.out.println("read 1 unclipped end:"+overlappingPair.get(1).getUnclippedEnd()); + System.out.println("read 0 start:"+overlappingPair.get(0).getAlignmentStart()); + System.out.println("read 0 end:"+overlappingPair.get(0).getAlignmentEnd()); + System.out.println("read 1 
start:"+overlappingPair.get(1).getAlignmentStart()); + System.out.println("read 1 end:"+overlappingPair.get(1).getAlignmentEnd()); + */ + if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { firstRead = overlappingPair.get(1); // swap them secondRead = overlappingPair.get(0); } - if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) { + if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { return overlappingPair; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A } if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) { return overlappingPair; // fragments contain indels so don't merge them } +/* // check for inconsistent start positions between uncliped/soft alignment starts + if (secondRead.getAlignmentStart() >= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() < firstRead.getUnclippedStart()) + return overlappingPair; + if (secondRead.getAlignmentStart() <= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() > firstRead.getUnclippedStart()) + return overlappingPair; + + if (secondRead.getUnclippedStart() < firstRead.getAlignmentEnd() && secondRead.getAlignmentStart() >= firstRead.getAlignmentEnd()) + return overlappingPair; + */ final Pair pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart()); final int firstReadStop = ( pair.getSecond() ? 
pair.getFirst() + 1 : pair.getFirst() ); diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java new file mode 100644 index 000000000..388e7ce45 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.help; + +import com.google.gson.Gson; +import org.apache.commons.io.IOUtils; +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.DefaultHttpClient; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.util.ArrayList; +import java.util.List; + +public class ForumAPIUtils { + /** + * How we post to the forum + */ + private final static String API_URL = "https://gatk.vanillaforums.com/"; + final private static String ACCESS_TOKEN = "access_token="; + + public static List getPostedTools(String forumKey) { + Gson gson = new Gson(); + List output = new ArrayList(); + + String text = httpGet(API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey); + + APIQuery details = gson.fromJson(text, APIQuery.class); + ForumDiscussion[] discussions = details.Discussions; + + for (ForumDiscussion post : discussions) { + output.add(post.Name); + } + + /* + System.out.println(details.isJsonArray()); + System.out.println(details.isJsonNull()); + System.out.println(details.isJsonObject()); + System.out.println(details.isJsonPrimitive()); + + JsonArray posted = details.getAsJsonPrimitive().get("Discussions").getAsJsonArray(); + + for ( JsonElement post : posted ) { + output.add( post.getAsJsonObject().get("Name").getAsString()); + } + */ + return output; + } + + + private static String httpGet(String urlStr) { + String output = ""; + + try { + + DefaultHttpClient httpClient = new DefaultHttpClient(); + HttpGet getRequest = new HttpGet(urlStr); + getRequest.addHeader("accept", "application/json"); + + HttpResponse response = httpClient.execute(getRequest); + + if 
(response.getStatusLine().getStatusCode() != 200) { + throw new RuntimeException("Failed : HTTP error code : " + + response.getStatusLine().getStatusCode()); + } + + output = IOUtils.toString(response.getEntity().getContent()); + + httpClient.getConnectionManager().shutdown(); + + } catch (ClientProtocolException e) { + + e.printStackTrace(); + + } catch (IOException e) { + + e.printStackTrace(); + } + return output; + } + + private static String httpPost(String data, String URL) { + try { + + DefaultHttpClient httpClient = new DefaultHttpClient(); + HttpPost postRequest = new HttpPost(URL); + + StringEntity input = new StringEntity(data); + input.setContentType("application/json"); + postRequest.setEntity(input); + + HttpResponse response = httpClient.execute(postRequest); + + if (response.getStatusLine().getStatusCode() != 200) { + throw new RuntimeException("Failed : HTTP error code : " + + response.getStatusLine().getStatusCode()); + } + + BufferedReader br = new BufferedReader( + new InputStreamReader((response.getEntity().getContent()))); + + String output = ""; + String line; + System.out.println("Output from Server .... \n"); + while ((line = br.readLine()) != null) { + output += (line + '\n'); + System.out.println(line); + } + + httpClient.getConnectionManager().shutdown(); + return output; + + } catch (MalformedURLException e) { + + e.printStackTrace(); + + } catch (IOException e) { + + e.printStackTrace(); + + } + return null; + } + + public static void postToForum(GATKDocWorkUnit tool, final String forumKey) { + + + ForumDiscussion post = new ForumDiscussion(tool); + + Gson gson = new Gson(); + + String data = gson.toJson(post.getPostData()); + httpPost(data, API_URL + "post/discussion.json?" 
+ ACCESS_TOKEN + forumKey); + + + } + + class APIQuery { + ForumDiscussion[] Discussions; + + public APIQuery() { + } + } + +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ForumDiscussion.java b/public/java/src/org/broadinstitute/sting/utils/help/ForumDiscussion.java new file mode 100644 index 000000000..ac5d73946 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/ForumDiscussion.java @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import java.util.HashMap; +import java.util.Map; + +class ForumDiscussion { + + final private static String POST_TEMPLATE = "

A new tool has been released!

Check out the documentation at %s.

"; + + final int Announce; + final String Body; + final String Category; + final int Closed; + final String Format; + final String Name; + final int Sink; + final String Tags; + final String Type; + + public ForumDiscussion(String name, String body, String format, String category, + String tagsCSV, String type, int closed, int announce, int sink) { + this.Name = name; + this.Body = body; + this.Format = format; + this.Category = category; + this.Tags = tagsCSV; + this.Type = type; + this.Closed = closed; + this.Announce = announce; + this.Sink = sink; + } + + public ForumDiscussion(GATKDocWorkUnit tool) { + this(tool.name, + String.format(POST_TEMPLATE, GATKDocUtils.URL_ROOT_FOR_RELEASE_GATKDOCS + tool.filename, tool.name), + "Html", "tool-bulletin", tool.name + "," + tool.group + ",gatkdocs", "Discussion", 0, -1, -1); + } + + public Map getPostData() { + Map output = new HashMap(); + + output.put("Name", Name); + output.put("Body", Body); + output.put("Format", Format); + output.put("Category", Category); + if (Tags != null) + output.put("Tags", Tags); + if (Type != null) + output.put("Type", Type); + if (Closed != -1) + output.put("Closed", Closed == 1 ? "1" : "0"); + if (Announce != -1) + output.put("Announce", Announce == 1 ? "1" : "0"); + if (Sink != -1) + output.put("Sink", Sink == 1 ? 
"1" : "0"); + + return output; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java index cd645943b..4ec2ac6d7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -25,11 +25,17 @@ package org.broadinstitute.sting.utils.help; public class GATKDocUtils { - /** The URL root for RELEASED GATKDOC units */ - public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/"; - /** The URL root for STABLE GATKDOC units */ + /** + * The URL root for RELEASED GATKDOC units + */ + public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gatk/gatkdocs/"; + /** + * The URL root for STABLE GATKDOC units + */ public final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/"; - /** The URL root for UNSTABLE GATKDOC units */ + /** + * The URL root for UNSTABLE GATKDOC units + */ public final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/"; /** @@ -49,6 +55,7 @@ public class GATKDocUtils { /** * Returns a full URL http://etc/ linking to the documentation for class (assuming it * exists). Currently points to the RELEASE doc path only. 
+ * * @param c * @return */ diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java index 41c855329..fd269737f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -31,37 +31,52 @@ import java.util.Map; /** * Simple collection of all relevant information about something the GATKDoclet can document - * + *

* Created by IntelliJ IDEA. * User: depristo * Date: 7/24/11 * Time: 7:59 PM */ class GATKDocWorkUnit implements Comparable { - /** The class that's being documented */ + /** + * The class that's being documented + */ final Class clazz; - /** The name of the thing we are documenting */ + /** + * The name of the thing we are documenting + */ final String name; - /** the filename where we will be writing the docs for this class */ + /** + * the filename where we will be writing the docs for this class + */ final String filename; - /** The name of the documentation group (e.g., walkers, read filters) class belongs to */ + /** + * The name of the documentation group (e.g., walkers, read filters) class belongs to + */ final String group; - /** The documentation handler for this class */ + /** + * The documentation handler for this class + */ final DocumentedGATKFeatureHandler handler; - /** The javadoc documentation for clazz */ + /** + * The javadoc documentation for clazz + */ final ClassDoc classDoc; - /** The annotation that lead to this Class being in GATKDoc */ + /** + * The annotation that lead to this Class being in GATKDoc + */ final DocumentedGATKFeatureObject annotation; - /** When was this walker built, and what's the absolute version number */ + /** + * When was this walker built, and what's the absolute version number + */ final String buildTimestamp, absoluteVersion; // set by the handler String summary; Map forTemplate; - public GATKDocWorkUnit(String name, String filename, String group, - DocumentedGATKFeatureObject annotation, DocumentedGATKFeatureHandler handler, - ClassDoc classDoc, Class clazz, + public GATKDocWorkUnit(String name, String filename, String group, DocumentedGATKFeatureObject annotation, + DocumentedGATKFeatureHandler handler, ClassDoc classDoc, Class clazz, String buildTimestamp, String absoluteVersion) { this.annotation = annotation; this.name = name; @@ -76,6 +91,7 @@ class GATKDocWorkUnit implements Comparable { /** * Called by the 
GATKDoclet to set handler provided context for this work unit + * * @param summary * @param forTemplate */ @@ -86,6 +102,7 @@ class GATKDocWorkUnit implements Comparable { /** * Return a String -> String map suitable for FreeMarker to create an index to this WorkUnit + * * @return */ public Map indexDataMap() { @@ -99,6 +116,7 @@ class GATKDocWorkUnit implements Comparable { /** * Sort in order of the name of this WorkUnit + * * @param other * @return */ diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 7f26f22f5..105e12872 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.walkers.qc.DocumentationTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; import java.io.*; import java.util.*; @@ -46,31 +47,36 @@ import java.util.*; * Javadoc Doclet that combines javadoc, GATK ParsingEngine annotations, and FreeMarker * templates to produce html formatted GATKDocs for walkers * and other classes. - * + *

* This document has the following workflow: - * + *

* 1 -- walk the javadoc heirarchy, looking for class that have the - * DocumentedGATKFeature annotation or are in the type heirarchy in the - * static list of things to document, and are to be documented + * DocumentedGATKFeature annotation or are in the type heirarchy in the + * static list of things to document, and are to be documented * 2 -- construct for each a GATKDocWorkUnit, resulting in the complete - * set of things to document + * set of things to document * 3 -- for each unit, actually generate an html page documenting it - * as well as links to related features via their units. Writing - * of a specific class HTML is accomplished by a generate DocumentationHandler + * as well as links to related features via their units. Writing + * of a specific class HTML is accomplished by a generate DocumentationHandler * 4 -- write out an index of all units, organized by group - * + *

* The documented classes are restricted to only those with @DocumentedGATKFeature * annotation or are in the STATIC_DOCS class. */ public class GATKDoclet { final protected static Logger logger = Logger.getLogger(GATKDoclet.class); - /** Where we find the help FreeMarker templates */ + /** + * Where we find the help FreeMarker templates + */ final protected static File SETTINGS_DIR = new File("settings/helpTemplates"); - /** Where we write the GATKDoc html directory */ + /** + * Where we write the GATKDoc html directory + */ final protected static File DESTINATION_DIR = new File("gatkdocs"); + final private static String FORUM_KEY_FILE = "/local/gsa-engineering/gatkdocs_publisher/forum.key"; // ---------------------------------------------------------------------- // // Global variables that are set on the command line by javadoc @@ -88,10 +94,14 @@ public class GATKDoclet { private static final List> testOnlyKeepers = Arrays.asList( DocumentationTest.class, CommandLineGATK.class, UserException.class); - /** The javadoc root doc */ + /** + * The javadoc root doc + */ RootDoc rootDoc; - /** The set of all things we are going to document */ + /** + * The set of all things we are going to document + */ Set myWorkUnits; /** @@ -103,6 +113,7 @@ public class GATKDoclet { * codecs). */ final static Collection STATIC_DOCS = new ArrayList(); + static { STATIC_DOCS.add(new DocumentedGATKFeatureObject(FeatureCodec.class, "Reference ordered data (ROD) codecs", @@ -112,6 +123,7 @@ public class GATKDoclet { /** * Extracts the contents of certain types of javadoc and adds them to an XML file. + * * @param rootDoc The documentation root. * @return Whether the JavaDoc run succeeded. * @throws java.io.IOException if output can't be written. 
@@ -120,12 +132,12 @@ public class GATKDoclet { logger.setLevel(Level.INFO); // load arguments - for(String[] options: rootDoc.options()) { - if(options[0].equals("-build-timestamp")) + for (String[] options : rootDoc.options()) { + if (options[0].equals("-build-timestamp")) buildTimestamp = options[1]; if (options[0].equals("-absolute-version")) absoluteVersion = options[1]; - if (options[0].equals("-include-hidden")) + if (options[0].equals("-include -hidden")) showHiddenFeatures = true; if (options[0].equals("-test")) testOnly = true; @@ -134,20 +146,22 @@ public class GATKDoclet { // process the docs new GATKDoclet().processDocs(rootDoc); + return true; } /** * Validate the given options against options supported by this doclet. + * * @param option Option to validate. * @return Number of potential parameters; 0 if not supported. */ public static int optionLength(String option) { - if(option.equals("-build-timestamp") || + if (option.equals("-build-timestamp") || option.equals("-absolute-version") || option.equals("-include-hidden")) { return 2; - } else if ( option.equals("-test") ) + } else if (option.equals("-test")) return 1; else return 0; @@ -155,6 +169,7 @@ public class GATKDoclet { /** * Are we supposed to include @Hidden annotations in our documented output? 
+ * * @return */ public boolean showHiddenFeatures() { @@ -162,7 +177,6 @@ public class GATKDoclet { } /** - * * @param rootDoc */ private void processDocs(RootDoc rootDoc) { @@ -172,7 +186,11 @@ public class GATKDoclet { try { // basic setup DESTINATION_DIR.mkdirs(); - FileUtils.copyFile(new File(SETTINGS_DIR + "/style.css"), new File(DESTINATION_DIR + "/style.css")); + FileUtils.copyFile(new File(SETTINGS_DIR + "/bootstrap.min.css"), new File(DESTINATION_DIR + "/bootstrap.min.css")); + FileUtils.copyFile(new File(SETTINGS_DIR + "/bootstrap.min.js"), new File(DESTINATION_DIR + "/bootstrap.min.js")); + FileUtils.copyFile(new File(SETTINGS_DIR + "/jquery.min.js"), new File(DESTINATION_DIR + "/jquery.min.js")); + // print the Version number + FileUtils.writeByteArrayToFile(new File(DESTINATION_DIR + "/current.version.txt"), getSimpleVersion(absoluteVersion).getBytes()); /* ------------------------------------------------------------------- */ /* You should do this ONLY ONCE in the whole application life-cycle: */ @@ -184,31 +202,78 @@ public class GATKDoclet { cfg.setObjectWrapper(new DefaultObjectWrapper()); myWorkUnits = computeWorkUnits(); - for ( GATKDocWorkUnit workUnit : myWorkUnits ) { - processDocWorkUnit(cfg, workUnit); + + List> groups = new ArrayList>(); + Set seenDocumentationFeatures = new HashSet(); + List> data = new ArrayList>(); + for (GATKDocWorkUnit workUnit : myWorkUnits) { + data.add(workUnit.indexDataMap()); + if (!seenDocumentationFeatures.contains(workUnit.annotation.groupName())) { + groups.add(toMap(workUnit.annotation)); + seenDocumentationFeatures.add(workUnit.annotation.groupName()); + } + } + + for (GATKDocWorkUnit workUnit : myWorkUnits) { + processDocWorkUnit(cfg, workUnit, groups, data); } processIndex(cfg, new ArrayList(myWorkUnits)); - } catch ( FileNotFoundException e ) { + + File forumKeyFile = new File(FORUM_KEY_FILE); + if (forumKeyFile.exists()) { + String forumKey = null; + // Read ing a one-line file so we can do a for loop + 
for (String line : new XReadLines(forumKeyFile)) + forumKey = line; + updateForum(myWorkUnits, forumKey); + } + } catch (FileNotFoundException e) { throw new RuntimeException(e); - } catch ( IOException e ) { + } catch (IOException e) { throw new RuntimeException(e); } } + private void updateForum(Set docWorkUnits, String forumKey) { + //first get list of posts that need to be added + List old = ForumAPIUtils.getPostedTools(forumKey); + + for (String s : old) + System.out.println(s); + + System.out.printf("Forum has %d items%n", old.size()); + System.out.printf("Docs have %d items%n", docWorkUnits.size()); + + List toAdd = new ArrayList(); + for (GATKDocWorkUnit tool : docWorkUnits) { + if (!old.contains(tool.name)) { + System.out.println("WILL POST: " + tool.name + " TO FORUM"); + toAdd.add(tool); + } + } + + //update using list + for (GATKDocWorkUnit tool : toAdd) { + //if ( tool.name.equals("ApplyRecalibration") ) + ForumAPIUtils.postToForum(tool, forumKey); + } + } + /** * Returns the set of all GATKDocWorkUnits that we are going to generate docs for. + * * @return */ private Set computeWorkUnits() { TreeSet m = new TreeSet(); - for ( ClassDoc doc : rootDoc.classes() ) { + for (ClassDoc doc : rootDoc.classes()) { //logger.debug("Considering " + doc); Class clazz = getClassForClassDoc(doc); // don't add anything that's not DocumentationTest if we are in test mode - if ( clazz != null && testOnly && ! 
testOnlyKeepers.contains(clazz) ) + if (clazz != null && testOnly && !testOnlyKeepers.contains(clazz)) continue; //if ( clazz != null && clazz.getName().equals("org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance")) @@ -216,12 +281,11 @@ public class GATKDoclet { DocumentedGATKFeatureObject feature = getFeatureForClassDoc(doc); DocumentedGATKFeatureHandler handler = createHandler(doc, feature); - if ( handler != null && handler.includeInDocs(doc) ) { + if (handler != null && handler.includeInDocs(doc)) { logger.info("Generating documentation for class " + doc); String filename = handler.getDestinationFilename(doc, clazz); GATKDocWorkUnit unit = new GATKDocWorkUnit(doc.name(), - filename, feature.groupName(), - feature, handler, doc, clazz, + filename, feature.groupName(), feature, handler, doc, clazz, buildTimestamp, absoluteVersion); m.add(unit); } @@ -233,13 +297,14 @@ public class GATKDoclet { /** * Create a handler capable of documenting the class doc according to feature. Returns * null if no appropriate handler is found or doc shouldn't be documented at all. 
+ * * @param doc * @param feature * @return */ private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeatureObject feature) { - if ( feature != null ) { - if ( feature.enable() ) { + if (feature != null) { + if (feature.enable()) { DocumentedGATKFeatureHandler handler = new GenericDocumentationHandler(); handler.setDoclet(this); return handler; @@ -261,15 +326,15 @@ public class GATKDoclet { private DocumentedGATKFeatureObject getFeatureForClassDoc(ClassDoc doc) { Class docClass = getClassForClassDoc(doc); - if ( docClass == null ) + if (docClass == null) return null; // not annotated so it shouldn't be documented - if ( docClass.isAnnotationPresent(DocumentedGATKFeature.class) ) { + if (docClass.isAnnotationPresent(DocumentedGATKFeature.class)) { DocumentedGATKFeature f = docClass.getAnnotation(DocumentedGATKFeature.class); return new DocumentedGATKFeatureObject(docClass, f.enable(), f.groupName(), f.summary(), f.extraDocs()); } else { - for ( DocumentedGATKFeatureObject staticDocs : STATIC_DOCS ) { - if ( staticDocs.getClassToDoc().isAssignableFrom(docClass) ) { + for (DocumentedGATKFeatureObject staticDocs : STATIC_DOCS) { + if (staticDocs.getClassToDoc().isAssignableFrom(docClass)) { return new DocumentedGATKFeatureObject(docClass, staticDocs.enable(), staticDocs.groupName(), staticDocs.summary(), staticDocs.extraDocs()); } } @@ -279,26 +344,28 @@ public class GATKDoclet { /** * Return the Java class described by the ClassDoc doc + * * @param doc * @return */ private Class getClassForClassDoc(ClassDoc doc) { try { // todo -- what do I need the ? extends Object to pass the compiler? - return (Class)HelpUtils.getClassForDoc(doc); - } catch ( ClassNotFoundException e) { + return (Class) HelpUtils.getClassForDoc(doc); + } catch (ClassNotFoundException e) { //logger.warn("Couldn't find class for ClassDoc " + doc); // we got a classdoc for a class we can't find. 
Maybe in a library or something return null; - } catch ( NoClassDefFoundError e ) { + } catch (NoClassDefFoundError e) { return null; - } catch ( UnsatisfiedLinkError e) { + } catch (UnsatisfiedLinkError e) { return null; // naughty BWA bindings } } /** * Create the html index listing all of the GATKDocs features + * * @param cfg * @param indexData * @throws IOException @@ -312,7 +379,7 @@ public class GATKDoclet { try { temp.process(groupIndexData(indexData), out); out.flush(); - } catch ( TemplateException e ) { + } catch (TemplateException e) { throw new ReviewedStingException("Failed to create GATK documentation", e); } } @@ -320,6 +387,7 @@ public class GATKDoclet { /** * Helpful function to create the html index. Given all of the already run GATKDocWorkUnits, * create the high-level grouping data listing individual features by group. + * * @param indexData * @return */ @@ -334,9 +402,9 @@ public class GATKDoclet { List> groups = new ArrayList>(); Set seenDocumentationFeatures = new HashSet(); List> data = new ArrayList>(); - for ( GATKDocWorkUnit workUnit : indexData ) { + for (GATKDocWorkUnit workUnit : indexData) { data.add(workUnit.indexDataMap()); - if ( ! 
seenDocumentationFeatures.contains(workUnit.annotation.groupName()) ) { + if (!seenDocumentationFeatures.contains(workUnit.annotation.groupName())) { groups.add(toMap(workUnit.annotation)); seenDocumentationFeatures.add(workUnit.annotation.groupName()); } @@ -352,11 +420,13 @@ public class GATKDoclet { /** * Trivial helper routine that returns the map of name and summary given the annotation + * * @param annotation * @return */ private static final Map toMap(DocumentedGATKFeatureObject annotation) { Map root = new HashMap(); + root.put("id", annotation.groupName().replaceAll("\\W", "")); root.put("name", annotation.groupName()); root.put("summary", annotation.summary()); return root; @@ -364,18 +434,20 @@ public class GATKDoclet { /** * Helper function that finding the GATKDocWorkUnit associated with class from among all of the work units + * * @param c the class we are looking for * @return the GATKDocWorkUnit whose .clazz.equals(c), or null if none could be found */ public final GATKDocWorkUnit findWorkUnitForClass(Class c) { - for ( final GATKDocWorkUnit unit : this.myWorkUnits ) - if ( unit.clazz.equals(c) ) + for (final GATKDocWorkUnit unit : this.myWorkUnits) + if (unit.clazz.equals(c)) return unit; return null; } /** * Return the ClassDoc associated with clazz + * * @param clazz * @return */ @@ -388,14 +460,16 @@ public class GATKDoclet { * * @param cfg * @param unit + * @param data * @throws IOException */ - private void processDocWorkUnit(Configuration cfg, GATKDocWorkUnit unit) + private void processDocWorkUnit(Configuration cfg, GATKDocWorkUnit unit, List> groups, List> data) throws IOException { //System.out.printf("Processing documentation for class %s%n", unit.classDoc); unit.handler.processOne(unit); - + unit.forTemplate.put("groups", groups); + unit.forTemplate.put("data", data); // Get or create a template Template temp = cfg.getTemplate(unit.handler.getTemplateName(unit.classDoc)); @@ -405,8 +479,20 @@ public class GATKDoclet { Writer out = new 
OutputStreamWriter(new FileOutputStream(outputPath)); temp.process(unit.forTemplate, out); out.flush(); - } catch ( TemplateException e ) { + } catch (TemplateException e) { throw new ReviewedStingException("Failed to create GATK documentation", e); } } + + private static String getSimpleVersion(String absoluteVersion) { + String[] parts = absoluteVersion.split("-"); + + // by skipping i=0, there is no trailing separator + for (int i = 1; i < 2; i++) { + parts[0] = parts[0].concat("-"); + parts[0] = parts[0].concat(parts[i]); + } + + return parts[0]; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 20f9eccd3..69d2e7c9e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -28,14 +28,11 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import com.sun.javadoc.ClassDoc; import com.sun.javadoc.FieldDoc; -import com.sun.javadoc.RootDoc; import com.sun.javadoc.Tag; import org.apache.log4j.Logger; import org.broad.tribble.Feature; -import org.broad.tribble.bed.FullBEDFeature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -43,7 +40,7 @@ import org.broadinstitute.sting.utils.collections.Pair; import 
org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; -import java.io.*; +import java.io.IOException; import java.lang.reflect.*; import java.util.*; @@ -59,16 +56,18 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { */ private static final int MAX_DISPLAY_NAME = 30; - /** The Class we are documenting */ + /** + * The Class we are documenting + */ private GATKDocWorkUnit toProcess; @Override public boolean includeInDocs(ClassDoc doc) { try { Class type = HelpUtils.getClassForDoc(doc); - boolean hidden = ! getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class); - return ! hidden && JVMUtils.isConcrete(type); - } catch ( ClassNotFoundException e ) { + boolean hidden = !getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class); + return !hidden && JVMUtils.isConcrete(type); + } catch (ClassNotFoundException e) { return false; } } @@ -89,8 +88,9 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { addHighLevelBindings(root); addArgumentBindings(root); addRelatedBindings(root); + root.put("group", toProcess.group); - toProcess.setHandlerContent((String)root.get("summary"), root); + toProcess.setHandlerContent((String) root.get("summary"), root); } /** @@ -104,34 +104,36 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { // Extract overrides from the doc tags. 
StringBuilder summaryBuilder = new StringBuilder(); - for(Tag tag: toProcess.classDoc.firstSentenceTags()) + for (Tag tag : toProcess.classDoc.firstSentenceTags()) summaryBuilder.append(tag.text()); root.put("summary", summaryBuilder.toString()); root.put("description", toProcess.classDoc.commentText().substring(summaryBuilder.toString().length())); root.put("timestamp", toProcess.buildTimestamp); root.put("version", toProcess.absoluteVersion); - for(Tag tag: toProcess.classDoc.tags()) { + for (Tag tag : toProcess.classDoc.tags()) { root.put(tag.name(), tag.text()); } } /** * Add bindings describing related GATK capabilites to toProcess + * * @param root */ protected void addRelatedBindings(Map root) { List> extraDocsData = new ArrayList>(); // add in all of the explicitly related items - for ( final Class extraDocClass : toProcess.annotation.extraDocs() ) { + for (final Class extraDocClass : toProcess.annotation.extraDocs()) { final GATKDocWorkUnit otherUnit = getDoclet().findWorkUnitForClass(extraDocClass); - if ( otherUnit == null ) + if (otherUnit == null) throw new ReviewedStingException("Requested extraDocs for class without any documentation: " + extraDocClass); extraDocsData.add( - new HashMap(){{ + new HashMap() {{ put("filename", otherUnit.filename); - put("name", otherUnit.name);}}); + put("name", otherUnit.name); + }}); } root.put("extradocs", extraDocsData); @@ -149,16 +151,16 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { root.put("arguments", args); try { // loop over all of the arguments according to the parsing engine - for ( final ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(toProcess.classDoc)) ) { + for (final ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(toProcess.classDoc))) { // todo -- why can you have multiple ones? 
ArgumentDefinition argDef = argumentSource.createArgumentDefinitions().get(0); FieldDoc fieldDoc = getFieldDoc(toProcess.classDoc, argumentSource.field.getName()); Map argBindings = docForArgument(fieldDoc, argumentSource, argDef); - if ( ! argumentSource.isHidden() || getDoclet().showHiddenFeatures() ) { + if (!argumentSource.isHidden() || getDoclet().showHiddenFeatures()) { final String kind = docKindOfArg(argumentSource); final Object value = argumentValue(toProcess.clazz, argumentSource); - if ( value != null ) + if (value != null) argBindings.put("defaultValue", prettyPrintValueString(value)); args.get(kind).add(argBindings); @@ -167,31 +169,33 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { } // sort the arguments - for (Map.Entry>> entry : args.entrySet()) { + for (Map.Entry>> entry : args.entrySet()) { entry.setValue(sortArguments(entry.getValue())); } - } catch ( ClassNotFoundException e ) { + } catch (ClassNotFoundException e) { throw new RuntimeException(e); } } /** * Return the argument kind (required, advanced, hidden, etc) of this argumentSource + * * @param argumentSource * @return */ @Requires("argumentSource != null") @Ensures("result != null") private String docKindOfArg(ArgumentSource argumentSource) { - if ( argumentSource.isRequired() ) return "required"; - else if ( argumentSource.isAdvanced() ) return "advanced"; - else if ( argumentSource.isHidden() ) return "hidden"; - else if ( argumentSource.isDeprecated() ) return "depreciated"; + if (argumentSource.isRequired()) return "required"; + else if (argumentSource.isAdvanced()) return "advanced"; + else if (argumentSource.isHidden()) return "hidden"; + else if (argumentSource.isDeprecated()) return "depreciated"; else return "optional"; } /** * Attempts to determine the value of argumentSource in an instantiated version of c + * * @param c * @param argumentSource * @return value of argumentSource, or null if this isn't possible @@ -201,12 +205,12 @@ public 
class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { // get the value of the field // attempt to instantiate the class final Object instance = makeInstanceIfPossible(toProcess.clazz); - if ( instance != null ) { + if (instance != null) { final Object value = getFieldValue(instance, argumentSource.field.getName()); - if ( value != null ) + if (value != null) return value; - if ( argumentSource.createsTypeDefault() ) { + if (argumentSource.createsTypeDefault()) { try { // handle the case where there's an implicit default return argumentSource.typeDefaultDocString(); } catch (ReviewedStingException e) { @@ -220,6 +224,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { /** * Create the argument map for holding class arguments + * * @return */ private Map>> createArgumentMap() { @@ -236,6 +241,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { /** * Sorts the individual argument list in unsorted according to CompareArgumentsByName + * * @param unsorted * @return */ @@ -254,9 +260,9 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { private String elt(Map m) { String v = m.get("name").toString().toLowerCase(); - if ( v.startsWith("--") ) + if (v.startsWith("--")) return v.substring(2); - else if ( v.startsWith("-") ) + else if (v.startsWith("-")) return v.substring(1); else throw new RuntimeException("Expect to see arguments beginning with at least one -, but found " + v); @@ -267,7 +273,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { * Utility function that finds the value of fieldName in any fields of ArgumentCollection fields in * instance of class c. 
* - * @param instance the object to query for the field value + * @param instance the object to query for the field value * @param fieldName the name of the field we are looking for in instance * @return The value assigned to field in the ArgumentCollection, otherwise null */ @@ -280,14 +286,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { // @ArgumentCollection // protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); // - for ( Field field : JVMUtils.getAllFields(instance.getClass()) ) { - if ( field.isAnnotationPresent(ArgumentCollection.class) ) { + for (Field field : JVMUtils.getAllFields(instance.getClass())) { + if (field.isAnnotationPresent(ArgumentCollection.class)) { //System.out.printf("Searching for %s in argument collection field %s%n", fieldName, field); Object fieldValue = JVMUtils.getFieldValue(field, instance); Object value = getFieldValue(fieldValue, fieldName); - if ( value != null ) + if (value != null) return value; - } else if ( field.getName().equals(fieldName) ) { + } else if (field.getName().equals(fieldName)) { return JVMUtils.getFieldValue(field, instance); } } @@ -297,38 +303,39 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { /** * Pretty prints value - * + *

* Assumes value != null + * * @param value * @return */ private Object prettyPrintValueString(Object value) { - if ( value.getClass().isArray() ) { + if (value.getClass().isArray()) { Class type = value.getClass().getComponentType(); - if ( boolean.class.isAssignableFrom(type) ) - return Arrays.toString((boolean[])value); - if ( byte.class.isAssignableFrom(type) ) - return Arrays.toString((byte[])value); - if ( char.class.isAssignableFrom(type) ) - return Arrays.toString((char[])value); - if ( double.class.isAssignableFrom(type) ) - return Arrays.toString((double[])value); - if ( float.class.isAssignableFrom(type) ) - return Arrays.toString((float[])value); - if ( int.class.isAssignableFrom(type) ) - return Arrays.toString((int[])value); - if ( long.class.isAssignableFrom(type) ) - return Arrays.toString((long[])value); - if ( short.class.isAssignableFrom(type) ) - return Arrays.toString((short[])value); - if ( Object.class.isAssignableFrom(type) ) - return Arrays.toString((Object[])value); + if (boolean.class.isAssignableFrom(type)) + return Arrays.toString((boolean[]) value); + if (byte.class.isAssignableFrom(type)) + return Arrays.toString((byte[]) value); + if (char.class.isAssignableFrom(type)) + return Arrays.toString((char[]) value); + if (double.class.isAssignableFrom(type)) + return Arrays.toString((double[]) value); + if (float.class.isAssignableFrom(type)) + return Arrays.toString((float[]) value); + if (int.class.isAssignableFrom(type)) + return Arrays.toString((int[]) value); + if (long.class.isAssignableFrom(type)) + return Arrays.toString((long[]) value); + if (short.class.isAssignableFrom(type)) + return Arrays.toString((short[]) value); + if (Object.class.isAssignableFrom(type)) + return Arrays.toString((Object[]) value); else throw new RuntimeException("Unexpected array type in prettyPrintValue. 
Value was " + value + " type is " + type); - } else if ( RodBinding.class.isAssignableFrom(value.getClass() ) ) { + } else if (RodBinding.class.isAssignableFrom(value.getClass())) { // annoying special case to handle the UnBound() constructor return "none"; - } else if ( value instanceof String ) { + } else if (value instanceof String) { return value.equals("") ? "\"\"" : value; } else { return value.toString(); @@ -337,6 +344,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { /** * Attempt to instantiate class c, if possible. Returns null if this proves impossible. + * * @param c * @return */ @@ -344,21 +352,22 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { Object instance = null; try { // don't try to make something where we will obviously fail - if (! c.isEnum() && ! c.isAnnotation() && ! c.isAnonymousClass() && - ! c.isArray() && ! c.isPrimitive() & JVMUtils.isConcrete(c) ) { + if (!c.isEnum() && !c.isAnnotation() && !c.isAnonymousClass() && + !c.isArray() && !c.isPrimitive() & JVMUtils.isConcrete(c)) { instance = c.newInstance(); //System.out.printf("Created object of class %s => %s%n", c, instance); return instance; } else return null; + } catch (IllegalAccessException e) { + } catch (InstantiationException e) { + } catch (ExceptionInInitializerError e) { + } catch (SecurityException e) { } - catch (IllegalAccessException e ) { } - catch (InstantiationException e ) { } - catch (ExceptionInInitializerError e ) { } - catch (SecurityException e ) { } // this last one is super dangerous, but some of these methods catch ClassNotFoundExceptions // and rethrow then as RuntimeExceptions - catch (RuntimeException e) {} + catch (RuntimeException e) { + } return instance; } @@ -366,6 +375,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { /** * Create an instance of the GATK parsing engine, for argument processing with GATKDoclet + * * @return */ private ParsingEngine 
createStandardGATKParsingEngine() { @@ -392,6 +402,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { /** * Recursive helper routine to getFieldDoc() + * * @param classDoc * @param name * @param primary @@ -399,21 +410,21 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { */ private FieldDoc getFieldDoc(ClassDoc classDoc, String name, boolean primary) { //System.out.printf("Looking for %s in %s%n", name, classDoc.name()); - for ( FieldDoc fieldDoc : classDoc.fields(false) ) { + for (FieldDoc fieldDoc : classDoc.fields(false)) { //System.out.printf("fieldDoc " + fieldDoc + " name " + fieldDoc.name()); - if ( fieldDoc.name().equals(name) ) + if (fieldDoc.name().equals(name)) return fieldDoc; Field field = HelpUtils.getFieldForFieldDoc(fieldDoc); - if ( field == null ) + if (field == null) throw new RuntimeException("Could not find the field corresponding to " + fieldDoc + ", presumably because the field is inaccessible"); - if ( field.isAnnotationPresent(ArgumentCollection.class) ) { + if (field.isAnnotationPresent(ArgumentCollection.class)) { ClassDoc typeDoc = getRootDoc().classNamed(fieldDoc.type().qualifiedTypeName()); - if ( typeDoc == null ) + if (typeDoc == null) throw new ReviewedStingException("Tried to get javadocs for ArgumentCollection field " + fieldDoc + " but could't find the class in the RootDoc"); else { FieldDoc result = getFieldDoc(typeDoc, name, false); - if ( result != null ) + if (result != null) return result; // else keep searching } @@ -421,11 +432,11 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { } // if we didn't find it here, wander up to the superclass to find the field - if ( classDoc.superclass() != null ) { + if (classDoc.superclass() != null) { return getFieldDoc(classDoc.superclass(), name, false); } - if ( primary ) + if (primary) throw new RuntimeException("No field found for expected field " + name); else return null; @@ -439,20 
+450,20 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { * @param s1 the short argument name without -, or null if not provided * @param s2 the long argument name without --, or null if not provided * @return A pair of fully qualified names (with - or --) for the argument. The first - * element is the primary display name while the second (potentially null) is a - * synonymous name. + * element is the primary display name while the second (potentially null) is a + * synonymous name. */ Pair displayNames(String s1, String s2) { s1 = s1 == null ? null : "-" + s1; s2 = s2 == null ? null : "--" + s2; - if ( s1 == null ) return new Pair(s2, null); - if ( s2 == null ) return new Pair(s1, null); + if (s1 == null) return new Pair(s2, null); + if (s2 == null) return new Pair(s1, null); String l = s1.length() > s2.length() ? s1 : s2; String s = s1.length() > s2.length() ? s2 : s1; - if ( l.length() > MAX_DISPLAY_NAME ) + if (l.length() > MAX_DISPLAY_NAME) return new Pair(s, l); else return new Pair(l, s); @@ -460,7 +471,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { /** * Returns a human readable string that describes the Type type of a GATK argument. - * + *

* This will include parameterized types, so that Set{T} shows up as Set(T) and not * just Set in the docs. * @@ -469,13 +480,13 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { */ protected String argumentTypeString(Type type) { if (type instanceof ParameterizedType) { - ParameterizedType parameterizedType = (ParameterizedType)type; + ParameterizedType parameterizedType = (ParameterizedType) type; List subs = new ArrayList(); - for (Type actualType: parameterizedType.getActualTypeArguments()) + for (Type actualType : parameterizedType.getActualTypeArguments()) subs.add(argumentTypeString(actualType)); - return argumentTypeString(((ParameterizedType)type).getRawType()) + "[" + Utils.join(",", subs) + "]"; + return argumentTypeString(((ParameterizedType) type).getRawType()) + "[" + Utils.join(",", subs) + "]"; } else if (type instanceof GenericArrayType) { - return argumentTypeString(((GenericArrayType)type).getGenericComponentType()) + "[]"; + return argumentTypeString(((GenericArrayType) type).getGenericComponentType()) + "[]"; } else if (type instanceof WildcardType) { throw new RuntimeException("We don't support wildcards in arguments: " + type); } else if (type instanceof Class) { @@ -489,18 +500,19 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { * Helper routine that returns the Feature.class required by a RodBinding, * either T for RodBinding{T} or List{RodBinding{T}}. Returns null if * the Type doesn't fit either model. 
+ * * @param type * @return */ protected Class getFeatureTypeIfPossible(Type type) { - if ( type instanceof ParameterizedType) { - ParameterizedType paramType = (ParameterizedType)type; - if ( RodBinding.class.isAssignableFrom((Class)paramType.getRawType()) ) { - return (Class)JVMUtils.getParameterizedTypeClass(type); + if (type instanceof ParameterizedType) { + ParameterizedType paramType = (ParameterizedType) type; + if (RodBinding.class.isAssignableFrom((Class) paramType.getRawType())) { + return (Class) JVMUtils.getParameterizedTypeClass(type); } else { - for ( Type paramtype : paramType.getActualTypeArguments() ) { + for (Type paramtype : paramType.getActualTypeArguments()) { Class x = getFeatureTypeIfPossible(paramtype); - if ( x != null ) + if (x != null) return x; } } @@ -512,6 +524,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { /** * High-level entry point for creating a FreeMarker map describing the GATK argument * source with definition def, with associated javadoc fieldDoc. + * * @param fieldDoc * @param source * @param def @@ -521,9 +534,9 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { Map root = new HashMap(); Pair names = displayNames(def.shortName, def.fullName); - root.put("name", names.getFirst() ); + root.put("name", names.getFirst()); - if ( names.getSecond() != null ) + if (names.getSecond() != null) root.put("synonyms", names.getSecond()); root.put("required", def.required ? 
"yes" : "no"); @@ -532,11 +545,11 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { root.put("type", argumentTypeString(source.field.getGenericType())); Class featureClass = getFeatureTypeIfPossible(source.field.getGenericType()); - if ( featureClass != null ) { + if (featureClass != null) { // deal with the allowable types FeatureManager manager = new FeatureManager(); List rodTypes = new ArrayList(); - for (FeatureManager.FeatureDescriptor descriptor : manager.getByFeature(featureClass) ) { + for (FeatureManager.FeatureDescriptor descriptor : manager.getByFeature(featureClass)) { rodTypes.add(String.format("%s", GATKDocUtils.htmlFilenameForClass(descriptor.getCodecClass()), descriptor.getName())); @@ -550,14 +563,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { root.put("fulltext", fieldDoc.commentText()); // What are our enum options? - if ( def.validOptions != null ) + if (def.validOptions != null) root.put("options", docForEnumArgument(source.field.getType())); // general attributes List attributes = new ArrayList(); - if ( def.required ) attributes.add("required"); - if ( source.isDeprecated() ) attributes.add("depreciated"); - if ( attributes.size() > 0 ) + if (def.required) attributes.add("required"); + if (source.isDeprecated()) attributes.add("depreciated"); + if (attributes.size() > 0) root.put("attributes", Utils.join(", ", attributes)); return root; @@ -566,21 +579,23 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { /** * Helper routine that provides a FreeMarker map for an enumClass, grabbing the * values of the enum and their associated javadoc documentation. + * * @param enumClass * @return */ @Requires("enumClass.isEnum()") private List> docForEnumArgument(Class enumClass) { ClassDoc doc = this.getDoclet().getClassDocForClass(enumClass); - if ( doc == null ) // || ! doc.isEnum() ) + if (doc == null) // || ! 
doc.isEnum() ) throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got instead: " + doc); List> bindings = new ArrayList>(); - for (final FieldDoc field : doc.fields(false) ) { + for (final FieldDoc field : doc.fields(false)) { bindings.add( - new HashMap(){{ + new HashMap() {{ put("name", field.name()); - put("summary", field.commentText());}}); + put("summary", field.commentText()); + }}); } return bindings; diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 6ee4af288..85e9f362d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -681,8 +681,8 @@ public class IntervalUtils { LinkedHashMap> locsByContig = splitByContig(sorted); List expanded = new ArrayList(); - for (String contig: locsByContig.keySet()) { - List contigLocs = locsByContig.get(contig); + for (Map.Entry> contig: locsByContig.entrySet()) { + List contigLocs = contig.getValue(); int contigLocsSize = contigLocs.size(); GenomeLoc startLoc, stopLoc; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 27226ba22..c09eb0063 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -25,11 +25,14 @@ package org.broadinstitute.sting.utils.recalibration; -import org.broadinstitute.sting.gatk.walkers.bqsr.*; +import net.sf.samtools.SAMTag; +import net.sf.samtools.SAMUtils; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import 
org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedHashMap; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; @@ -49,7 +52,9 @@ public class BaseRecalibration { private final RecalibrationTables recalibrationTables; private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation - private final boolean noIndelQuals; + private final boolean disableIndelQuals; + private final int preserveQLessThan; + private final boolean emitOriginalQuals; private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values. static { @@ -62,9 +67,10 @@ public class BaseRecalibration { * * @param RECAL_FILE a GATK Report file containing the recalibration information * @param quantizationLevels number of bins to quantize the quality scores - * @param noIndelQuals if true, do not emit base indel qualities + * @param disableIndelQuals if true, do not emit base indel qualities + * @param preserveQLessThan preserve quality scores less than this value */ - public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean noIndelQuals) { + public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals) { RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE); recalibrationTables = recalibrationReport.getRecalibrationTables(); @@ -76,22 +82,9 @@ public class BaseRecalibration { quantizationInfo.quantizeQualityScores(quantizationLevels); readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); - this.noIndelQuals = noIndelQuals; - } - - /** - * This 
constructor only exists for testing purposes. - * - * @param quantizationInfo the quantization info object - * @param recalibrationTables the map of key managers and recalibration tables - * @param requestedCovariates the list of requested covariates - */ - protected BaseRecalibration(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) { - this.quantizationInfo = quantizationInfo; - this.recalibrationTables = recalibrationTables; - this.requestedCovariates = requestedCovariates; - readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); - noIndelQuals = false; + this.disableIndelQuals = disableIndelQuals; + this.preserveQLessThan = preserveQLessThan; + this.emitOriginalQuals = emitOriginalQuals; } /** @@ -102,9 +95,17 @@ public class BaseRecalibration { * @param read the read to recalibrate */ public void recalibrateRead(final GATKSAMRecord read) { - RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read + if (emitOriginalQuals && read.getAttribute(SAMTag.OQ.name()) == null) { // Save the old qualities if the tag isn't already taken in the read + try { + read.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(read.getBaseQualities())); + } catch (IllegalArgumentException e) { + throw new UserException.MalformedBAM(read, "illegal base quality encountered; " + e.getMessage()); + } + } + + RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings - if (noIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { + if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { read.setBaseQualities(null, errorModel); continue; } @@ -117,7 +118,7 @@ public class BaseRecalibration { final byte originalQualityScore = quals[offset]; - if 
(originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) + if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base quals[offset] = recalibratedQualityScore; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java similarity index 69% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java index 6d004edb1..1c84518eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java @@ -1,18 +1,20 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; public enum EventType { - BASE_SUBSTITUTION(0, "M"), - BASE_INSERTION(1, "I"), - BASE_DELETION(2, "D"); + BASE_SUBSTITUTION(0, "M", "Base Substitution"), + BASE_INSERTION(1, "I", "Base Insertion"), + BASE_DELETION(2, "D", "Base Deletion"); public final int index; private final String representation; + private final String longRepresentation; - private EventType(int index, String representation) { + private EventType(int index, String representation, String longRepresentation) { this.index = index; this.representation = representation; + this.longRepresentation = longRepresentation; } public static EventType eventFrom(int index) { @@ -40,4 +42,8 @@ 
public enum EventType { public String toString() { return representation; } + + public String prettyPrint() { + return longRepresentation; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java index 62edd5fac..a5a3104a0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java @@ -223,7 +223,7 @@ public class QualQuantizer { @Override public int compareTo(final QualInterval qualInterval) { - return new Integer(this.qStart).compareTo(qualInterval.qStart); + return Integer.valueOf(this.qStart).compareTo(qualInterval.qStart); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java similarity index 78% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java index fb3aef949..2b67d12a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java @@ -1,11 +1,9 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; -import org.broadinstitute.sting.utils.recalibration.QualQuantizer; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import java.util.Arrays; import java.util.List; @@ -41,7 +39,7 @@ public class 
QuantizationInfo { for (final RecalDatum value : qualTable.getAllValues()) { final RecalDatum datum = value; final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) - qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key + qualHistogram[empiricalQual] += datum.getNumObservations(); // add the number of observations for every key } empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities quantizeQualityScores(quantizationLevels); @@ -70,15 +68,15 @@ public class QuantizationInfo { } public GATKReportTable generateReportTable() { - GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); - quantizedTable.addColumn(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); - quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); - quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); + GATKReportTable quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); + quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME); + quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); + quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) { - quantizedTable.set(qual, RecalDataManager.QUALITY_SCORE_COLUMN_NAME, qual); - quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); - quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); + quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual); + quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); + quantizedTable.set(qual, 
RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); } return quantizedTable; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java index 5e907237d..c86bd4deb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; /** * The object temporarily held by a read that describes all of it's covariates. diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java new file mode 100755 index 000000000..249422c17 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -0,0 +1,305 @@ +package org.broadinstitute.sting.utils.recalibration; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; + +import java.util.Random; + +/** + * An individual piece of recalibration data. Each bin counts up the number of observations and the number + * of reference mismatches seen for that combination of covariates. + * + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Nov 3, 2009 + */ +@Invariant({ + "estimatedQReported >= 0.0", + "! Double.isNaN(estimatedQReported)", + "! Double.isInfinite(estimatedQReported)", + "empiricalQuality >= 0.0 || empiricalQuality == UNINITIALIZED", + "! Double.isNaN(empiricalQuality)", + "! 
Double.isInfinite(empiricalQuality)", + "numObservations >= 0", + "numMismatches >= 0", + "numMismatches <= numObservations" +}) +public class RecalDatum { + private static final double UNINITIALIZED = -1.0; + + /** + * estimated reported quality score based on combined data's individual q-reporteds and number of observations + */ + private double estimatedQReported; + + /** + * the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) + */ + private double empiricalQuality; + + /** + * number of bases seen in total + */ + private long numObservations; + + /** + * number of bases seen that didn't match the reference + */ + private long numMismatches; + + /** + * used when calculating empirical qualities to avoid division by zero + */ + private static final int SMOOTHING_CONSTANT = 1; + + //--------------------------------------------------------------------------------------------------------------- + // + // constructors + // + //--------------------------------------------------------------------------------------------------------------- + + /** + * Create a new RecalDatum with given observation and mismatch counts, and an reported quality + * + * @param _numObservations + * @param _numMismatches + * @param reportedQuality + */ + public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) { + if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0"); + if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0"); + if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0"); + + numObservations = _numObservations; + numMismatches = _numMismatches; + estimatedQReported = reportedQuality; + empiricalQuality = UNINITIALIZED; + } + + /** + * Copy copy into this recal datum, overwriting all of this objects data + * @param copy + */ + public RecalDatum(final RecalDatum copy) { + 
this.numObservations = copy.getNumObservations(); + this.numMismatches = copy.getNumMismatches(); + this.estimatedQReported = copy.estimatedQReported; + this.empiricalQuality = copy.empiricalQuality; + } + + /** + * Add in all of the data from other into this object, updating the reported quality from the expected + * error rate implied by the two reported qualities + * + * @param other + */ + public synchronized void combine(final RecalDatum other) { + final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors(); + increment(other.getNumObservations(), other.getNumMismatches()); + estimatedQReported = -10 * Math.log10(sumErrors / getNumObservations()); + empiricalQuality = UNINITIALIZED; + } + + public synchronized void setEstimatedQReported(final double estimatedQReported) { + if ( estimatedQReported < 0 ) throw new IllegalArgumentException("estimatedQReported < 0"); + if ( Double.isInfinite(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is infinite"); + if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN"); + + this.estimatedQReported = estimatedQReported; + } + + public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { + final Random random = new Random(); + final int nObservations = random.nextInt(maxObservations); + final int nErrors = random.nextInt(maxErrors); + final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE); + return new RecalDatum(nObservations, nErrors, (byte)qual); + } + + public final double getEstimatedQReported() { + return estimatedQReported; + } + public final byte getEstimatedQReportedAsByte() { + return (byte)(int)(Math.round(getEstimatedQReported())); + } + + //--------------------------------------------------------------------------------------------------------------- + // + // Empirical quality score -- derived from the num mismatches and observations + // + 
//--------------------------------------------------------------------------------------------------------------- + + /** + * Returns the error rate (in real space) of this interval, or 0 if there are no obserations + * @return the empirical error rate ~= N errors / N obs + */ + @Ensures("result >= 0.0") + public double getEmpiricalErrorRate() { + if ( numObservations == 0 ) + return 0.0; + else { + // cache the value so we don't call log over and over again + final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); + // smoothing is one error and one non-error observation, for example + final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); + return doubleMismatches / doubleObservations; + } + } + + public synchronized void setEmpiricalQuality(final double empiricalQuality) { + if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0"); + if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite"); + if ( Double.isNaN(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is NaN"); + + this.empiricalQuality = empiricalQuality; + } + + public final double getEmpiricalQuality() { + if (empiricalQuality == UNINITIALIZED) + calcEmpiricalQuality(); + return empiricalQuality; + } + + public final byte getEmpiricalQualityAsByte() { + return (byte)(Math.round(getEmpiricalQuality())); + } + + //--------------------------------------------------------------------------------------------------------------- + // + // increment methods + // + //--------------------------------------------------------------------------------------------------------------- + + @Override + public String toString() { + return String.format("%d,%d,%d", getNumObservations(), getNumMismatches(), (byte) Math.floor(getEmpiricalQuality())); + } + + public String stringForCSV() { + return String.format("%s,%d,%.2f", toString(), (byte) 
Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported()); + } + +// /** +// * We don't compare the estimated quality reported because it may be different when read from +// * report tables. +// * +// * @param o the other recal datum +// * @return true if the two recal datums have the same number of observations, errors and empirical quality. +// */ +// @Override +// public boolean equals(Object o) { +// if (!(o instanceof RecalDatum)) +// return false; +// RecalDatum other = (RecalDatum) o; +// return super.equals(o) && +// MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0; +// } + + //--------------------------------------------------------------------------------------------------------------- + // + // increment methods + // + //--------------------------------------------------------------------------------------------------------------- + + public long getNumObservations() { + return numObservations; + } + + public synchronized void setNumObservations(final long numObservations) { + if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0"); + this.numObservations = numObservations; + empiricalQuality = UNINITIALIZED; + } + + public long getNumMismatches() { + return numMismatches; + } + + @Requires({"numMismatches >= 0"}) + public synchronized void setNumMismatches(final long numMismatches) { + if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0"); + this.numMismatches = numMismatches; + empiricalQuality = UNINITIALIZED; + } + + @Requires({"by >= 0"}) + public synchronized void incrementNumObservations(final long by) { + numObservations += by; + empiricalQuality = UNINITIALIZED; + } + + @Requires({"by >= 0"}) + public synchronized void incrementNumMismatches(final long by) { + numMismatches += by; + empiricalQuality = UNINITIALIZED; + } + + @Requires({"incObservations >= 0", "incMismatches >= 0"}) + @Ensures({"numObservations == 
old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"}) + public synchronized void increment(final long incObservations, final long incMismatches) { + incrementNumObservations(incObservations); + incrementNumMismatches(incMismatches); + } + + @Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"}) + public synchronized void increment(final boolean isError) { + incrementNumObservations(1); + if ( isError ) + incrementNumMismatches(1); + } + + // ------------------------------------------------------------------------------------- + // + // Private implementation helper functions + // + // ------------------------------------------------------------------------------------- + + /** + * Calculate and cache the empirical quality score from mismatches and observations (expensive operation) + */ + @Requires("empiricalQuality == UNINITIALIZED") + @Ensures("empiricalQuality != UNINITIALIZED") + private synchronized final void calcEmpiricalQuality() { + final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate()); + empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); + } + + /** + * calculate the expected number of errors given the estimated Q reported and the number of observations + * in this datum. 
+ * + * @return a positive (potentially fractional) estimate of the number of errors + */ + @Ensures("result >= 0.0") + private double calcExpectedErrors() { + return (double) getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java new file mode 100644 index 000000000..102aa4433 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -0,0 +1,531 @@ +package org.broadinstitute.sting.utils.recalibration; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.apache.commons.math.MathException; +import org.apache.commons.math.stat.inference.ChiSquareTestImpl; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.Set; + +/** + * A tree of recal datum, where each contains a set of sub datum representing sub-states of the higher level one + * + * @author Mark DePristo + * @since 07/27/12 + */ +public class RecalDatumNode { + private final static double SMALLEST_CHI2_PVALUE = 1e-300; + protected static Logger logger = Logger.getLogger(RecalDatumNode.class); + + /** + * fixedPenalty is this value if it's considered fixed + */ + private final static double UNINITIALIZED = Double.NEGATIVE_INFINITY; + + private final T recalDatum; + private double fixedPenalty = UNINITIALIZED; + private final Set> subnodes; + + @Requires({"recalDatum != null"}) + public RecalDatumNode(final T recalDatum) { + this(recalDatum, new HashSet>()); + } + + @Override + public String toString() { + return recalDatum.toString(); + } + + @Requires({"recalDatum != null", "subnodes != null"}) + public 
RecalDatumNode(final T recalDatum, final Set> subnodes) { + this(recalDatum, UNINITIALIZED, subnodes); + } + + @Requires({"recalDatum != null"}) + protected RecalDatumNode(final T recalDatum, final double fixedPenalty) { + this(recalDatum, fixedPenalty, new HashSet>()); + } + + @Requires({"recalDatum != null", "subnodes != null"}) + protected RecalDatumNode(final T recalDatum, final double fixedPenalty, final Set> subnodes) { + this.recalDatum = recalDatum; + this.fixedPenalty = fixedPenalty; + this.subnodes = new HashSet>(subnodes); + } + + /** + * Get the recal data associated with this node + * @return + */ + @Ensures("result != null") + public T getRecalDatum() { + return recalDatum; + } + + /** + * The set of all subnodes of this tree. May be modified. + * @return + */ + @Ensures("result != null") + public Set> getSubnodes() { + return subnodes; + } + + /** + * Return the fixed penalty, if set, or else the the calculated penalty for this node + * @return + */ + public double getPenalty() { + if ( fixedPenalty != UNINITIALIZED ) + return fixedPenalty; + else + return calcPenalty(); + } + + /** + * Set the fixed penalty for this node to a fresh calculation from calcPenalty + * + * This is important in the case where you want to compute the penalty from a full + * tree and then chop the tree up afterwards while considering the previous penalties. + * If you don't call this function then manipulating the tree may result in the + * penalty functions changing with changes in the tree. + * + * @param doEntireTree recurse into all subnodes? 
+ * @return the fixed penalty for this node + */ + public double calcAndSetFixedPenalty(final boolean doEntireTree) { + fixedPenalty = calcPenalty(); + if ( doEntireTree ) + for ( final RecalDatumNode sub : subnodes ) + sub.calcAndSetFixedPenalty(doEntireTree); + return fixedPenalty; + } + + /** + * Add node to the set of subnodes of this node + * @param sub + */ + @Requires("sub != null") + public void addSubnode(final RecalDatumNode sub) { + subnodes.add(sub); + } + + /** + * Is this a leaf node (i.e., has no subnodes)? + * @return + */ + public boolean isLeaf() { + return subnodes.isEmpty(); + } + + /** + * Is this node immediately above only leaf nodes? + * + * @return + */ + public boolean isAboveOnlyLeaves() { + for ( final RecalDatumNode sub : subnodes ) + if ( ! sub.isLeaf() ) + return false; + return true; + } + + /** + * What's the immediate number of subnodes from this node? + * @return + */ + @Ensures("result >= 0") + public int getNumSubnodes() { + return subnodes.size(); + } + + /** + * Total penalty is the sum of leaf node penalties + * + * This algorithm assumes that penalties have been fixed before pruning, as leaf nodes by + * definition have 0 penalty unless they represent a pruned tree with underlying -- but now + * pruned -- subtrees + * + * @return + */ + public double totalPenalty() { + if ( isLeaf() ) + return getPenalty(); + else { + double sum = 0.0; + for ( final RecalDatumNode sub : subnodes ) + sum += sub.totalPenalty(); + return sum; + } + } + + /** + * The maximum penalty among all nodes + * @return + */ + public double maxPenalty(final boolean leafOnly) { + double max = ! leafOnly || isLeaf() ? getPenalty() : Double.MIN_VALUE; + for ( final RecalDatumNode sub : subnodes ) + max = Math.max(max, sub.maxPenalty(leafOnly)); + return max; + } + + /** + * The minimum penalty among all nodes + * @return + */ + public double minPenalty(final boolean leafOnly) { + double min = ! leafOnly || isLeaf() ? 
getPenalty() : Double.MAX_VALUE; + for ( final RecalDatumNode sub : subnodes ) + min = Math.min(min, sub.minPenalty(leafOnly)); + return min; + } + + /** + * What's the longest branch from this node to any leaf? + * @return + */ + public int maxDepth() { + int subMax = 0; + for ( final RecalDatumNode sub : subnodes ) + subMax = Math.max(subMax, sub.maxDepth()); + return subMax + 1; + } + + /** + * What's the shortest branch from this node to any leaf? Includes this node + * @return + */ + @Ensures("result > 0") + public int minDepth() { + if ( isLeaf() ) + return 1; + else { + int subMin = Integer.MAX_VALUE; + for ( final RecalDatumNode sub : subnodes ) + subMin = Math.min(subMin, sub.minDepth()); + return subMin + 1; + } + } + + /** + * Return the number of nodes, including this one, reachable from this node + * @return + */ + @Ensures("result > 0") + public int size() { + int size = 1; + for ( final RecalDatumNode sub : subnodes ) + size += sub.size(); + return size; + } + + /** + * Count the number of leaf nodes reachable from this node + * + * @return + */ + @Ensures("result >= 0") + public int numLeaves() { + if ( isLeaf() ) + return 1; + else { + int size = 0; + for ( final RecalDatumNode sub : subnodes ) + size += sub.numLeaves(); + return size; + } + } + + /** + * Calculate the phred-scaled p-value for a chi^2 test for independent among subnodes of this node. 
+ * + * The chi^2 value indicates the degree of independence of the implied error rates among the + * immediate subnodes + * + * @return the phred-scaled p-value for chi2 penalty, or 0.0 if it cannot be calculated + */ + private double calcPenalty() { + if ( isLeaf() || freeToMerge() ) + return 0.0; + else if ( subnodes.size() == 1 ) + // only one value, so its free to merge away + return 0.0; + else { + final long[][] counts = new long[subnodes.size()][2]; + + int i = 0; + for ( final RecalDatumNode subnode : subnodes ) { + // use the yates correction to help avoid all zeros => NaN + counts[i][0] = subnode.getRecalDatum().getNumMismatches() + 1; + counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2; + i++; + } + + try { + final double chi2PValue = new ChiSquareTestImpl().chiSquareTest(counts); + final double penalty = -10 * Math.log10(Math.max(chi2PValue, SMALLEST_CHI2_PVALUE)); + + // make sure things are reasonable and fail early if not + if (Double.isInfinite(penalty) || Double.isNaN(penalty)) + throw new ReviewedStingException("chi2 value is " + chi2PValue + " at " + getRecalDatum()); + + return penalty; + } catch ( MathException e ) { + throw new ReviewedStingException("Failed in calculating chi2 value", e); + } + } + } + + /** + * Is this node free to merge because its rounded Q score is the same as all nodes below + * @return + */ + private boolean freeToMerge() { + if ( isLeaf() ) // leaves are free to merge + return true; + else { + final byte myQual = getRecalDatum().getEmpiricalQualityAsByte(); + for ( final RecalDatumNode sub : subnodes ) + if ( sub.getRecalDatum().getEmpiricalQualityAsByte() != myQual ) + return false; + return true; + } + } + + /** + * Calculate the penalty of this interval, given the overall error rate for the interval + * + * If the globalErrorRate is e, this value is: + * + * sum_i |log10(e_i) - log10(e)| * nObservations_i + * + * each the index i applies to all leaves of the tree accessible from this interval + * 
(found recursively from subnodes as necessary) + * + * @param globalErrorRate overall error rate in real space against which we calculate the penalty + * @return the cost of approximating the bins in this interval with the globalErrorRate + */ + @Requires("globalErrorRate >= 0.0") + @Ensures("result >= 0.0") + private double calcPenaltyLog10(final double globalErrorRate) { + if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty + return 0.0; + + if ( isLeaf() ) { + // this is leave node + return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * recalDatum.getNumObservations(); + // TODO -- how we can generalize this calculation? +// if ( this.qEnd <= minInterestingQual ) +// // It's free to merge up quality scores below the smallest interesting one +// return 0; +// else { +// return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations(); +// } + } else { + double sum = 0; + for ( final RecalDatumNode hrd : subnodes) + sum += hrd.calcPenaltyLog10(globalErrorRate); + return sum; + } + } + + /** + * Return a freshly allocated tree prunes to have no more than maxDepth from the root to any leaf + * + * @param maxDepth + * @return + */ + public RecalDatumNode pruneToDepth(final int maxDepth) { + if ( maxDepth < 1 ) + throw new IllegalArgumentException("maxDepth < 1"); + else { + final Set> subPruned = new HashSet>(getNumSubnodes()); + if ( maxDepth > 1 ) + for ( final RecalDatumNode sub : subnodes ) + subPruned.add(sub.pruneToDepth(maxDepth - 1)); + return new RecalDatumNode(getRecalDatum(), fixedPenalty, subPruned); + } + } + + /** + * Return a freshly allocated tree with to no more than maxElements in order of penalty + * + * Note that nodes must have fixed penalties to this algorithm will fail. 
+ * + * @param maxElements + * @return + */ + public RecalDatumNode pruneByPenalty(final int maxElements) { + RecalDatumNode root = this; + + while ( root.size() > maxElements ) { + // remove the lowest penalty element, and continue + root = root.removeLowestPenaltyNode(); + } + + // our size is below the target, so we are good, return + return root; + } + + /** + * Return a freshly allocated tree where all mergable nodes with < maxPenalty are merged + * + * Note that nodes must have fixed penalties to this algorithm will fail. + * + * @param maxPenaltyIn the maximum penalty we are allowed to incur for a merge + * @param applyBonferroniCorrection if true, we will adjust penalty by the phred-scaled bonferroni correction + * for the size of the initial tree. That is, if there are 10 nodes in the + * tree and maxPenalty is 20 we will actually enforce 10^-2 / 10 = 10^-3 = 30 + * penalty for multiple testing + * @return + */ + public RecalDatumNode pruneToNoMoreThanPenalty(final double maxPenaltyIn, final boolean applyBonferroniCorrection) { + RecalDatumNode root = this; + + final double bonferroniCorrection = 10 * Math.log10(this.size()); + final double maxPenalty = applyBonferroniCorrection ? 
maxPenaltyIn + bonferroniCorrection : maxPenaltyIn; + + if ( applyBonferroniCorrection ) + logger.info(String.format("Applying Bonferroni correction for %d nodes = %.2f to initial penalty %.2f for total " + + "corrected max penalty of %.2f", this.size(), bonferroniCorrection, maxPenaltyIn, maxPenalty)); + + while ( true ) { + final Pair, Double> minPenaltyNode = root.getMinPenaltyAboveLeafNode(); + + if ( minPenaltyNode == null || minPenaltyNode.getSecond() > maxPenalty ) { + // nothing to merge, or the best candidate is above our max allowed + if ( minPenaltyNode == null ) { + if ( logger.isDebugEnabled() ) logger.debug("Stopping because no candidates could be found"); + } else { + if ( logger.isDebugEnabled() ) logger.debug("Stopping because node " + minPenaltyNode.getFirst() + " has penalty " + minPenaltyNode.getSecond() + " > max " + maxPenalty); + } + break; + } else { + // remove the lowest penalty element, and continue + if ( logger.isDebugEnabled() ) logger.debug("Removing node " + minPenaltyNode.getFirst() + " with penalty " + minPenaltyNode.getSecond()); + root = root.removeLowestPenaltyNode(); + } + } + + // no more candidates exist with penalty < maxPenalty + return root; + } + + + /** + * Find the lowest penalty above leaf node in the tree, and return a tree without it + * + * Note this excludes the current (root) node + * + * @return + */ + private RecalDatumNode removeLowestPenaltyNode() { + final Pair, Double> nodeToRemove = getMinPenaltyAboveLeafNode(); + if ( logger.isDebugEnabled() ) + logger.debug("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond()); + + final Pair, Boolean> result = removeNode(nodeToRemove.getFirst()); + + if ( ! 
result.getSecond() ) + throw new IllegalStateException("Never removed any node!"); + + final RecalDatumNode oneRemoved = result.getFirst(); + if ( oneRemoved == null ) + throw new IllegalStateException("Removed our root node, wow, didn't expect that"); + return oneRemoved; + } + + /** + * Finds in the tree the node with the lowest penalty whose subnodes are all leaves + * + * @return the node and its penalty, or null if no such node exists + */ + private Pair, Double> getMinPenaltyAboveLeafNode() { + if ( isLeaf() ) + // not allowed to remove leafs directly + return null; + if ( isAboveOnlyLeaves() ) + // we only consider removing nodes above all leaves + return new Pair, Double>(this, getPenalty()); + else { + // just recurse, taking the result with the min penalty of all subnodes + Pair, Double> minNode = null; + for ( final RecalDatumNode sub : subnodes ) { + final Pair, Double> subFind = sub.getMinPenaltyAboveLeafNode(); + if ( subFind != null && (minNode == null || subFind.getSecond() < minNode.getSecond()) ) { + minNode = subFind; + } + } + return minNode; + } + } + + /** + * Return a freshly allocated tree without the node nodeToRemove + * + * @param nodeToRemove + * @return + */ + private Pair, Boolean> removeNode(final RecalDatumNode nodeToRemove) { + if ( this == nodeToRemove ) { + if ( isLeaf() ) + throw new IllegalStateException("Trying to remove a leaf node from the tree! " + this + " " + nodeToRemove); + // node is the thing we are going to remove, but without any subnodes + final RecalDatumNode node = new RecalDatumNode(getRecalDatum(), fixedPenalty); + return new Pair, Boolean>(node, true); + } else { + // did we remove something in a sub branch? 
+ boolean removedSomething = false; + + // our sub nodes with the penalty node removed + final Set> sub = new HashSet>(getNumSubnodes()); + + for ( final RecalDatumNode sub1 : subnodes ) { + if ( removedSomething ) { + // already removed something, just add sub1 back to sub + sub.add(sub1); + } else { + // haven't removed anything yet, so try + final Pair, Boolean> maybeRemoved = sub1.removeNode(nodeToRemove); + removedSomething = maybeRemoved.getSecond(); + sub.add(maybeRemoved.getFirst()); + } + } + + final RecalDatumNode node = new RecalDatumNode(getRecalDatum(), fixedPenalty, sub); + return new Pair, Boolean>(node, removedSomething); + } + } + + /** + * Return a collection of all of the data in the leaf nodes of this tree + * + * @return + */ + public Collection getAllLeaves() { + final LinkedList list = new LinkedList(); + getAllLeavesRec(list); + return list; + } + + /** + * Helpful recursive function for getAllLeaves() + * + * @param list the destination for the list of leaves + */ + private void getAllLeavesRec(final LinkedList list) { + if ( isLeaf() ) + list.add(getRecalDatum()); + else { + for ( final RecalDatumNode sub : subnodes ) + sub.getAllLeavesRec(list); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index 876ce585a..fe6ef7018 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -23,11 +23,13 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; @@ -39,7 +41,6 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.io.Resource; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -59,7 +60,7 @@ import java.util.*; * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration. 
*/ -public class RecalDataManager { +public class RecalUtils { public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments"; public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized"; public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0"; @@ -85,13 +86,108 @@ public class RecalDataManager { private static final String SCRIPT_FILE = "BQSR.R"; - private static final Pair covariateValue = new Pair(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s"); - private static final Pair covariateName = new Pair(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s"); - private static final Pair eventType = new Pair(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s"); - private static final Pair empiricalQuality = new Pair(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f"); - private static final Pair estimatedQReported = new Pair(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f"); - private static final Pair nObservations = new Pair(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d"); - private static final Pair nErrors = new Pair(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d"); + private static final Pair covariateValue = new Pair(RecalUtils.COVARIATE_VALUE_COLUMN_NAME, "%s"); + private static final Pair covariateName = new Pair(RecalUtils.COVARIATE_NAME_COLUMN_NAME, "%s"); + private static final Pair eventType = new Pair(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s"); + private static final Pair empiricalQuality = new Pair(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f"); + private static final Pair estimatedQReported = new Pair(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f"); + private static final Pair nObservations = new Pair(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d"); + private static final Pair nErrors = new Pair(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%d"); + + /** + * Generates two lists : required covariates and optional covariates based on the user's requests. + * + * Performs the following tasks in order: + * 1. 
Adds all requierd covariates in order + * 2. Check if the user asked to use the standard covariates and adds them all if that's the case + * 3. Adds all covariates requested by the user that were not already added by the two previous steps + * + * @param argumentCollection the argument collection object for the recalibration walker + * @return a pair of ordered lists : required covariates (first) and optional covariates (second) + */ + public static Pair, ArrayList> initializeCovariates(RecalibrationArgumentCollection argumentCollection) { + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); + final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); + final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); + + final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates + ArrayList optionalCovariates = new ArrayList(); + if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES) + optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user + + if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified + for (String requestedCovariateString : argumentCollection.COVARIATES) { + boolean foundClass = false; + for (Class covClass : covariateClasses) { + if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class + foundClass = true; + if (!requiredClasses.contains(covClass) && + (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { + try { + final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it + optionalCovariates.add(covariate); + } catch (Exception e) { + throw new 
DynamicClassResolutionException(covClass, e); + } + } + } + } + + if (!foundClass) { + throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates."); + } + } + } + return new Pair, ArrayList>(requiredCovariates, optionalCovariates); + } + + /** + * Adds the required covariates to a covariate list + * + * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand. + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addRequiredCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + if (classes.size() != 2) + throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); + + dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. 
+ dest.add(new QualityScoreCovariate()); + return dest; + } + + /** + * Adds the standard covariates to a covariate list + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addStandardCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + for (Class covClass : classes) { + try { + final Covariate covariate = (Covariate) covClass.newInstance(); + dest.add(covariate); + } catch (Exception e) { + throw new DynamicClassResolutionException(covClass, e); + } + } + return dest; + } + + public static void listAvailableCovariates(Logger logger) { + // Get a list of all available covariates + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); + + // Print and exit if that's what was requested + logger.info("Available covariates:"); + for (Class covClass : covariateClasses) + logger.info(covClass.getSimpleName()); + logger.info(""); + } public enum SOLID_RECAL_MODE { @@ -152,64 +248,6 @@ public class RecalDataManager { } } - /** - * Generates two lists : required covariates and optional covariates based on the user's requests. - * - * Performs the following tasks in order: - * 1. Adds all requierd covariates in order - * 2. Check if the user asked to use the standard covariates and adds them all if that's the case - * 3. 
Adds all covariates requested by the user that were not already added by the two previous steps - * - * @param argumentCollection the argument collection object for the recalibration walker - * @return a pair of ordered lists : required covariates (first) and optional covariates (second) - */ - public static Pair, ArrayList> initializeCovariates(RecalibrationArgumentCollection argumentCollection) { - final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); - final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); - final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); - - final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates - ArrayList optionalCovariates = new ArrayList(); - if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES) - optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user - - if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified - for (String requestedCovariateString : argumentCollection.COVARIATES) { - boolean foundClass = false; - for (Class covClass : covariateClasses) { - if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class - foundClass = true; - if (!requiredClasses.contains(covClass) && - (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { - try { - final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it - optionalCovariates.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - } - } - - if (!foundClass) { - throw new UserException.CommandLineException("The requested covariate type (" + 
requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates."); - } - } - } - return new Pair, ArrayList>(requiredCovariates, optionalCovariates); - } - - public static void listAvailableCovariates(Logger logger) { - // Get a list of all available covariates - final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); - - // Print and exit if that's what was requested - logger.info("Available covariates:"); - for (Class covClass : covariateClasses) - logger.info(covClass.getSimpleName()); - logger.info(""); - } - private static List generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) { List result = new LinkedList(); int reportTableIndex = 0; @@ -272,8 +310,8 @@ public class RecalDataManager { reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality()); if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index) reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table - reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations); - reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches); + reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations()); + reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches()); rowIndex++; } @@ -320,7 +358,7 @@ public class RecalDataManager { files.getFirst().close(); final RScriptExecutor executor = new RScriptExecutor(); - executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class)); + executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class)); executor.addArgs(csvFileName.getAbsolutePath()); executor.addArgs(plotFileName.getAbsolutePath()); executor.exec(); @@ -410,9 +448,7 @@ public class RecalDataManager { final int 
covariateKey = (Integer)keys.get(2); values.add(covariate.formatKey(covariateKey)); values.add(covariateNameMap.get(covariate)); - - final EventType event = EventType.eventFrom((Integer)keys.get(3)); - values.add(event); + values.add(EventType.eventFrom((Integer)keys.get(3)).prettyPrint()); return values; } @@ -482,14 +518,14 @@ public class RecalDataManager { */ public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) { if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base - if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); + if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read + final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG); if (attr != null) { byte[] colorSpace; if (attr instanceof String) colorSpace = ((String) attr).getBytes(); else - throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); + throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read if (read.getReadNegativeStrandFlag()) @@ -503,7 +539,7 @@ public class RecalDataManager { inconsistency[i] = (byte) (thisBase == readBases[i] ? 
0 : 1); prevBase = readBases[i]; } - read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); + read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); } else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); @@ -547,7 +583,7 @@ public class RecalDataManager { * @return Returns true if the base was inconsistent with the color space */ public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) { - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG); + final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG); if (attr != null) { final byte[] inconsistency = (byte[]) attr; // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference! @@ -693,40 +729,4 @@ public class RecalDataManager { } - /** - * Adds the required covariates to a covariate list - * - * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand. - * - * @param classes list of classes to add to the covariate list - * @return the covariate list - */ - private static ArrayList addRequiredCovariatesToList(List> classes) { - ArrayList dest = new ArrayList(classes.size()); - if (classes.size() != 2) - throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); - - dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. 
- dest.add(new QualityScoreCovariate()); - return dest; - } - - /** - * Adds the standard covariates to a covariate list - * - * @param classes list of classes to add to the covariate list - * @return the covariate list - */ - private static ArrayList addStandardCovariatesToList(List> classes) { - ArrayList dest = new ArrayList(classes.size()); - for (Class covClass : classes) { - try { - final Covariate covariate = (Covariate) covClass.newInstance(); - dest.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - return dest; - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java similarity index 82% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index e69cf4d69..e6ab9e38b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -1,11 +1,12 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.bqsr.*; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import java.io.File; import java.io.PrintStream; @@ -33,13 +34,13 @@ public class RecalibrationReport { public RecalibrationReport(final File RECAL_FILE) { final 
GATKReport report = new GATKReport(RECAL_FILE); - argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE); + argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE); RAC = initializeArgumentCollectionTable(argumentTable); - GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE); + GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE); quantizationInfo = initializeQuantizationTable(quantizedTable); - Pair, ArrayList> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates + Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates ArrayList requiredCovariates = covariates.getFirst(); ArrayList optionalCovariates = covariates.getSecond(); requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; @@ -57,13 +58,13 @@ public class RecalibrationReport { for (Covariate cov : requestedCovariates) cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection - recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE))); + recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE))); - parseReadGroupTable(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE)); + parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE)); - parseQualityScoreTable(report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE)); + 
parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE)); - parseAllCovariatesTable(report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables); + parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables); } @@ -85,7 +86,7 @@ public class RecalibrationReport { private int countReadGroups(final GATKReportTable reportTable) { Set readGroups = new HashSet(); for ( int i = 0; i < reportTable.getNumRows(); i++ ) - readGroups.add(reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME).toString()); + readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString()); return readGroups.size(); } @@ -139,17 +140,17 @@ public class RecalibrationReport { \ */ private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) { for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg); - final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME); tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual); - final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME); + final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME); final int covIndex = optionalCovariateIndexes.get(covName); - final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME); + final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME); tempCOVarray[2] = 
requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue); - final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); tempCOVarray[3] = event.index; recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray); @@ -164,11 +165,11 @@ public class RecalibrationReport { */ private void parseQualityScoreTable(final GATKReportTable reportTable, final NestedIntegerArray qualTable) { for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg); - final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME); tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual); - final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); tempQUALarray[2] = event.index; qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray); @@ -183,9 +184,9 @@ public class RecalibrationReport { */ private void parseReadGroupTable(final GATKReportTable reportTable, final NestedIntegerArray rgTable) { for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempRGarray[0] = requestedCovariates[0].keyFromValue(rg); - final EventType event = 
EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); tempRGarray[1] = event.index; rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray); @@ -193,13 +194,13 @@ public class RecalibrationReport { } private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) { - final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); - final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); - final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); + final long nObservations = (Long) reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME); + final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME); final double estimatedQReported = hasEstimatedQReportedColumn ? 
// the estimatedQreported column only exists in the ReadGroup table - (Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table - Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table + (Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table + Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1); datum.setEstimatedQReported(estimatedQReported); @@ -218,8 +219,8 @@ public class RecalibrationReport { final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1]; for ( int i = 0; i < table.getNumRows(); i++ ) { final byte originalQual = (byte)i; - final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); - final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); + final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); + final Object countObject = table.get(i, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); final byte quantizedQual = Byte.parseByte(quantizedObject.toString()); final long quantizedCount = Long.parseLong(countObject.toString()); quals[originalQual] = quantizedQual; @@ -239,7 +240,7 @@ public class RecalibrationReport { for ( int i = 0; i < table.getNumRows(); i++ ) { final String argument = table.get(i, "Argument").toString(); - Object value = table.get(i, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); if (value.equals("null")) value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport @@ -250,10 +251,10 @@ public class 
RecalibrationReport { RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value); else if (argument.equals("solid_recal_mode")) - RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value); + RAC.SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.recalModeFromString((String) value); else if (argument.equals("solid_nocall_strategy")) - RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value); + RAC.SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value); else if (argument.equals("mismatches_context_size")) RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value); @@ -307,7 +308,7 @@ public class RecalibrationReport { } public void output(PrintStream output) { - RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output); + RecalUtils.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output); } public RecalibrationArgumentCollection getRAC() { diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java index 0416b5eb9..f37e69c9a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.utils.recalibration; -import org.broadinstitute.sting.gatk.walkers.bqsr.Covariate; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; -import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDatum; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; /** diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java similarity index 89% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java index a89586c2c..cebdebf9d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java similarity index 90% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java index 7d55c620b..4c20284d9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java @@ -23,8 +23,10 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; @@ -51,10 +53,6 @@ public class ContextCovariate implements StandardCovariate { private static final int LENGTH_BITS = 4; private static final int LENGTH_MASK = 15; - // temporary lists to use for creating context covariate keys - private final ArrayList mismatchKeys = new ArrayList(200); - private final ArrayList indelKeys = new ArrayList(200); - // the maximum context size (number of bases) permitted; we need to keep the leftmost base free so that values are // not negative and we reserve 4 more bits to represent the length of the context; it takes 2 bits to encode one base. 
static final private int MAX_DNA_CONTEXT = 13; @@ -82,6 +80,8 @@ public class ContextCovariate implements StandardCovariate { @Override public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { + // store the original bases and then write Ns over low quality ones + final byte[] originalBases = read.getReadBases().clone(); final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag(); @@ -89,16 +89,17 @@ public class ContextCovariate implements StandardCovariate { if (negativeStrand) bases = BaseUtils.simpleReverseComplement(bases); - mismatchKeys.clear(); - indelKeys.clear(); - contextWith(bases, mismatchesContextSize, mismatchKeys, mismatchesKeyMask); - contextWith(bases, indelsContextSize, indelKeys, indelsKeyMask); + final ArrayList mismatchKeys = contextWith(bases, mismatchesContextSize, mismatchesKeyMask); + final ArrayList indelKeys = contextWith(bases, indelsContextSize, indelsKeyMask); final int readLength = bases.length; for (int i = 0; i < readLength; i++) { final int indelKey = indelKeys.get(i); values.addCovariate(mismatchKeys.get(i), indelKey, indelKey, (negativeStrand ? 
readLength - i - 1 : i)); } + + // put the original bases back in + read.setReadBases(originalBases); } // Used to get the covariate's value from input csv file during on-the-fly recalibration @@ -134,17 +135,19 @@ public class ContextCovariate implements StandardCovariate { * * @param bases the bases in the read to build the context from * @param contextSize context size to use building the context - * @param keys list to store the keys * @param mask mask for pulling out just the context bits */ - private static void contextWith(final byte[] bases, final int contextSize, final ArrayList keys, final int mask) { + private static ArrayList contextWith(final byte[] bases, final int contextSize, final int mask) { + + final int readLength = bases.length; + final ArrayList keys = new ArrayList(readLength); // the first contextSize-1 bases will not have enough previous context - for (int i = 1; i < contextSize && i <= bases.length; i++) + for (int i = 1; i < contextSize && i <= readLength; i++) keys.add(-1); - if (bases.length < contextSize) - return; + if (readLength < contextSize) + return keys; final int newBaseOffset = 2 * (contextSize - 1) + LENGTH_BITS; @@ -166,7 +169,6 @@ public class ContextCovariate implements StandardCovariate { } } - final int readLength = bases.length; for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) { final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]); if (baseIndex == -1) { // ignore non-ACGT bases @@ -186,6 +188,8 @@ public class ContextCovariate implements StandardCovariate { keys.add(-1); } } + + return keys; } public static int keyFromContext(final String dna) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java rename to 
public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java index 1ad5346fa..c613135bb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /* @@ -89,8 +91,3 @@ public interface Covariate { public int maximumKeyValue(); } -interface RequiredCovariate extends Covariate {} - -interface StandardCovariate extends Covariate {} - -interface ExperimentalCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java index e3b7f2637..4f15419c7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -49,6 +51,7 @@ public class CycleCovariate 
implements StandardCovariate { private static final int MAXIMUM_CYCLE_VALUE = 1000; private static final int CUSHION_FOR_INDELS = 4; + private static String default_platform = null; private static final EnumSet DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS); private static final EnumSet FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT); @@ -58,13 +61,16 @@ public class CycleCovariate implements StandardCovariate { public void initialize(final RecalibrationArgumentCollection RAC) { if (RAC.DEFAULT_PLATFORM != null && !NGSPlatform.isKnown(RAC.DEFAULT_PLATFORM)) throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform."); + + if (RAC.DEFAULT_PLATFORM != null) + default_platform = RAC.DEFAULT_PLATFORM; } // Used to pick out the covariate's value from attributes of the read @Override public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { final int readLength = read.getReadLength(); - final NGSPlatform ngsPlatform = read.getNGSPlatform(); + final NGSPlatform ngsPlatform = default_platform == null ? read.getNGSPlatform() : NGSPlatform.fromReadGroupPL(default_platform); // Discrete cycle platforms if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) { diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java new file mode 100644 index 000000000..72df2a410 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface ExperimentalCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java similarity index 92% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java index dd7060ff8..3ef8ee931 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java index f04d27b7a..85568dac9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import 
org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java new file mode 100644 index 000000000..50755dbcf --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface RequiredCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java new file mode 100644 index 000000000..444954f25 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface StandardCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index e5e747c2d..2c388a1e0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.recalibration.EventType; import java.util.ArrayList; import java.util.Arrays; @@ -405,6 +406,40 @@ public class AlignmentUtils { return alignment; } + public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) { + + int numHQSoftClips = 0; + int alignPos = 0; + final Cigar cigar = read.getCigar(); + final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION ); + + for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) { + + final CigarElement ce = cigar.getCigarElement(iii); + final int elementLength = ce.getLength(); + + switch( ce.getOperator() ) { + case S: + for( int jjj = 0; jjj < elementLength; jjj++ ) { + if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; } + } + break; + case M: + case I: + alignPos += elementLength; + break; + case H: + case P: + case D: + case N: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + } + } + return numHQSoftClips; + } + public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) { return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), 
pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus ); } @@ -441,7 +476,6 @@ public class AlignmentUtils { } break; case D: - case N: if (!isDeletion) { alignmentPos += elementLength; } else { @@ -463,6 +497,7 @@ public class AlignmentUtils { break; case H: case P: + case N: break; default: throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); @@ -481,16 +516,13 @@ public class AlignmentUtils { final int elementLength = ce.getLength(); switch (ce.getOperator()) { - case I: - case S: - break; case D: case N: - alignmentLength += elementLength; - break; case M: alignmentLength += elementLength; break; + case I: + case S: case H: case P: break; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 659615cf4..c9b3a2df8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 6b9ba79b4..c16470c48 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -56,6 +56,15 @@ public class ReadUtils { private static int DEFAULT_ADAPTOR_SIZE = 100; public static int CLIPPING_GOAL_NOT_REACHED = -1; + public static int getMeanRepresentativeReadCount(GATKSAMRecord read) { + if (!read.isReducedRead()) + return 1; + + // 
compute mean representative read counts + final byte[] counts = read.getReducedReadCounts(); + return (int)Math.round((double)MathUtils.sum(counts)/counts.length); + } + /** * A marker to tell which end of the read has been clipped */ diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index 2e1770581..2c312678e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.util.ArrayList; +import org.broadinstitute.sting.utils.BaseUtils; + import java.util.Arrays; import java.util.Collection; -import java.util.List; /** * Immutable representation of an allele @@ -77,32 +77,36 @@ public class Allele implements Comparable { private static final byte[] EMPTY_ALLELE_BASES = new byte[0]; private boolean isRef = false; - private boolean isNull = false; private boolean isNoCall = false; private boolean isSymbolic = false; private byte[] bases = null; - public final static String NULL_ALLELE_STRING = "-"; public final static String NO_CALL_STRING = "."; /** A generic static NO_CALL allele for use */ // no public way to create an allele private Allele(byte[] bases, boolean isRef) { - // standardize our representation of null allele and bases + // null alleles are no longer allowed if ( wouldBeNullAllele(bases) ) { - bases = EMPTY_ALLELE_BASES; - isNull = true; - } else if ( wouldBeNoCallAllele(bases) ) { - bases = EMPTY_ALLELE_BASES; + throw new IllegalArgumentException("Null alleles are not supported"); + } + + // no-calls are represented as no bases + if ( wouldBeNoCallAllele(bases) ) { + this.bases = EMPTY_ALLELE_BASES; isNoCall = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); - } else if ( 
wouldBeSymbolicAllele(bases) ) { + return; + } + + if ( wouldBeSymbolicAllele(bases) ) { isSymbolic = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele"); } -// else -// bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance + else { + bases = BaseUtils.convertToUpperCase(bases); + } this.isRef = isRef; this.bases = bases; @@ -126,8 +130,6 @@ public class Allele implements Comparable { private final static Allele ALT_T = new Allele("T", false); private final static Allele REF_N = new Allele("N", true); private final static Allele ALT_N = new Allele("N", false); - private final static Allele REF_NULL = new Allele(NULL_ALLELE_STRING, true); - private final static Allele ALT_NULL = new Allele(NULL_ALLELE_STRING, false); public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false); // --------------------------------------------------------------------------------------------------------- @@ -154,7 +156,6 @@ public class Allele implements Comparable { case '.': if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); return NO_CALL; - case '-': return isRef ? REF_NULL : ALT_NULL; case 'A': case 'a' : return isRef ? REF_A : ALT_A; case 'C': case 'c' : return isRef ? REF_C : ALT_C; case 'G': case 'g' : return isRef ? 
REF_G : ALT_G; @@ -179,14 +180,9 @@ public class Allele implements Comparable { public static Allele extend(Allele left, byte[] right) { if (left.isSymbolic()) throw new IllegalArgumentException("Cannot extend a symbolic allele"); - byte[] bases = null; - if ( left.length() == 0 ) - bases = right; - else { - bases = new byte[left.length() + right.length]; - System.arraycopy(left.getBases(), 0, bases, 0, left.length()); - System.arraycopy(right, 0, bases, left.length(), right.length); - } + byte[] bases = new byte[left.length() + right.length]; + System.arraycopy(left.getBases(), 0, bases, 0, left.length()); + System.arraycopy(right, 0, bases, left.length(), right.length); return create(bases, left.isReference()); } @@ -242,7 +238,10 @@ public class Allele implements Comparable { } public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) { - if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) + if ( wouldBeNullAllele(bases) ) + return false; + + if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) return true; for (byte base : bases ) { @@ -299,11 +298,6 @@ public class Allele implements Comparable { // // --------------------------------------------------------------------------------------------------------- - //Returns true if this is the null allele - public boolean isNull() { return isNull; } - // Returns true if this is not the null allele - public boolean isNonNull() { return ! isNull(); } - // Returns true if this is the NO_CALL allele public boolean isNoCall() { return isNoCall; } // Returns true if this is not the NO_CALL allele @@ -319,7 +313,7 @@ public class Allele implements Comparable { // Returns a nice string representation of this object public String toString() { - return (isNull() ? NULL_ALLELE_STRING : ( isNoCall() ? NO_CALL_STRING : getDisplayString() )) + (isReference() ? "*" : ""); + return ( isNoCall() ? 
NO_CALL_STRING : getDisplayString() ) + (isReference() ? "*" : ""); } /** @@ -384,27 +378,27 @@ public class Allele implements Comparable { * @return true if this and other are equal */ public boolean equals(Allele other, boolean ignoreRefState) { - return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases)); + return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases)); } /** * @param test bases to test against * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles + * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); } /** * @param test bases to test against * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles + * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); } /** * @param test allele to test against * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles + * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); } @@ -421,10 +415,6 @@ public class Allele implements Comparable { // // --------------------------------------------------------------------------------------------------------- 
- public static Allele getMatchingAllele(Collection allAlleles, String alleleBases) { - return getMatchingAllele(allAlleles, alleleBases.getBytes()); - } - public static Allele getMatchingAllele(Collection allAlleles, byte[] alleleBases) { for ( Allele a : allAlleles ) { if ( a.basesMatch(alleleBases) ) { @@ -438,26 +428,6 @@ public class Allele implements Comparable { return null; // couldn't find anything } - public static List resolveAlleles(List possibleAlleles, List alleleStrings) { - List myAlleles = new ArrayList(alleleStrings.size()); - - for ( String alleleString : alleleStrings ) { - Allele allele = getMatchingAllele(possibleAlleles, alleleString); - - if ( allele == null ) { - if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) { - allele = create(alleleString); - } else { - throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles); - } - } - - myAlleles.add(allele); - } - - return myAlleles; - } - public int compareTo(Allele other) { if ( isReference() && other.isNonReference() ) return -1; @@ -468,9 +438,6 @@ public class Allele implements Comparable { } public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) { - if ( a1.isNull() || a2.isNull() ) - return true; - if ( a2.length() >= a1.length() ) return firstIsPrefixOfSecond(a1, a2); else diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java index fb0d7140d..127f91677 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java @@ -216,6 +216,7 @@ final class CommonInfo { Object x = getAttribute(key); if ( x == null ) return defaultValue; if ( x instanceof Double ) return (Double)x; + if ( x instanceof Integer ) return (Integer)x; return Double.valueOf((String)x); // throws an exception if this isn't 
a string } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 1f0b2b054..2211cfe5e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.utils.variantcontext; +import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; @@ -176,6 +177,10 @@ import java.util.*; * @author depristo */ public class VariantContext implements Feature { // to enable tribble integration + private final static boolean WARN_ABOUT_BAD_END = true; + final protected static Logger logger = Logger.getLogger(VariantContext.class); + + private boolean fullyDecoded = false; protected CommonInfo commonInfo = null; public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; @@ -183,8 +188,6 @@ public class VariantContext implements Feature { // to enable tribble integratio @Deprecated // ID is no longer stored in the attributes map private final static String ID_KEY = "ID"; - private final Byte REFERENCE_BASE_FOR_INDEL; - public final static Set PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet()); /** The location of this VariantContext */ @@ -223,7 +226,6 @@ public class VariantContext implements Feature { // to enable tribble integratio // --------------------------------------------------------------------------------------------------------- public enum Validation { - REF_PADDING, ALLELES, GENOTYPES } @@ -245,7 +247,7 @@ public class VariantContext implements Feature { // to enable tribble integratio this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(), other.getAlleles(), other.getGenotypes(), other.getLog10PError(), 
other.getFiltersMaybeNull(), - other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, + other.getAttributes(), other.fullyDecoded, NO_VALIDATION); } @@ -261,7 +263,6 @@ public class VariantContext implements Feature { // to enable tribble integratio * @param log10PError qual * @param filters filters: use null for unfiltered and empty set for passes filters * @param attributes attributes - * @param referenceBaseForIndel padded reference base * @param validationToPerform set of validation steps to take */ protected VariantContext(final String source, @@ -274,7 +275,6 @@ public class VariantContext implements Feature { // to enable tribble integratio final double log10PError, final Set filters, final Map attributes, - final Byte referenceBaseForIndel, final boolean fullyDecoded, final EnumSet validationToPerform ) { if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } @@ -287,7 +287,6 @@ public class VariantContext implements Feature { // to enable tribble integratio this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID; this.commonInfo = new CommonInfo(source, log10PError, filters, attributes); - REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel; // todo -- remove me when this check is no longer necessary if ( this.commonInfo.hasAttribute(ID_KEY) ) @@ -335,11 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio * in this VC is returned as the set of alleles in the subContext, even if * some of those alleles aren't in the samples * - * @param sampleNames - * @return + * WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING? 
+ * + * @param sampleNames the sample names + * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default + * @return new VariantContext subsetting to just the given samples */ public VariantContext subContextFromSamples(Set sampleNames, final boolean rederiveAllelesFromGenotypes ) { - if ( sampleNames.containsAll(getSampleNames()) ) { + if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) { return this; // fast path when you don't have any work to do } else { VariantContextBuilder builder = new VariantContextBuilder(this); @@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio } } + /** + * @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true + * + * @param sampleNames + * @return + */ + public VariantContext subContextFromSamples(final Set sampleNames) { + return subContextFromSamples(sampleNames, true); + } + public VariantContext subContextFromSample(String sampleName) { - return subContextFromSamples(Collections.singleton(sampleName), true); + return subContextFromSamples(Collections.singleton(sampleName)); } /** @@ -496,7 +508,7 @@ public class VariantContext implements Feature { // to enable tribble integratio */ public boolean isSimpleInsertion() { // can't just call !isSimpleDeletion() because of complex indels - return getType() == Type.INDEL && getReference().isNull() && isBiallelic(); + return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1; } /** @@ -504,7 +516,7 @@ public class VariantContext implements Feature { // to enable tribble integratio */ public boolean isSimpleDeletion() { // can't just call !isSimpleInsertion() because of complex indels - return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic(); + return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1; } /** @@ -548,22 +560,6 @@ 
public class VariantContext implements Feature { // to enable tribble integratio return ID; } - public boolean hasReferenceBaseForIndel() { - return REFERENCE_BASE_FOR_INDEL != null; - } - - // the indel base that gets stripped off for indels - public Byte getReferenceBaseForIndel() { - return REFERENCE_BASE_FOR_INDEL; - } - - public String getAlleleStringWithRefPadding(final Allele allele) { - if ( VCFAlleleClipper.needsPadding(this) ) - return VCFAlleleClipper.padAllele(this, allele).getDisplayString(); - else - return allele.getDisplayString(); - } - // --------------------------------------------------------------------------------------------------------- // @@ -803,8 +799,8 @@ public class VariantContext implements Feature { // to enable tribble integratio * Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map * for consistency with the multi-get function. * - * @param sampleName - * @return + * @param sampleName the sample name + * @return mapping from sample name to genotype * @throws IllegalArgumentException if sampleName isn't bound to a genotype */ public GenotypesContext getGenotypes(String sampleName) { @@ -818,7 +814,7 @@ public class VariantContext implements Feature { // to enable tribble integratio * For testing convenience only * * @param sampleNames a unique list of sample names - * @return + * @return subsetting genotypes context * @throws IllegalArgumentException if sampleName isn't bound to a genotype */ protected GenotypesContext getGenotypes(Collection sampleNames) { @@ -1006,13 +1002,13 @@ public class VariantContext implements Feature { // to enable tribble integratio /** * Run all extra-strict validation tests on a Variant Context object * - * @param reference the true reference allele - * @param paddedRefBase the reference base used for padding indels - * @param rsIDs the true dbSNP IDs + * @param reportedReference the reported reference allele + * @param observedReference the actual 
reference allele + * @param rsIDs the true dbSNP IDs */ - public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set rsIDs) { + public void extraStrictValidation(final Allele reportedReference, final Allele observedReference, final Set rsIDs) { // validate the reference - validateReferenceBases(reference, paddedRefBase); + validateReferenceBases(reportedReference, observedReference); // validate the RS IDs validateRSIDs(rsIDs); @@ -1027,18 +1023,9 @@ public class VariantContext implements Feature { // to enable tribble integratio //checkReferenceTrack(); } - public void validateReferenceBases(Allele reference, Byte paddedRefBase) { - if ( reference == null ) - return; - - // don't validate if we're a complex event - if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) { - throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); - } - - // we also need to validate the padding base for simple indels - if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) { - throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char)paddedRefBase.byteValue(), (char)getReferenceBaseForIndel().byteValue())); + public void validateReferenceBases(final Allele reportedReference, final Allele observedReference) { + if ( reportedReference != null && !reportedReference.basesMatch(observedReference) ) { + throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. 
VCF says %s", getChr(), getStart(), observedReference.getBaseString(), reportedReference.getBaseString())); } } @@ -1130,7 +1117,6 @@ public class VariantContext implements Feature { // to enable tribble integratio for (final Validation val : validationToPerform ) { switch (val) { case ALLELES: validateAlleles(); break; - case REF_PADDING: validateReferencePadding(); break; case GENOTYPES: validateGenotypes(); break; default: throw new IllegalArgumentException("Unexpected validation mode " + val); } @@ -1146,27 +1132,28 @@ public class VariantContext implements Feature { // to enable tribble integratio if ( hasAttribute(VCFConstants.END_KEY) ) { final int end = getAttributeAsInt(VCFConstants.END_KEY, -1); assert end != -1; - if ( end != getEnd() ) - throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + if ( end != getEnd() ) { + final String message = "Badly formed variant context at location " + getChr() + ":" + getStart() + "; getEnd() was " + getEnd() - + " but this VariantContext contains an END key with value " + end); + + " but this VariantContext contains an END key with value " + end; + if ( WARN_ABOUT_BAD_END ) + logger.warn(message); + else + throw new ReviewedStingException(message); + } + } else { + final long length = (stop - start) + 1; + if ( ! hasSymbolicAlleles() && length != getReference().length() ) { + throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); + } } } - private void validateReferencePadding() { - if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding... 
- return; - - boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed - - if ( needsPadding && !hasReferenceBaseForIndel() ) - throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided."); - } - private void validateAlleles() { - // check alleles - boolean alreadySeenRef = false, alreadySeenNull = false; - for ( Allele allele : alleles ) { + + boolean alreadySeenRef = false; + + for ( final Allele allele : alleles ) { // make sure there's only one reference allele if ( allele.isReference() ) { if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this); @@ -1176,26 +1163,11 @@ public class VariantContext implements Feature { // to enable tribble integratio if ( allele.isNoCall() ) { throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this); } - - // make sure there's only one null allele - if ( allele.isNull() ) { - if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this); - alreadySeenNull = true; - } } // make sure there's one reference allele if ( ! alreadySeenRef ) throw new IllegalArgumentException("No reference allele found in VariantContext"); - -// if ( getType() == Type.INDEL ) { -// if ( getReference().length() != (getLocation().size()-1) ) { - long length = (stop - start) + 1; - if ( ! 
hasSymbolicAlleles() - && ((getReference().isNull() && length != 1 ) - || (getReference().isNonNull() && (length - getReference().length() > 1)))) { - throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); - } } private void validateGenotypes() { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index f2375f6f9..d8ab4bd23 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -25,9 +25,6 @@ package org.broadinstitute.sting.utils.variantcontext; import com.google.java.contract.*; -import org.broad.tribble.Feature; -import org.broad.tribble.TribbleException; -import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -74,7 +71,6 @@ public class VariantContextBuilder { private Set filters = null; private Map attributes = null; private boolean attributesCanBeModified = false; - private Byte referenceBaseForIndel = null; /** enum of what must be validated */ final private EnumSet toValidate = EnumSet.noneOf(VariantContext.Validation.class); @@ -117,7 +113,6 @@ public class VariantContextBuilder { this.genotypes = parent.genotypes; this.ID = parent.getID(); this.log10PError = parent.getLog10PError(); - this.referenceBaseForIndel = parent.getReferenceBaseForIndel(); this.source = parent.getSource(); this.start = parent.getStart(); this.stop = parent.getEnd(); @@ -132,7 +127,6 @@ public class VariantContextBuilder { this.genotypes = parent.genotypes; this.ID = 
parent.ID; this.log10PError = parent.log10PError; - this.referenceBaseForIndel = parent.referenceBaseForIndel; this.source = parent.source; this.start = parent.start; this.stop = parent.stop; @@ -362,21 +356,6 @@ public class VariantContextBuilder { return this; } - /** - * Tells us that the resulting VariantContext should use this byte for the reference base - * Null means no refBase is available - * @param referenceBaseForIndel - */ - public VariantContextBuilder referenceBaseForIndel(final Byte referenceBaseForIndel) { - this.referenceBaseForIndel = referenceBaseForIndel; - toValidate.add(VariantContext.Validation.REF_PADDING); - return this; - } - - public VariantContextBuilder referenceBaseForIndel(final String referenceBaseForIndel) { - return referenceBaseForIndel(referenceBaseForIndel.getBytes()[0]); - } - /** * Tells us that the resulting VariantContext should have source field set to source * @param source @@ -401,7 +380,6 @@ public class VariantContextBuilder { this.start = start; this.stop = stop; toValidate.add(VariantContext.Validation.ALLELES); - toValidate.add(VariantContext.Validation.REF_PADDING); return this; } @@ -416,7 +394,6 @@ public class VariantContextBuilder { this.start = loc.getStart(); this.stop = loc.getStop(); toValidate.add(VariantContext.Validation.ALLELES); - toValidate.add(VariantContext.Validation.REF_PADDING); return this; } @@ -440,7 +417,6 @@ public class VariantContextBuilder { public VariantContextBuilder start(final long start) { this.start = start; toValidate.add(VariantContext.Validation.ALLELES); - toValidate.add(VariantContext.Validation.REF_PADDING); return this; } @@ -517,6 +493,6 @@ public class VariantContextBuilder { public VariantContext make() { return new VariantContext(source, ID, contig, start, stop, alleles, genotypes, log10PError, filters, attributes, - referenceBaseForIndel, fullyDecoded, toValidate); + fullyDecoded, toValidate); } } diff --git 
a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index d7e072980..a8f956413 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -64,9 +64,9 @@ public class VariantContextUtils { * Ensures that VC contains all of the samples in allSamples by adding missing samples to * the resulting VC with default diploid ./. genotypes * - * @param vc - * @param allSamples - * @return + * @param vc the VariantContext + * @param allSamples all of the samples needed + * @return a new VariantContext with missing samples added */ public static VariantContext addMissingSamples(final VariantContext vc, final Set allSamples) { // TODO -- what's the fastest way to do this calculation? @@ -376,9 +376,9 @@ public class VariantContextUtils { /** * @deprecated use variant context builder version instead - * @param vc - * @param keysToPreserve - * @return + * @param vc the variant context + * @param keysToPreserve the keys to preserve + * @return a pruned version of the original variant context */ @Deprecated public static VariantContext pruneVariantContext(final VariantContext vc, Collection keysToPreserve ) { @@ -486,14 +486,13 @@ public class VariantContextUtils { if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE ) verifyUniqueSampleNames(unsortedVCs); - final List prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); + final List preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); // Make sure all variant contexts are padded with reference base in case of indels if necessary final List VCs = new ArrayList(); - for (final VariantContext vc : prepaddedVCs) { - // also a reasonable place to remove filtered calls, if 
needed + for (final VariantContext vc : preFilteredVCs) { if ( ! filteredAreUncalled || vc.isNotFiltered() ) - VCs.add(VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc)); + VCs.add(vc); } if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; @@ -547,9 +546,6 @@ public class VariantContextUtils { filters.addAll(vc.getFilters()); - if ( referenceBaseForIndel == null ) - referenceBaseForIndel = vc.getReferenceBaseForIndel(); - // // add attributes // @@ -661,10 +657,9 @@ public class VariantContextUtils { builder.genotypes(genotypes); builder.log10PError(log10PError); builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); - builder.referenceBaseForIndel(referenceBaseForIndel); // Trim the padded bases of all alleles if necessary - final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make()); + final VariantContext merged = builder.make(); if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged); return merged; } @@ -700,73 +695,6 @@ public class VariantContextUtils { return true; } - private static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { - // see if we need to trim common reference base from all alleles - boolean trimVC; - - // We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles - Allele refAllele = inputVC.getReference(); - if (!inputVC.isVariant()) - trimVC = false; - else if (refAllele.isNull()) - trimVC = false; - else { - trimVC = VCFAlleleClipper.shouldClipFirstBaseP(inputVC.getAlternateAlleles(), (byte) inputVC.getReference().getDisplayString().charAt(0)); - } - - // nothing to do if we don't need to trim bases - if (trimVC) { - List alleles = new ArrayList(); - GenotypesContext genotypes = GenotypesContext.create(); - - Map originalToTrimmedAlleleMap = new HashMap(); - - for (final Allele a : inputVC.getAlleles()) { - if 
(a.isSymbolic()) { - alleles.add(a); - originalToTrimmedAlleleMap.put(a, a); - } else { - // get bases for current allele and create a new one with trimmed bases - byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length()); - Allele trimmedAllele = Allele.create(newBases, a.isReference()); - alleles.add(trimmedAllele); - originalToTrimmedAlleleMap.put(a, trimmedAllele); - } - } - - // detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation - // example: mixed records such as {TA*,TGA,TG} - boolean hasNullAlleles = false; - - for (final Allele a: originalToTrimmedAlleleMap.values()) { - if (a.isNull()) - hasNullAlleles = true; - } - - if (!hasNullAlleles) - return inputVC; - // now we can recreate new genotypes with trimmed alleles - for ( final Genotype genotype : inputVC.getGenotypes() ) { - - List originalAlleles = genotype.getAlleles(); - List trimmedAlleles = new ArrayList(); - for ( final Allele a : originalAlleles ) { - if ( a.isCalled() ) - trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); - else - trimmedAlleles.add(Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); - - } - - final VariantContextBuilder builder = new VariantContextBuilder(inputVC); - return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make(); - } - - return inputVC; - } - public static GenotypesContext stripPLs(GenotypesContext genotypes) { GenotypesContext newGs = GenotypesContext.create(genotypes.size()); @@ -819,7 +747,7 @@ public class VariantContextUtils { if ( !mappedVCs.containsKey(vc.getType()) ) mappedVCs.put(vc.getType(), new ArrayList()); mappedVCs.get(vc.getType()).add(vc); - } + } } return mappedVCs; @@ -881,10 +809,10 @@ public class VariantContextUtils { // // refAllele: ACGTGA // myRef: ACGT - // myAlt: - + // myAlt: A // // We need to remap all of the alleles in vc to 
include the extra GA so that - // myRef => refAllele and myAlt => GA + // myRef => refAllele and myAlt => AGA // Allele myRef = vc.getReference(); @@ -979,7 +907,7 @@ public class VariantContextUtils { HashMap alleleMap = new HashMap(vc.getAlleles().size()); for ( Allele originalAllele : vc.getAlleles() ) { Allele newAllele; - if ( originalAllele.isNoCall() || originalAllele.isNull() ) + if ( originalAllele.isNoCall() ) newAllele = originalAllele; else newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); @@ -1235,13 +1163,14 @@ public class VariantContextUtils { if ( ! vc.isIndel() ) // only indels are tandem repeats return null; - final Allele ref = vc.getReference(); + final Allele refAllele = vc.getReference(); + final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length()); byte[] repeatUnit = null; final ArrayList lengths = new ArrayList(); for ( final Allele allele : vc.getAlternateAlleles() ) { - Pair result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes()); + Pair result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes()); final int[] repetitionCount = result.first; // repetition count = 0 means allele is not a tandem expansion of context @@ -1256,7 +1185,7 @@ public class VariantContextUtils { repeatUnit = result.second; if (VERBOSE) { System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad); - System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0])); + System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0])); System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1])); System.out.println("RU:"+new String(repeatUnit)); } @@ -1405,4 +1334,113 @@ public class VariantContextUtils { return start + 
Math.max(ref.length() - 1, 0); } } + + public static boolean requiresPaddingBase(final List alleles) { + + // see whether one of the alleles would be null if trimmed through + + for ( final String allele : alleles ) { + if ( allele.isEmpty() ) + return true; + } + + int clipping = 0; + Character currentBase = null; + + while ( true ) { + for ( final String allele : alleles ) { + if ( allele.length() - clipping == 0 ) + return true; + + char myBase = allele.charAt(clipping); + if ( currentBase == null ) + currentBase = myBase; + else if ( currentBase != myBase ) + return false; + } + + clipping++; + currentBase = null; + } + } + + public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { + + // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed + + // see whether we need to trim common reference base from all alleles + + final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false); + if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) + return inputVC; + + final List alleles = new ArrayList(); + final GenotypesContext genotypes = GenotypesContext.create(); + final Map originalToTrimmedAlleleMap = new HashMap(); + + for (final Allele a : inputVC.getAlleles()) { + if (a.isSymbolic()) { + alleles.add(a); + originalToTrimmedAlleleMap.put(a, a); + } else { + // get bases for current allele and create a new one with trimmed bases + final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); + final Allele trimmedAllele = Allele.create(newBases, a.isReference()); + alleles.add(trimmedAllele); + originalToTrimmedAlleleMap.put(a, trimmedAllele); + } + } + + // now we can recreate new genotypes with trimmed alleles + for ( final Genotype genotype : inputVC.getGenotypes() ) { + final List originalAlleles = genotype.getAlleles(); + final List trimmedAlleles = new ArrayList(); + for ( final 
Allele a : originalAlleles ) { + if ( a.isCalled() ) + trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); + else + trimmedAlleles.add(Allele.NO_CALL); + } + genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); + } + + return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make(); + } + + public static int computeReverseClipping(final List unclippedAlleles, + final byte[] ref, + final int forwardClipping, + final boolean allowFullClip) { + int clipping = 0; + boolean stillClipping = true; + + while ( stillClipping ) { + for ( final Allele a : unclippedAlleles ) { + if ( a.isSymbolic() ) + continue; + + // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong + // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). + if ( a.length() - clipping == 0 ) + return clipping - (allowFullClip ? 
0 : 1); + + if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { + stillClipping = false; + } + else if ( ref.length == clipping ) { + if ( allowFullClip ) + stillClipping = false; + else + return -1; + } + else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { + stillClipping = false; + } + } + if ( stillClipping ) + clipping++; + } + + return clipping; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java index 2c1d99546..01dac7eb6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java @@ -191,9 +191,12 @@ public final class BCF2Encoder { @Requires("size >= 0") @Ensures("encodeStream.size() > old(encodeStream.size())") public final void encodeType(final int size, final BCF2Type type) throws IOException { - final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type); - encodeStream.write(typeByte); - if ( BCF2Utils.willOverflow(size) ) { + if ( size <= BCF2Utils.MAX_INLINE_ELEMENTS ) { + final int typeByte = BCF2Utils.encodeTypeDescriptor(size, type); + encodeStream.write(typeByte); + } else { + final int typeByte = BCF2Utils.encodeTypeDescriptor(BCF2Utils.OVERFLOW_ELEMENT_MARKER, type); + encodeStream.write(typeByte); // write in the overflow size encodeTypedInt(size); } @@ -201,12 +204,12 @@ public final class BCF2Encoder { @Ensures("encodeStream.size() > old(encodeStream.size())") public final void encodeRawInt(final int value, final BCF2Type type) throws IOException { - BCF2Utils.encodeRawBytes(value, type, encodeStream); + type.write(value, encodeStream); } @Ensures("encodeStream.size() > old(encodeStream.size())") public final void encodeRawBytes(final int value, final BCF2Type type) throws IOException { - 
BCF2Utils.encodeRawBytes(value, type, encodeStream); + type.write(value, encodeStream); } // -------------------------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java index 812e6dd07..ddeb4d284 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java @@ -47,7 +47,7 @@ import java.util.Map; */ @Invariant({ "headerLine != null", - "BCF2Type.INTEGERS.contains(dictionaryOffsetType)", + "dictionaryOffsetType.isIntegerType()", "dictionaryOffset >= 0" }) public abstract class BCF2FieldEncoder { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java index 269750c66..219daf315 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java @@ -160,7 +160,7 @@ public class BCF2FieldWriterManager { /** * Get a site writer specialized to encode values for site info field * @param field key found in the VCF header INFO records - * @return + * @return non-null writer if one can be found, or null if none exists for field */ public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) { return getWriter(field, siteWriters); @@ -169,17 +169,14 @@ public class BCF2FieldWriterManager { /** * Get a genotypes writer specialized to encode values for genotypes field * @param field key found in the VCF header FORMAT records - * @return + * @return non-null writer if one can be found, or null if none exists for field */ public 
BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) { return getWriter(field, genotypesWriters); } @Requires({"map != null", "key != null"}) - @Ensures("result != null") public T getWriter(final String key, final Map map) { - final T writer = map.get(key); - if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key); - return writer; + return map.get(key); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index 45610bbf9..a080c4e62 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -31,6 +31,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; +import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -83,6 +84,17 @@ import java.util.*; * @since 06/12 */ class BCF2Writer extends IndexingVariantContextWriter { + public static final int MAJOR_VERSION = 2; + public static final int MINOR_VERSION = 1; + + /** + * If true, we will write out the undecoded raw bytes for a genotypes block, if it + * is found in the input VC. This can be very dangerous as the genotype encoding + * depends on the exact ordering of the header. 
+ * + * TODO -- enable when the new smart VCF header code is created by Eric Banks + */ + private final static boolean WRITE_UNDECODED_GENOTYPE_BLOCK = false; final protected static Logger logger = Logger.getLogger(BCF2Writer.class); final private static boolean ALLOW_MISSING_CONTIG_LINES = false; @@ -145,8 +157,8 @@ class BCF2Writer extends IndexingVariantContextWriter { writer.close(); final byte[] headerBytes = capture.toByteArray(); - outputStream.write(BCF2Utils.MAGIC_HEADER_LINE); - BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream); + new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream); + BCF2Type.INT32.write(headerBytes.length, outputStream); outputStream.write(headerBytes); } catch (IOException e) { throw new UserException.CouldNotCreateOutputFile("BCF2 stream", "Got IOException while trying to write BCF2 header", e); @@ -237,9 +249,11 @@ class BCF2Writer extends IndexingVariantContextWriter { private BCF2Codec.LazyData getLazyData(final VariantContext vc) { if ( vc.getGenotypes().isLazyWithData() ) { - LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes(); - if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData ) + LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes(); + if ( WRITE_UNDECODED_GENOTYPE_BLOCK && lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData ) return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData(); + else + lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long } return null; @@ -264,10 +278,7 @@ class BCF2Writer extends IndexingVariantContextWriter { } private void buildAlleles( VariantContext vc ) throws IOException { - final boolean needsPadding = VCFAlleleClipper.needsPadding(vc); for ( Allele allele : vc.getAlleles() ) { - if ( needsPadding ) - allele = VCFAlleleClipper.padAllele(vc, allele); final byte[] s = allele.getDisplayBases(); if ( s == null ) throw new ReviewedStingException("BUG: BCF2Writer encountered null 
padded allele" + allele); @@ -278,6 +289,8 @@ class BCF2Writer extends IndexingVariantContextWriter { private void buildFilter( VariantContext vc ) throws IOException { if ( vc.isFiltered() ) { encodeStringsByRef(vc.getFilters()); + } else if ( vc.filtersWereApplied() ) { + encodeStringsByRef(Collections.singleton(VCFConstants.PASSES_FILTERS_v4)); } else { encoder.encodeTypedMissing(BCF2Type.INT8); } @@ -285,8 +298,9 @@ class BCF2Writer extends IndexingVariantContextWriter { private void buildInfo( VariantContext vc ) throws IOException { for ( Map.Entry infoFieldEntry : vc.getAttributes().entrySet() ) { - final String key = infoFieldEntry.getKey(); - final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key); + final String field = infoFieldEntry.getKey(); + final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(field); + if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "INFO"); writer.start(encoder, vc); writer.site(encoder, vc); writer.done(encoder, vc); @@ -294,26 +308,40 @@ class BCF2Writer extends IndexingVariantContextWriter { } private byte[] buildSamplesData(final VariantContext vc) throws IOException { - final BCF2Codec.LazyData lazyData = getLazyData(vc); + final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; - } else { - // we have to do work to convert the VC into a BCF2 byte stream - final List genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header); - for ( final String field : genotypeFields ) { - final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); - - writer.start(encoder, vc); - for ( final String name : sampleNames ) { - Genotype g = vc.getGenotype(name); - if ( g == null ) VCFWriter.missingSampleError(vc, header); - writer.addGenotype(encoder, vc, g); - } - writer.done(encoder, vc); - } - return 
encoder.getRecordBytes(); } + + // we have to do work to convert the VC into a BCF2 byte stream + final List genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header); + for ( final String field : genotypeFields ) { + final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); + if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); + + writer.start(encoder, vc); + for ( final String name : sampleNames ) { + Genotype g = vc.getGenotype(name); + if ( g == null ) VCFWriter.missingSampleError(vc, header); + writer.addGenotype(encoder, vc, g); + } + writer.done(encoder, vc); + } + return encoder.getRecordBytes(); + } + + /** + * Throws a meaningful error message when a field (INFO or FORMAT) is found when writing out a file + * but there's no header line for it. + * + * @param vc + * @param field + * @param fieldType + */ + private final void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) { + throw new UserException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " + + vc.getChr() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader"); } // -------------------------------------------------------------------------------- @@ -331,14 +359,14 @@ class BCF2Writer extends IndexingVariantContextWriter { */ @Requires({"infoBlock.length > 0", "genotypesBlock.length >= 0"}) private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException { - BCF2Utils.encodeRawBytes(infoBlock.length, BCF2Type.INT32, outputStream); - BCF2Utils.encodeRawBytes(genotypesBlock.length, BCF2Type.INT32, outputStream); + BCF2Type.INT32.write(infoBlock.length, outputStream); + BCF2Type.INT32.write(genotypesBlock.length, outputStream); outputStream.write(infoBlock); outputStream.write(genotypesBlock); } @Requires("! 
strings.isEmpty()") - @Ensures("BCF2Type.INTEGERS.contains(result)") + @Ensures("result.isIntegerType()") private final BCF2Type encodeStringsByRef(final Collection strings) throws IOException { final List offsets = new ArrayList(strings.size()); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 4548e026e..ea968e153 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -162,7 +162,6 @@ class VCFWriter extends IndexingVariantContextWriter { vc = new VariantContextBuilder(vc).noGenotypes().make(); try { - vc = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); super.add(vc); Map alleleMap = buildAlleleMap(vc); diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 86b7e60ff..76e25a3c0 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -95,6 +95,8 @@ public abstract class BaseTest { public static final String keysDataLocation = validationDataLocation + "keys/"; public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key"; + public static final String exampleFASTA = publicTestDir + "exampleFASTA.fasta"; + /** before the class starts up */ static { // setup a basic log configuration @@ -280,12 +282,12 @@ public abstract class BaseTest { private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1; public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) { - Assert.assertTrue(actual instanceof Double); + Assert.assertTrue(actual instanceof Double, "Not a double"); assertEqualsDoubleSmart((double)(Double)actual, (double)expected); } public static 
final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) { - Assert.assertTrue(actual instanceof Double); + Assert.assertTrue(actual instanceof Double, "Not a double"); assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance); } @@ -301,13 +303,13 @@ public abstract class BaseTest { public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) { if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately - Assert.assertTrue(Double.isNaN(actual)); + Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not"); else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately - Assert.assertTrue(Double.isInfinite(actual)); + Assert.assertTrue(Double.isInfinite(actual), "expected is infinite, actual is not"); else { final double delta = Math.abs(actual - expected); final double ratio = Math.abs(actual / expected - 1.0); - Assert.assertTrue(delta < tolerance || ratio < tolerance); + Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual + " not within tolerance " + tolerance); } } } diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index a997385d6..7e38c00f3 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -169,7 +169,6 @@ public class WalkerTest extends BaseTest { Class expectedException = null; boolean includeImplicitArgs = true; boolean includeShadowBCF = true; - boolean repairHeader = false; // the default output path for the integration test private File outputFileLocation = null; @@ -211,8 +210,6 @@ public class WalkerTest extends BaseTest { String.format(" -et %s -K %s ", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile)); if ( includeShadowBCF && GENERATE_SHADOW_BCF ) args = args + " 
--generateShadowBCF "; - if ( repairHeader ) - args = args + " --repairVCFHeader public/data/vcfHeaderForRepairs.vcf "; } return args; @@ -224,7 +221,6 @@ public class WalkerTest extends BaseTest { * which will ultimately blow up... */ public void disableShadowBCF() { this.includeShadowBCF = false; } - public void repairHeaders() { this.repairHeader = true; } public void setOutputFileLocation(File outputFileLocation) { this.outputFileLocation = outputFileLocation; } @@ -367,10 +363,16 @@ public class WalkerTest extends BaseTest { // it's the type we expected //System.out.println(String.format(" => %s PASSED", name)); } else { - if ( e.getCause() != null ) - e.getCause().printStackTrace(System.out); // must print to stdout to see the message - Assert.fail(String.format("Test %s expected exception %s but instead got %s with error message %s", - name, expectedException, e.getClass(), e.getMessage())); + final String message = String.format("Test %s expected exception %s but instead got %s with error message %s", + name, expectedException, e.getClass(), e.getMessage()); + if ( e.getCause() != null ) { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final PrintStream ps = new PrintStream(baos); + e.getCause().printStackTrace(ps); + BaseTest.log(message); + BaseTest.log(baos.toString()); + } + Assert.fail(message); } } else { // we didn't expect an exception but we got one :-( diff --git a/public/java/test/org/broadinstitute/sting/gatk/CommandLineGATKUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/CommandLineGATKUnitTest.java new file mode 100644 index 000000000..5de48fda9 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/CommandLineGATKUnitTest.java @@ -0,0 +1,43 @@ +package org.broadinstitute.sting.gatk; + +import net.sf.samtools.SAMFileReader; +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +/** + * @author Eric Banks + * @since 
7/18/12 + */ +public class CommandLineGATKUnitTest extends BaseTest { + + @Test(enabled = true) + public void testSamTextFileError1() { + final File samFile = new File(publicTestDir + "testfile.sam"); + final File indexFile = new File(publicTestDir + "HiSeq.1mb.1RG.bai"); + try { + final SAMFileReader reader = new SAMFileReader(samFile, indexFile, false); + + // we shouldn't get here + Assert.fail("We should have exceptioned out when trying to create a reader with an index for a textual SAM file"); + } catch (RuntimeException e) { + Assert.assertTrue(e.getMessage().indexOf(CommandLineGATK.PICARD_TEXT_SAM_FILE_ERROR_1) != -1); + } + } + + @Test(enabled = true) + public void testSamTextFileError2() { + File samFile = new File(publicTestDir + "testfile.sam"); + try { + final SAMFileReader reader = new SAMFileReader(samFile); + reader.getFilePointerSpanningReads(); + + // we shouldn't get here + Assert.fail("We should have exceptioned out when trying to call getFilePointerSpanningReads() for a textual SAM file"); + } catch (RuntimeException e) { + Assert.assertTrue(e.getMessage().indexOf(CommandLineGATK.PICARD_TEXT_SAM_FILE_ERROR_2) != -1); + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 01af55ca3..5c4db08bd 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -90,7 +90,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { super(EngineErrorHandlingTestProvider.class); this.expectedException = exceptedException; this.multiThreaded = multiThreaded; - this.iterationsToTest = multiThreaded ? 10 : 1; + this.iterationsToTest = multiThreaded ? 
1000 : 1; setName(String.format("Engine error handling: expected %s, is-multithreaded %b", exceptedException, multiThreaded)); } } @@ -110,9 +110,9 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { // Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type // @Test(dataProvider = "EngineErrorHandlingTestProvider") - public void testEngineErrorHandlingTestProvider(EngineErrorHandlingTestProvider cfg) { + public void testEngineErrorHandlingTestProvider(final EngineErrorHandlingTestProvider cfg) { for ( int i = 0; i < cfg.iterationsToTest; i++ ) { - final String root = "-T ErrorThrowing -R " + b37KGReference; + final String root = "-T ErrorThrowing -R " + exampleFASTA; final String args = root + (cfg.multiThreaded ? " -nt 2" : "") + " -E " + cfg.expectedException.getSimpleName(); WalkerTestSpec spec = new WalkerTestSpec(args, 0, cfg.expectedException); executeTest(cfg.toString(), spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index 2f8b1e9b5..d8905ad35 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -28,7 +28,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.ArgumentException; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.walkers.PrintReadsWalker; +import org.broadinstitute.sting.gatk.walkers.PrintReads; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.testng.annotations.Test; @@ -71,7 +71,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { public void testEmptyIntervalSetHandling() throws Exception { GenomeAnalysisEngine testEngine = new 
GenomeAnalysisEngine(); - testEngine.setWalker(new PrintReadsWalker()); + testEngine.setWalker(new PrintReads()); testEngine.setIntervals(new GenomeLocSortedSet(null)); testEngine.validateSuppliedIntervals(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java index 6149a1e51..1eb340356 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java @@ -24,13 +24,12 @@ package org.broadinstitute.sting.gatk; -import org.testng.Assert; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker; +import org.broadinstitute.sting.gatk.walkers.qc.CountLoci; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; - +import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -48,7 +47,7 @@ public class WalkerManagerUnitTest { @Test public void testPresentWalker() { Walker countLociWalker = walkerManager.createByName("CountLoci"); - Assert.assertEquals(CountLociWalker.class,countLociWalker.getClass()); + Assert.assertEquals(CountLoci.class,countLociWalker.getClass()); } @Test(expectedExceptions=UserException.class) @@ -58,7 +57,7 @@ public class WalkerManagerUnitTest { @Test(expectedExceptions=DynamicClassResolutionException.class) public void testUninstantiableWalker() { - walkerManager.createByName("Uninstantiable"); + walkerManager.createByName("UninstantiableWalker"); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKWalkerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKWalkerBenchmark.java index 564d1e2a3..66585c872 100644 --- 
a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKWalkerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKWalkerBenchmark.java @@ -35,8 +35,8 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker; -import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker; +import org.broadinstitute.sting.gatk.walkers.qc.CountLoci; +import org.broadinstitute.sting.gatk.walkers.qc.CountReads; import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -99,7 +99,7 @@ public class GATKWalkerBenchmark extends ReadProcessingBenchmark { private enum WalkerType { COUNT_READS { @Override - Walker create() { return new CountReadsWalker(); } + Walker create() { return new CountReads(); } }, COUNT_BASES_IN_READ { @Override @@ -108,8 +108,8 @@ public class GATKWalkerBenchmark extends ReadProcessingBenchmark { COUNT_LOCI { @Override Walker create() { - CountLociWalker walker = new CountLociWalker(); - JVMUtils.setFieldValue(JVMUtils.findField(CountLociWalker.class,"out"),walker,System.out); + CountLoci walker = new CountLoci(); + JVMUtils.setFieldValue(JVMUtils.findField(CountLoci.class,"out"),walker,System.out); return walker; } }; diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java index 1c5dab254..f2c546317 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java @@ -24,9 +24,12 @@ package 
org.broadinstitute.sting.gatk.datasources.reads; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMProgramRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.Tags; @@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.AfterMethod; @@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest { fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception"); } } + + /** Test that we clear program records when requested */ + @Test + public void testRemoveProgramRecords() { + logger.warn("Executing testRemoveProgramRecords"); + + // setup the data + readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags())); + + // use defaults + SAMDataSource data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false); + + List defaultProgramRecords = data.getHeader().getProgramRecords(); + assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor"); + + boolean removeProgramRecords = false; + data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + 
SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + BAQ.CalculationMode.OFF, + BAQ.QualityMode.DONT_MODIFY, + null, // no BAQ + null, // no BQSR + (byte) -1, + removeProgramRecords); + + List dontRemoveProgramRecords = data.getHeader().getProgramRecords(); + assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false"); + + removeProgramRecords = true; + data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + BAQ.CalculationMode.OFF, + BAQ.QualityMode.DONT_MODIFY, + null, // no BAQ + null, // no BQSR + (byte) -1, + removeProgramRecords); + + List doRemoveProgramRecords = data.getHeader().getProgramRecords(); + assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true"); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java index 724c343e4..6264758ad 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java @@ -131,7 +131,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest { @Test public void testGenerateIndexForUnindexedFile() { - File vcfFile = new File(validationDataLocation + "/ROD_validation/always_reindex.vcf"); + File vcfFile = new File(privateTestDir + "always_reindex.vcf"); File vcfFileIndex = Tribble.indexFile(vcfFile); // if we can't write to the directory, don't fault the tester, just pass @@ -141,7 +141,8 @@ public class RMDTrackBuilderUnitTest extends BaseTest { } // 
clean-up our test, and previous tests that may have written the file vcfFileIndex.deleteOnExit(); - if (vcfFileIndex.exists()) vcfFileIndex.delete(); + if (vcfFileIndex.exists()) + vcfFileIndex.delete(); try { builder.loadIndex(vcfFile, new VCFCodec()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index 9226f97e2..7845515d8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -1,25 +1,22 @@ package org.broadinstitute.sting.gatk.traversals; -import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.ReadShardBalancer; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; -import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.walkers.qc.CountReads; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import 
org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; - -import static org.testng.Assert.fail; - import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -31,6 +28,8 @@ import java.io.PrintStream; import java.util.ArrayList; import java.util.List; +import static org.testng.Assert.fail; + /** * * User: aaron @@ -106,7 +105,7 @@ public class TraverseReadsUnitTest extends BaseTest { bamList = new ArrayList(); bamList.add(bam); - countReadWalker = new CountReadsWalker(); + countReadWalker = new CountReads(); traversalEngine = new TraverseReads(); traversalEngine.initialize(engine); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java index 0fcaad3bf..1aaa00aee 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java @@ -59,7 +59,7 @@ public class PrintReadsUnitTest extends BaseTest { private ReferenceContext bases = null; //private ReferenceContext ref = new ReferenceContext() - PrintReadsWalker walker; + PrintReads walker; ArtificialSAMFileWriter writer; @BeforeMethod @@ -67,7 +67,7 @@ public class PrintReadsUnitTest extends BaseTest { trav = new ArtificialReadsTraversal(); readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads; - walker = new PrintReadsWalker(); + walker = new PrintReads(); writer = new ArtificialSAMFileWriter(); walker.out = writer; walker.initialize(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 0b45dc931..17d27c156 100755 --- 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("e0a08416249515ea18bd0663c90c9330")); + Arrays.asList("95b0627bfcac2191aed9908904e892ff")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("0b60da46ba0eabb3abe5e0288937f9b0")); + Arrays.asList("0e2509349fd6c8a9e9408c918215e1de")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("5eb576d0234c912d8efea184492691d0")); + Arrays.asList("32d81a7797605afb526983a2ab45efc2")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " 
-G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("8860524d793d24b2e32f318433fcf527")); + Arrays.asList("350539ccecea0d1f7fffd4ac29c015e7")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, - Arrays.asList("ebbf32f5b8b8d22f2eb247a0a3db3da0")); + Arrays.asList("c222361819fae035a0162f876990fdee")); executeTest("test overwriting header", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java index 8e9f2533f..f1ffbe80f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -33,15 +34,15 @@ public class BQSRGathererUnitTest { for (GATKReportTable originalTable : originalReport.getTables()) { GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName()); List columnsToTest = new LinkedList(); - columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); - columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); - if 
(originalTable.getTableName().equals(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL - columnsToTest.add(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + columnsToTest.add(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME); + columnsToTest.add(RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + if (originalTable.getTableName().equals(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL + columnsToTest.add(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1); } - else if (originalTable.getTableName().equals(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE)) { - columnsToTest.add(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); + else if (originalTable.getTableName().equals(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE)) { + columnsToTest.add(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 1c5db4262..4611f3a40 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { WalkerTestSpec spec2 = new WalkerTestSpec( "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s", 1, - Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); + Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65")); 
executeTest("testFastaAlternateReferenceIndels", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index ae5128c75..97b985a29 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -99,4 +99,13 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { Arrays.asList("8077eb3bab5ff98f12085eb04176fdc9")); executeTest("test deletions", spec); } + + @Test + public void testUnfilteredBecomesFilteredAndPass() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + + " --filterExpression 'FS > 60.0' --filterName SNP_FS -V " + privateTestDir + "unfilteredForFiltering.vcf", 1, + Arrays.asList("8ed32a2272bab8043a255362335395ef")); + executeTest("testUnfilteredBecomesFilteredAndPass", spec); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index 256f93473..f7f7999be 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -46,8 +46,9 @@ import java.util.*; public class ArtificialReadPileupTestProvider { + final String refBases = 
"ACAGAGCTGACCCTCCCTCCCCTCTCCCAGTGCAACAGCACGGGCGGCGACTGCTTTTACCGAGGCTACACGTCAGGCGTGGCGGCTGTCCAGGACTGGTACCACTTCCACTATGTGGATCTCTGCTGAGGACCAGGAAAGCCAGCACCCGCAGAGACTCTTCCCCAGTGCTCCATACGATCACCATTCTCTGCAGAAGGTCAGACGTCACTGGTGGCCCCCCAGCCTCCTCAGCAGGGAAGGATACTGTCCCGCAGATGAGATGAGCGAGAGCCGCCAGACCCACGTGACGCTGCACGACATCGACCCTCAGGCCTTGGACCAGCTGGTGCAGTTTGCCTACACGGCTGAGATTGTGGTGGGCGAGGGC"; final int contigStart = 1; - final int contigStop = 10; + final int contigStop = refBases.length(); final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1); // final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic"); final String artificialContig = "chr1"; @@ -57,16 +58,18 @@ public class ArtificialReadPileupTestProvider { final int artificialMappingQuality = 60; Map sample2RG = new HashMap(); List sampleRGs; - - final String refBases = "AGGATACTGT"; List sampleNames = new ArrayList(); private String sampleName(int i) { return sampleNames.get(i); } private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); } - public final int offset = 5; + public final int locStart = 105; // start position where we desire artificial variant + private final int readLength = 10; // desired read length in pileup + public final int readOffset = 4; + private final int readStart = locStart - readOffset; public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); - public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset); - public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10); - public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes()); + public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,locStart,locStart); + public final GenomeLoc window = 
genomeLocParser.createGenomeLoc(artificialContig,locStart-100,locStart+100); + public final String windowBases = refBases.substring(locStart-100-1,locStart+100); + public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,windowBases.getBytes()); byte BASE_QUAL = 50; @@ -90,7 +93,7 @@ public class ArtificialReadPileupTestProvider { return sampleNames; } public byte getRefByte() { - return refBases.substring(offset,offset+1).getBytes()[0]; + return referenceContext.getBase(); } public ReferenceContext getReferenceContext() { return referenceContext;} @@ -99,43 +102,34 @@ public class ArtificialReadPileupTestProvider { public Map getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) { return getAlignmentContextFromAlleles(eventLength, altBases, numReadsPerAllele, false, BASE_QUAL); } - public Map getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele, - boolean addBaseErrors, int phredScaledBaseErrorRate) { - // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); + public Map getAlignmentContextFromAlleles(final int eventLength, + final String altBases, + final int[] numReadsPerAllele, + final boolean addBaseErrors, + final int phredScaledBaseErrorRate) { + final String refChar = new String(new byte[]{referenceContext.getBase()}); - - ArrayList vcAlleles = new ArrayList(); - Allele refAllele, altAllele; - if (eventLength == 0) {// SNP case - refAllele =Allele.create(refBases.substring(offset,offset+1),true); - altAllele = Allele.create(altBases.substring(0,1), false); + String refAllele, altAllele; + if (eventLength == 0) { + // SNP case + refAllele = refChar; + altAllele = altBases.substring(0,1); } else if (eventLength>0){ // insertion - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(altBases.substring(0,eventLength), false); + refAllele = refChar; + altAllele = 
refChar+altBases/*.substring(0,eventLength)*/; } else { // deletion - refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + refAllele = new String(referenceContext.getForwardBases()).substring(0,Math.abs(eventLength)+1); + altAllele = refChar; } - int stop = loc.getStart(); - vcAlleles.add(refAllele); - vcAlleles.add(altAllele); - - final VariantContextBuilder builder = new VariantContextBuilder().source(""); - builder.loc(loc.getContig(), loc.getStart(), stop); - builder.alleles(vcAlleles); - builder.referenceBaseForIndel(referenceContext.getBase()); - builder.noGenotypes(); - - final VariantContext vc = builder.make(); Map contexts = new HashMap(); for (String sample: sampleNames) { - AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate)); + AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc, refAllele, altAllele, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate)); contexts.put(sample,context); } @@ -149,73 +143,79 @@ public class ArtificialReadPileupTestProvider { rg.setSample(name); return rg; } - private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases, + + private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases, int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) { List pileupElements = new ArrayList(); - int readStart = contigStart; - int offset = (contigStop-contigStart+1)/2; - int refAlleleLength = 0; - int readCounter = 0; - int alleleCounter = 0; - for (Allele allele: vc.getAlleles()) { - if (allele.isReference()) - refAlleleLength = allele.getBases().length; - - int alleleLength = allele.getBases().length; - - for ( int d = 0; d < numReadsPerAllele[alleleCounter]; 
d++ ) { - byte[] readBases = trueHaplotype(allele, offset, refAlleleLength); - if (addErrors) - addBaseErrors(readBases, phredScaledErrorRate); - - byte[] readQuals = new byte[readBases.length]; - Arrays.fill(readQuals, (byte)phredScaledErrorRate); - - GATKSAMRecord read = new GATKSAMRecord(header); - read.setBaseQualities(readQuals); - read.setReadBases(readBases); - read.setReadName(artificialReadName+readCounter++); - - boolean isBeforeDeletion = false, isBeforeInsertion = false; - if (allele.isReference()) - read.setCigarString(readBases.length + "M"); - else { - isBeforeDeletion = alleleLengthrefAlleleLength; - if (isBeforeDeletion || isBeforeInsertion) - read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") + - (readBases.length-offset)+"M"); - else // SNP case - read.setCigarString(readBases.length+"M"); - } - - int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0)); - read.setReadPairedFlag(false); - read.setAlignmentStart(readStart); - read.setMappingQuality(artificialMappingQuality); - read.setReferenceName(loc.getContig()); - read.setReadNegativeStrandFlag(false); - read.setAttribute("RG", sampleRG(sample).getReadGroupId()); - - - pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength)); - } - alleleCounter++; - } + final int refAlleleLength = refAllele.length(); + pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true)); + pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false)); return new ReadBackedPileupImpl(loc,pileupElements); } - private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) { + private List createPileupElements(String allele, GenomeLoc loc, int 
numReadsPerAllele, String sample, int readStart, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) { + + int alleleLength = allele.length(); + List pileupElements = new ArrayList(); + + int readCounter = 0; + for ( int d = 0; d < numReadsPerAllele; d++ ) { + byte[] readBases = trueHaplotype(allele, refAlleleLength, readLength); + if (addErrors) + addBaseErrors(readBases, phredScaledErrorRate); + + byte[] readQuals = new byte[readBases.length]; + Arrays.fill(readQuals, (byte)phredScaledErrorRate); + + GATKSAMRecord read = new GATKSAMRecord(header); + read.setBaseQualities(readQuals); + read.setReadBases(readBases); + read.setReadName(artificialReadName+readCounter++); + + boolean isBeforeDeletion = alleleLengthrefAlleleLength; + + int eventLength = alleleLength - refAlleleLength; + if (isReference) + read.setCigarString(readBases.length + "M"); + else { + if (isBeforeDeletion || isBeforeInsertion) + read.setCigarString((readOffset+1)+"M"+ Math.abs(eventLength) + (isBeforeDeletion?"D":"I") + + (readBases.length-readOffset)+"M"); + else // SNP case + read.setCigarString(readBases.length+"M"); + } + + read.setReadPairedFlag(false); + read.setAlignmentStart(readStart); + read.setMappingQuality(artificialMappingQuality); + read.setReferenceName(loc.getContig()); + read.setReadNegativeStrandFlag(false); + read.setAttribute("RG", sampleRG(sample).getReadGroupId()); + + + pileupElements.add(new PileupElement(read,readOffset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases,Math.abs(eventLength))); + } + + return pileupElements; + } + + /** + * Create haplotype with desired allele and reference context + * @param allele Desired allele string + * @param refAlleleLength Length of reference allele. 
+ * @param desiredLength Desired haplotype length + * @return String with haplotype formed by (prefix)+allele bases + postfix + */ + private byte[] trueHaplotype(final String allele, final int refAlleleLength, final int desiredLength) { // create haplotype based on a particular allele - String prefix = refBases.substring(offset); - String alleleBases = new String(allele.getBases()); - String postfix = refBases.substring(offset+refAlleleLength,refBases.length()); - - return (prefix+alleleBases+postfix).getBytes(); - + final int startIdx= locStart - readOffset-1; + final String prefix = refBases.substring(startIdx, locStart-1); + final String postfix = refBases.substring(locStart+refAlleleLength-1,startIdx + desiredLength); + return (prefix+allele+postfix).getBytes(); } private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java index c7ef51d0c..85528f58b 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java @@ -45,7 +45,6 @@ import org.testng.annotations.Test; */ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { - final String refBases = "AGGATACTGT"; final int nSamples = 1; final int[] numReadsPerAllele = new int[]{10,10}; final String SAMPLE_PREFIX = "sample"; @@ -65,21 +64,19 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { @Test public void testBasicConsensusCounts() { // 4 inserted bases, min cnt = 10 - String altBases = "CCTCCTGAGA"; + String altBases = "CCTC"; int eventLength = 4; List alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - 
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength)); + Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength)); - - //altBases = "CCTCMTGAGA"; - + // test deletions eventLength = 3; alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength)); + Assert.assertEquals(alleles.get(0).getBaseString().substring(1,eventLength), new String(pileupProvider.getReferenceContext().getForwardBases()).substring(1,eventLength)); // same with min Reads = 11 alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases); @@ -92,14 +89,14 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { Assert.assertEquals(alleles.size(),0); // test N's in insertions - altBases = "CCTCNTGAGA"; + altBases = "CCTC"; eventLength = 4; alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength)); + Assert.assertEquals(alleles.get(1).getBaseString().substring(1,eventLength+1), altBases); - altBases = "CCTCNTGAGA"; + altBases = "CCTCN"; eventLength = 5; alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 616503cff..7b6e1ee96 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { 
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("f98c38defc8d619609399b4a3ba874e8")); + Arrays.asList("0039fd0464c87e6ce66c4c8670fd8dfa")); executeTest("test MultiSample Pilot1", spec); } @@ -36,7 +36,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("6f0c49b76225e2099c74015b6f79c96d")); + Arrays.asList("d1e68d4db6585ec00213b1d2d05e01a9")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); } @@ -44,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("055012eca193a1f12421ea79bef1f4e0")); + Arrays.asList("b53860d209f8440f12b78d01606553e1")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("736607ee529b5624a3ab5521ab9e1b35")); + Arrays.asList("61007c22c00a2871237280914a8f88f0")); executeTest("test 
SingleSample Pilot2", spec); } @@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("f33507add5d5c30448948906467dd3f3")); + Arrays.asList("feda4a38bba096f7b740a146055509c2")); executeTest("test Multiple SNP alleles", spec); } @@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("0d724551e00129730b95fd4d70faaa58")); + Arrays.asList("0ff525e65c5836289c454c76ead5d80e")); executeTest("test reverse trim", spec); } @@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "fe3429b736c50bb770e40c0320d498ed"; + private final static String COMPRESSED_OUTPUT_MD5 = "e1a17f8f852c3d639f26e659d37bc1e5"; @Test public void testCompressedOutput() { @@ -139,7 +139,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinBaseQualityScore() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1, - Arrays.asList("b341b87742848a3224115fe94e73f244")); + 
Arrays.asList("b0b92abbaaa4c787dce6f1b302f983ee")); executeTest("test min_base_quality_score 26", spec); } @@ -147,7 +147,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("acb5332a267927d78edd51d93685111c")); + Arrays.asList("186d33429756c89aad6cd89424d6dc94")); executeTest("test SLOD", spec); } @@ -155,7 +155,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNDA() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("74779b59730962bdf36a7a8ef84ac24d")); + Arrays.asList("11b87f68b8530da168c1418513115f30")); executeTest("test NDA", spec); } @@ -163,23 +163,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("036edf58a4ed6c626f53bd2ab34b9f97")); + Arrays.asList("d2be4b1af1f29579c4f96c08e1ddd871")); executeTest("test using comp track", spec); } @Test public void testOutputParameterSitesOnly() { - testOutputParameters("-sites_only", "52b8336f347d182c158e8384b78f5a6d"); + testOutputParameters("-sites_only", "0055bd060e6ef53a6b836903d68953c9"); } @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", 
"281363e6afb3260143bfdb22710e3d0e"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "235bec0a7b2d901442261104db18f5eb"); } @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "a802b672850b6fbc2764611d3ad071d9"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "7c57ede7019063c19aa9d2136045d84f"); } private void testOutputParameters(final String args, final String md5) { @@ -193,7 +193,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("99ef7ba1747c7289ce1f963130539e18")); + Arrays.asList("3f8d724a5158adac4df38c4e2ed04167")); executeTest("test confidence 1", spec1); } @@ -201,7 +201,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("99ef7ba1747c7289ce1f963130539e18")); + Arrays.asList("3f8d724a5158adac4df38c4e2ed04167")); executeTest("test confidence 2", spec2); } @@ -212,12 +212,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "7e1681b9052e357ca4a065fa76c8afb6" ); + testHeterozosity( 0.01, "7e7384a3a52e19f76f368c2f4561d510" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "68a12f3eccac6cf4b27b6424f23628ee" ); + testHeterozosity( 1.0 / 1850, "3d16366d870c086e894c07c9da411795" ); } private void 
testHeterozosity(final double arg, final String md5) { @@ -241,7 +241,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("b098a7744a448cf91a50886e4cc7d268")); + Arrays.asList("58abc4f504d3afd42271e290ac846c4b")); executeTest(String.format("test multiple technologies"), spec); } @@ -260,7 +260,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("2f008169b82d542ec9cc94908c395a0f")); + Arrays.asList("e247f579f01eb698cfa1ae1e8a3995a8")); executeTest(String.format("test calling with BAQ"), spec); } @@ -279,7 +279,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("0d4177d7f963f4b4e8568613e7a468f0")); + Arrays.asList("cc2167dce156f70f5a31ac3dce499266")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -307,7 +307,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("181c4ed8dd084b83f8de92123bb85c41")); + Arrays.asList("10c86ff98ad5ab800d208b435bcfbd7d")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("5250cefb1fff262a6a3985dee29c154d")); + Arrays.asList("c0c4dbb050296633a3150b104b77e05a")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode 
EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("ebba1c06555c469cfb01d87f34aa6783")); + Arrays.asList("2472722f87f8718861698f60bbba2462")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -335,13 +335,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("01fd223deb4f88fb7d9ee9736b664d8a")); + Arrays.asList("eeb64b261f0a44aa478d753dbbf9378e")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("c48c8a1a8ec88c6f3c99187e08496ae0")); + Arrays.asList("d0a66c234056bb83dd84113bc2421f1e")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -355,6 +355,19 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec); } + @Test + public void testBaseIndelQualityScores() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseCommandIndelsb37 + + " -I " + privateTestDir + "NA12878.100kb.BQSRv2.example.bam" + + " -o %s" + + " -L 20:10,000,000-10,100,000", + 1, + Arrays.asList("b3c923ed9efa04b85fc18a9b45c8d2a6")); + + executeTest(String.format("test UG with base indel quality scores"), spec); + } + // 
-------------------------------------------------------------------------------------------------------------- // // testing SnpEff @@ -373,18 +386,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- // - // testing SnpEff + // testing MinIndelFraction // // -------------------------------------------------------------------------------------------------------------- final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation + "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030"; - + @Test public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("25465c6dd3c4845f61b0f8e383388824")); + Arrays.asList("160600dfa8e46f91dbb5d574517aac74")); executeTest("test minIndelFraction 0.0", spec); } @@ -403,4 +416,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { Arrays.asList("3f07efb768e08650a7ce333edd4f9a52")); executeTest("test minIndelFraction 1.0", spec); } + + // -------------------------------------------------------------------------------------------------------------- + // + // testing Ns in CIGAR + // + // -------------------------------------------------------------------------------------------------------------- + + @Test + public void testNsInCigar() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1, + Arrays.asList("22c9fd65ce3298bd7fbf400c9c209f29")); + executeTest("test calling on reads with Ns in CIGAR", spec); + } } diff --git 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java index 19d1e4cb3..9b0fbf650 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","f8721f4f5d3bae2848ae15c3f120709b") + Arrays.asList("f4b0b5471e03306ee2fad27d88b217b6","f8721f4f5d3bae2848ae15c3f120709b") ); executeTest("testTrueNegativeMV", spec); } @@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","547fdfef393f3045a96d245ef6af8acb") + Arrays.asList("dbc64776dcc9e01a468b61e4e0db8277","547fdfef393f3045a96d245ef6af8acb") ); executeTest("testTruePositiveMV", spec); } @@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("719d681bb0a52a40bc854bba107c5c94","9529e2bf214d72e792d93fbea22a3b91") + Arrays.asList("37793e78861bb0bc070884da67dc10e6","9529e2bf214d72e792d93fbea22a3b91") ); executeTest("testFalsePositiveMV", spec); } @@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","8c157d79dd00063d2932f0d2b96f53d8") + Arrays.asList("e4da7639bb542d6440975da12b94973f","8c157d79dd00063d2932f0d2b96f53d8") ); executeTest("testSpecialCases", spec); } @@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","343e418850ae4a687ebef2acd55fcb07") + 
Arrays.asList("ab92b714471a000285577d540e1fdc2e","343e418850ae4a687ebef2acd55fcb07") ); executeTest("testPriorOption", spec); } @@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-fatherAlleleFirst" ), 2, - Arrays.asList("60ced3d078792a150a03640b62926857","52ffa82428e63ade22ea37b72ae58492") + Arrays.asList("4b937c1b4e96602a7479b07b59254d06","52ffa82428e63ade22ea37b72ae58492") ); executeTest("testFatherAlleleFirst", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java deleted file mode 100755 index 04c7caf5d..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ /dev/null @@ -1,368 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.io.File; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class RecalibrationWalkersIntegrationTest extends WalkerTest { - static HashMap paramsFiles = new HashMap(); - static HashMap paramsFilesSolidIndels = new HashMap(); - - private static final class CCTest extends TestDataProvider { - String file, md5; - - private CCTest(final String file, final String md5) { - super(CCTest.class); - this.file = file; - this.md5 = md5; - } - - public String toString() { - return "CCTest: " + file; - } - } - - @DataProvider(name = "cctestdata") - public Object[][] createCCTestData() { - - new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "ab4940a16ab990181bd8368c76b23853" ); - new CCTest( validationDataLocation + 
"NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941"); - new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "714e65d6cb51ae32221a77ce84cbbcdc" ); - new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "932f0063abb2a23c22ec992ef8d36aa5" ); - return CCTest.getTests(CCTest.class); - } - - @Test(dataProvider = "cctestdata") - public void testCountCovariates1(CCTest test) { - testCC(test, ""); - } - - @Test(dataProvider = "cctestdata") - public void testCountCovariates4(CCTest test) { - testCC(test, " -nt 4"); - } - - private final void testCC(CCTest test, String parallelism) { - String bam = test.file; - String md5 = test.md5; - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -knownSites " + b36dbSNP129 + - " -T CountCovariates" + - " -I " + bam + - ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) - ? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) + - " -cov ReadGroupCovariate" + - " -cov QualityScoreCovariate" + - " -cov CycleCovariate" + - " -cov DinucCovariate" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s" + parallelism, - 1, // just one output file - Arrays.asList(md5)); - List result = executeTest("testCountCovariates1" + parallelism, spec).getFirst(); - paramsFiles.put(bam, result.get(0).getAbsolutePath()); - } - - - private static final class TRTest extends TestDataProvider { - String file, md5; - - private TRTest(final String file, final String md5) { - super(TRTest.class); - this.file = file; - this.md5 = md5; - } - - public String toString() { - return "TRTest: " + file; - } - } - - @DataProvider(name = "trtestdata") - public Object[][] createTRTestData() { - new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0b7123ae9f4155484b68e4a4f96c5504" ); - new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", 
"d04cf1f6df486e45226ebfbf93a188a5"); - new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "74314e5562c1a65547bb0edaacffe602" ); - new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "41c2f82f7789421f3690ed3c35b8f2e4" ); - - return TRTest.getTests(TRTest.class); - } - - @Test(dataProvider = "trtestdata", dependsOnMethods = "testCountCovariates1") - public void testTableRecalibrator1(TRTest test) { - String bam = test.file; - String md5 = test.md5; - String paramsFile = paramsFiles.get(bam); - System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); - if ( paramsFile != null ) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + b36KGReference + - " -T TableRecalibration" + - " -I " + bam + - ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) - ? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) + - " -o %s" + - " --no_pg_tag" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile " + paramsFile, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testTableRecalibrator1", spec); - } - else { - throw new IllegalStateException("testTableRecalibrator1: paramsFile was null"); - } - } - - @Test - public void testCountCovariatesUseOriginalQuals() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "0b88d0e8c97e83bdeee2064b6730abff"); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -T CountCovariates" + - " -I " + bam + - " -L 1:1-1,000" + - " -standard" + - " -OQ" + - " -recalFile %s" + - " -knownSites " + b36dbSNP129, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testCountCovariatesUseOriginalQuals", spec); - } - } - - @Test(dependsOnMethods = "testCountCovariates1") - public void testTableRecalibratorMaxQ70() 
{ - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0b7123ae9f4155484b68e4a4f96c5504" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - String paramsFile = paramsFiles.get(bam); - System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); - if ( paramsFile != null ) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + b36KGReference + - " -T TableRecalibration" + - " -I " + bam + - ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) - ? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) + - " -o %s" + - " --no_pg_tag" + - " -maxQ 70" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile " + paramsFile, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testTableRecalibratorMaxQ70", spec); - } - else { - throw new IllegalStateException("testTableRecalibratorMaxQ70: paramsFile was null"); - } - } - } - - @Test - public void testCountCovariatesSolidIndelsRemoveRefBias() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "8379f24cf5312587a1f92c162ecc220f" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -knownSites " + b36dbSNP129 + - " -T CountCovariates" + - " -I " + bam + - " -standard" + - " -U" + - " -L 1:10,000,000-20,000,000" + - " --solid_recal_mode REMOVE_REF_BIAS" + - " -recalFile %s", - 1, // just one output file - Arrays.asList(md5)); - List result = executeTest("testCountCovariatesSolidIndelsRemoveRefBias", spec).getFirst(); - paramsFilesSolidIndels.put(bam, result.get(0).getAbsolutePath()); - } - } - - @Test(dependsOnMethods = "testCountCovariatesSolidIndelsRemoveRefBias") - public void testTableRecalibratorSolidIndelsRemoveRefBias() { - HashMap e = new 
HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "2ad4c17ac3ed380071137e4e53a398a5" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - String paramsFile = paramsFilesSolidIndels.get(bam); - System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); - if ( paramsFile != null ) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + b36KGReference + - " -T TableRecalibration" + - " -I " + bam + - " -o %s" + - " --no_pg_tag" + - " -U" + - " -L 1:10,000,000-20,000,000" + - " --solid_recal_mode REMOVE_REF_BIAS" + - " -recalFile " + paramsFile, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testTableRecalibratorSolidIndelsRemoveRefBias", spec); - } - else { - throw new IllegalStateException("testTableRecalibratorSolidIndelsRemoveRefBias: paramsFile was null"); - } - } - } - - @Test - public void testCountCovariatesBED() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7e973328751d233653530245d404a64d"); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -knownSites:bed " + validationDataLocation + "recalibrationTest.bed" + - " -T CountCovariates" + - " -I " + bam + - " -L 1:10,000,000-10,200,000" + - " -standard" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s", - 1, // just one output file - Arrays.asList(md5)); - executeTest("testCountCovariatesBED", spec); - } - } - - @Test - public void testCountCovariatesVCFPlusDBsnp() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "fd9e37879069aa6d84436c25e472b9e9"); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new 
WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf" + - " -T CountCovariates" + - " -I " + bam + - " -knownSites " + b36dbSNP129 + - " -L 1:10,000,000-10,200,000" + - " -cov ReadGroupCovariate" + - " -cov QualityScoreCovariate" + - " -cov CycleCovariate" + - " -cov DinucCovariate" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s", - 1, // just one output file - Arrays.asList(md5)); - executeTest("testCountCovariatesVCFPlusDBsnp", spec); - } - } - - @Test - public void testCountCovariatesNoIndex() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "aac7df368ca589dc0a66d5bd9ad007e3" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -knownSites " + b36dbSNP129 + - " -T CountCovariates" + - " -I " + bam + - " -cov ReadGroupCovariate" + - " -cov QualityScoreCovariate" + - " --solid_recal_mode DO_NOTHING" + - " -recalFile %s" + - " -U", - 1, // just one output file - Arrays.asList(md5)); - List result = executeTest("testCountCovariatesNoIndex", spec).getFirst(); - paramsFiles.put(bam, result.get(0).getAbsolutePath()); - } - } - - @Test(dependsOnMethods = "testCountCovariatesNoIndex") - public void testTableRecalibratorNoIndex() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "02249d9933481052df75c58a2a1a8e63" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - String paramsFile = paramsFiles.get(bam); - System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); - if ( paramsFile != null ) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + b36KGReference + - " -T TableRecalibration" + - " -I " + bam + - " -o %s" + - " 
--no_pg_tag" + - " --solid_recal_mode DO_NOTHING" + - " -recalFile " + paramsFile + - " -U", - 1, // just one output file - Arrays.asList(md5)); - executeTest("testTableRecalibratorNoIndex", spec); - } - else { - throw new IllegalStateException("testTableRecalibratorNoIndex: paramsFile was null"); - } - } - } - - @Test - public void testCountCovariatesFailWithoutDBSNP() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -T CountCovariates" + - " -I " + bam + - " -L 1:10,000,000-10,200,000" + - " -standard" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s", - 1, // just one output file - UserException.CommandLineException.class); - executeTest("testCountCovariatesFailWithoutDBSNP", spec); - } - } - -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersLargeScaleTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersLargeScaleTest.java deleted file mode 100755 index 3f956e3b9..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersLargeScaleTest.java +++ /dev/null @@ -1,80 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; - -import java.util.ArrayList; - - -public class RecalibrationWalkersLargeScaleTest extends WalkerTest { - - private void testCountCovariatesWholeGenomeRunner(String moreArgs) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + hg18Reference + - " -T CountCovariates" + - " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + - " -L chr1:1-50,000,000" + - " -standard" + - " -OQ" + - " -knownSites " + GATKDataLocation + "dbsnp_132.hg18.vcf" + - " 
-recalFile /dev/null" + moreArgs, - 0, - new ArrayList(0)); - executeTest("testCountCovariatesWholeGenome", spec); - } - - private void testCountCovariatesWholeExomeRunner(String moreArgs) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + hg18Reference + - " -T CountCovariates" + - " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + - " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + - " -standard" + - " -OQ" + - " -knownSites " + GATKDataLocation + "dbsnp_132.hg18.vcf" + - " -recalFile /dev/null" + moreArgs, - 0, - new ArrayList(0)); - executeTest("testCountCovariatesWholeExome", spec); - } - - @Test - public void testCountCovariatesWholeGenome() { testCountCovariatesWholeGenomeRunner(""); } - @Test - public void testCountCovariatesWholeGenomeParallel() { testCountCovariatesWholeGenomeRunner(" -nt 4"); } - - @Test - public void testCountCovariatesWholeExome() { testCountCovariatesWholeExomeRunner(""); } - @Test - public void testCountCovariatesWholeExomeParallel() { testCountCovariatesWholeExomeRunner(" -nt 4"); } - - @Test - public void testTableRecalibratorWholeGenome() { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + hg18Reference + - " -T TableRecalibration" + - " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + - " -L chr1:1-50,000,000" + - " -OQ" + - " -recalFile " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.recal.csv" + - " -o /dev/null", - 0, - new ArrayList(0)); - executeTest("testTableRecalibratorWholeGenome", spec); - } - - @Test - public void testTableRecalibratorWholeExome() { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + hg18Reference + - " -T TableRecalibration" + - " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + - " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + - " -OQ" + - " -recalFile " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.recal.csv" + - " -o /dev/null", - 0, - new 
ArrayList(0)); - executeTest("testTableRecalibratorWholeExome", spec); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java index 7a849a819..80eda5ed9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java @@ -23,7 +23,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("27f9450afa132888a8994167f0035fd7")); + Arrays.asList("240d99b58f73985fb114abe9044c0271")); executeTest("Test probes", spec); } @@ -36,7 +36,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1")); + Arrays.asList("6e7789445e29d91979a21e78d3d53295")); executeTest("Test probes", spec); } @@ -49,7 +49,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("77b3f30e38fedad812125bdf6cf3255f")); + Arrays.asList("18d7236208db603e143b40db06ef2aca")); executeTest("Test probes", spec); } diff --git 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 43812d12d..94e52c2b9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -27,11 +27,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.Test; +import org.testng.annotations.DataProvider; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; public class VariantEvalIntegrationTest extends WalkerTest { - private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/"; + private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/"; private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf"; private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf"; @@ -119,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0") + Arrays.asList("40abbc9be663aed8ee1158f832463ca8") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -141,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("087a2d9943c53e7f49663667c3305c7e") + Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -613,4 
+616,37 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testWithAC0() { testIncludingAC0(true, "c786128cfe4d3e28cdbc15c5c838ad20"); } @Test public void testWithoutAC0() { testIncludingAC0(false, "7bc505c07d9aee49571ad4b3fc9f7feb"); } + // + // Test validation report is doing the right thing with sites only and genotypes files + // where the validation comp has more genotypes than eval + // + @Test(dataProvider = "testValidationReportData") + public void testValidationReport(final String name, final String eval, final String comp, final String md5) { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-eval " + eval, + "-comp " + comp, + "-L 20:10,000,000-10,000,010 -noST -noEV -EV ValidationReport -o %s" + ), + 1, + Arrays.asList(md5)); + executeTest("testValidationReport with " + name, spec); + } + + @DataProvider(name = "testValidationReportData") + public Object[][] testValidationReportData() { + final String compGenotypes = privateTestDir + "/validationReportComp.vcf"; + final String compSites = privateTestDir + "/validationReportComp.noGenotypes.vcf"; + final String evalGenotypes = privateTestDir + "/validationReportEval.vcf"; + final String evalSites = privateTestDir + "/validationReportEval.noGenotypes.vcf"; + + List tests = new ArrayList(); + tests.add(new Object[]{"sites/sites", evalSites, compSites, "0b32e19efce28087cdc7b58e17ed633a"}); + tests.add(new Object[]{"sites/genotypes", evalSites, compGenotypes, "e2ffecee4a3acd0da7dd7fe10a59b2bc"}); + tests.add(new Object[]{"genotypes/sites", evalGenotypes, compSites, "f0dbb848a94b451e42765b0cb9d09ee2"}); + tests.add(new Object[]{"genotypes/genotypes", evalGenotypes, compGenotypes, "73790b530595fcbd467a88475ea9717f"}); + return tests.toArray(new Object[][]{}); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerUnitTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerUnitTest.java index ca06ca699..f6c12f443 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerUnitTest.java @@ -49,13 +49,13 @@ import java.util.*; public class VariantEvalWalkerUnitTest extends BaseTest { - VariantEvalWalker VEwalker; + VariantEval VEwalker; VariantContext eval; @BeforeMethod public void init() { - VEwalker = new VariantEvalWalker(); + VEwalker = new VariantEval(); eval = new VariantContextBuilder("x", "chr1", 1, 1, Collections.singleton(Allele.create("A", true))).make(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 393905961..d1ecbb0bf 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -13,17 +13,22 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { String recalMD5; String cutVCFMD5; public VRTest(String inVCF, String tranchesMD5, String recalMD5, String cutVCFMD5) { - this.inVCF = validationDataLocation + inVCF; + this.inVCF = inVCF; this.tranchesMD5 = tranchesMD5; this.recalMD5 = recalMD5; this.cutVCFMD5 = cutVCFMD5; } + + @Override + public String toString() { + return "VRTest{inVCF='" + inVCF +"'}"; + } } - VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", - "62f81e7d2082fbc71cae0101c27fefad", // tranches - "b9709e4180e56abc691b208bd3e8626c", // recal file - "75c178345f70ca2eb90205662fbdf968"); // cut VCF + VRTest lowPass = 
new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf", + "f360ce3eb2b0b887301be917a9843e2b", // tranches + "287fea5ea066bf3fdd71f5ce9b58eab3", // recal file + "356b9570817b9389da71fbe991d8b2f5"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { @@ -69,14 +74,72 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { executeTest("testApplyRecalibration-"+params.inVCF, spec); } - VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf", + VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", + "a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches + "1cdf8c9ee77d91d1ba7f002573108bad", // recal file + "62fda105e14b619a1c263855cf56af1d"); // cut VCF + + @DataProvider(name = "VRBCFTest") + public Object[][] createVRBCFTest() { + return new Object[][]{ {bcfTest} }; + //return new Object[][]{ {yriTrio}, {lowPass} }; // Add hg19 chr20 trio calls here + } + + @Test(dataProvider = "VRBCFTest") + public void testVariantRecalibratorWithBCF(VRTest params) { + //System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b37KGReference + + " -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" + + " -resource:truth=true,training=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" + + " -resource:training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" + + " -T VariantRecalibrator" + + " -input " + params.inVCF + + " -L 20:10,000,000-20,000,000" + + " --no_cmdline_in_header" + + " -an AC " + // integer value + " -an QD -an ReadPosRankSum -an FS -an InbreedingCoeff " + // floats value + " -mG 2 "+ + " -recalFile %s" + + " -tranchesFile %s", + 2, + Arrays.asList("bcf", "txt"), + Arrays.asList(params.recalMD5, params.tranchesMD5)); + 
executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst(); + } + + @Test(dataProvider = "VRBCFTest", dependsOnMethods="testVariantRecalibratorWithBCF") + public void testApplyRecalibrationWithBCF(VRTest params) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b37KGReference + + " -T ApplyRecalibration" + + " -L 20:10,000,000-20,000,000" + + " --no_cmdline_in_header" + + " -input " + params.inVCF + + " -U LENIENT_VCF_PROCESSING -o %s" + + " -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) + + " -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null), + Arrays.asList(params.cutVCFMD5)); + spec.disableShadowBCF(); + executeTest("testApplyRecalibration-"+params.inVCF, spec); + } + + + VRTest indelUnfiltered = new VRTest( + validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as . "b7589cd098dc153ec64c02dcff2838e4", // tranches "a04a9001f62eff43d363f4d63769f3ee", // recal file - "888eb042dd33b807bcbb8630896fda94"); // cut VCF + "64f576881e21323dd4078262604717a2"); // cut VCF + + VRTest indelFiltered = new VRTest( + validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS + "b7589cd098dc153ec64c02dcff2838e4", // tranches + "a04a9001f62eff43d363f4d63769f3ee", // recal file + "af22c55d91394c56a222fd40d6d54781"); // cut VCF @DataProvider(name = "VRIndelTest") - public Object[][] createData2() { - return new Object[][]{ {indel} }; + public Object[][] createTestVariantRecalibratorIndel() { + return new Object[][]{ {indelUnfiltered}, {indelFiltered} }; } @Test(dataProvider = "VRIndelTest") diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index bbee99ba6..3b60fa2c2 100755 --- 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -98,16 +98,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); } @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format - @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); } + @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); } @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); } @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); } + @Test public void combine2Indels() { 
combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index 3277f5060..6a3d755d7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -125,4 +125,14 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { executeTest("test bad ref allele in deletion", spec); } + @Test + public void testComplexEvents() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("complexEvents.vcf", "ALL"), + 0, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e") + ); + + executeTest("test validating complex events", spec); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java index ec08d97c5..ddffb6e4c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java @@ -31,6 +31,8 @@ import 
net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -53,11 +55,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); - h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAACTTACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA"; - basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 16, h1Cigar, bases, h1bases); } @Test @@ -68,11 +70,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); - h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA"; + basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA"; - basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases); } @Test @@ -102,11 +104,11 @@ public class HaplotypeUnitTest extends BaseTest { 
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + basicInsertTest("C", "CACTT", 6, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC"; - basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + basicInsertTest("G", "GACTT", 16, h1Cigar, bases, h1bases); } @Test @@ -120,12 +122,12 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(3, CigarOperator.D)); h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); - String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); - h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + String h1bases = "A" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("ATCG", "A", 0, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATAAAG" + "AGGGGGA" + "AGGC"; + basicInsertTest("CGATC", "AAA", 6, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC"; - basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + basicInsertTest("GGGGG", "G", 16, h1Cigar, bases, h1bases); } @Test @@ -148,13 +150,16 @@ public class HaplotypeUnitTest extends BaseTest { } private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) { - final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 
0 : 1); final Haplotype h = new Haplotype(hap.getBytes()); final Allele h1refAllele = Allele.create(ref, true); final Allele h1altAllele = Allele.create(alt, false); + final ArrayList alleles = new ArrayList(); + alleles.add(h1refAllele); + alleles.add(h1altAllele); + final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make(); h.setAlignmentStartHapwrtRef(0); h.setCigar(cigar); - final Haplotype h1 = h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE); + final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc); final Haplotype h1expected = new Haplotype(newHap.getBytes()); Assert.assertEquals(h1, h1expected); } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 282f19d8a..f7c564c74 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -123,12 +123,12 @@ public class ActivityProfileUnitTest extends BaseTest { for ( int i = 0; i < cfg.probs.size(); i++ ) { double p = cfg.probs.get(i); GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); - profile.add(loc, p); + profile.add(loc, new ActivityProfileResult(p)); } Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); Assert.assertEquals(profile.size(), cfg.probs.size()); - Assert.assertEquals(profile.isActiveList, cfg.probs); + assertProbsAreEqual(profile.isActiveList, cfg.probs); assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions); } @@ -140,5 +140,12 @@ public class 
ActivityProfileUnitTest extends BaseTest { } } + private void assertProbsAreEqual(List actual, List expected) { + Assert.assertEquals(actual.size(), expected.size()); + for ( int i = 0; i < actual.size(); i++ ) { + Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i)); + } + } + // todo -- test extensions } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java index a0feef186..77050c069 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java @@ -351,7 +351,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { public void testEncodingListOfString(List strings, String expected) throws IOException { final String collapsed = BCF2Utils.collapseStringList(strings); Assert.assertEquals(collapsed, expected); - Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings); + Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings); } // ----------------------------------------------------------------- @@ -537,11 +537,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { return record; } - private final void decodeRecord(final List toEncode, final byte[] record) { + private final void decodeRecord(final List toEncode, final byte[] record) throws IOException { decodeRecord(toEncode, new BCF2Decoder(record)); } - private final void decodeRecord(final List toEncode, final BCF2Decoder decoder) { + private final void decodeRecord(final List toEncode, final BCF2Decoder decoder) throws IOException { for ( final BCF2TypedValue tv : toEncode ) { Assert.assertFalse(decoder.blockIsFullyDecoded()); final Object decoded = decoder.decodeTypedValue(); diff --git 
a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java deleted file mode 100644 index 8cd051e01..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ -package org.broadinstitute.sting.utils.codecs.vcf; - -import com.google.java.contract.Requires; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.testng.Assert; -import org.testng.SkipException; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - -public class VCFAlleleClipperUnitTest extends BaseTest { - // -------------------------------------------------------------------------------- - // - // Test allele clipping - // - // -------------------------------------------------------------------------------- - - private class ClipAllelesTest extends TestDataProvider { - final int position; - final int stop; - final String ref; - List inputs; - List expected; - - @Requires("arg.length % 2 == 0") - private ClipAllelesTest(final int position, final int stop, final String ... arg) { - super(ClipAllelesTest.class); - this.position = position; - this.stop = stop; - this.ref = arg[0]; - - int n = arg.length / 2; - inputs = new ArrayList(n); - expected = new ArrayList(n); - - for ( int i = 0; i < n; i++ ) { - final boolean ref = i % n == 0; - inputs.add(Allele.create(arg[i], ref)); - } - for ( int i = n; i < arg.length; i++ ) { - final boolean ref = i % n == 0; - expected.add(Allele.create(arg[i], ref)); - } - } - - public boolean isClipped() { - for ( int i = 0; i < inputs.size(); i++ ) { - if ( inputs.get(i).length() != expected.get(i).length() ) - return true; - } - - return false; - } - - public String toString() { - return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected); - } - } - @DataProvider(name = "ClipAllelesTest") - public Object[][] makeClipAllelesTest() { - // do no harm - new ClipAllelesTest(10, 10, "A", "A"); - new ClipAllelesTest(10, 10, "A", "C", "A", "C"); - new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G"); - - // insertions - new ClipAllelesTest(10, 10, "A", "AA", "-", "A"); - new ClipAllelesTest(10, 
10, "A", "AAA", "-", "AA"); - new ClipAllelesTest(10, 10, "A", "AG", "-", "G"); - - // deletions - new ClipAllelesTest(10, 11, "AA", "A", "A", "-"); - new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-"); - new ClipAllelesTest(10, 11, "AG", "A", "G", "-"); - new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-"); - - // multi-allelic insertion and deletions - new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA"); - new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG"); - new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA"); - new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA"); - new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG"); - new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G"); - - // cannot be clipped - new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG"); - new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG"); - - // symbolic - new ClipAllelesTest(10, 100, "A", "", "A", ""); - new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]"); - new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T"); - new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51["); - new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]"); - - // symbolic with alleles that should be clipped - new ClipAllelesTest(10, 100, "A", "", "AA", "-", "", "A"); - new ClipAllelesTest(10, 100, "AA", "", "A", "A", "", "-"); - new ClipAllelesTest(10, 100, "AA", "", "A", "AAA", "A", "", "-", "AA"); - new ClipAllelesTest(10, 100, "AG", "", "A", "AGA", "G", "", "-", "GA"); - new ClipAllelesTest(10, 100, "G", "", "A", "G", "", "A"); - - // clipping from both ends - // - // TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END - // -// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T"); -// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T"); -// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T"); -// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT"); -// new 
ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT"); -// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT"); - - // complex substitutions - new ClipAllelesTest(10, 10, "A", "GA", "A", "GA"); - - return ClipAllelesTest.getTests(ClipAllelesTest.class); - } - - @Test(dataProvider = "ClipAllelesTest") - public void testClipAllelesTest(ClipAllelesTest cfg) { - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop); - Assert.assertNull(clipped.getError(), "Unexpected error occurred"); - Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop"); - Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles"); - } - - @Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest") - public void testPaddingAllelesInVC(final ClipAllelesTest cfg) { - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop); - final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles()) - .referenceBaseForIndel(clipped.getRefBaseForIndel()).make(); - - if ( vc.isMixed() && vc.hasSymbolicAlleles() ) - throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized"); - - Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method"); - - if ( cfg.isClipped() ) { - // TODO - // TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is - // TODO actually the original stop, as the original stop is +1 its true size. - // TODO - final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 
0 : 1); - - final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); - Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start"); - Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles"); - Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)"); - Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again"); - } - } - - // -------------------------------------------------------------------------------- - // - // basic allele clipping test - // - // -------------------------------------------------------------------------------- - - private class ReverseClippingPositionTestProvider extends TestDataProvider { - final String ref; - final List alleles = new ArrayList(); - final int expectedClip; - - private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... 
alleles) { - super(ReverseClippingPositionTestProvider.class); - this.ref = ref; - for ( final String allele : alleles ) - this.alleles.add(Allele.create(allele)); - this.expectedClip = expectedClip; - } - - @Override - public String toString() { - return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip); - } - } - - @DataProvider(name = "ReverseClippingPositionTestProvider") - public Object[][] makeReverseClippingPositionTestProvider() { - // pair clipping - new ReverseClippingPositionTestProvider(0, "ATT", "CCG"); - new ReverseClippingPositionTestProvider(1, "ATT", "CCT"); - new ReverseClippingPositionTestProvider(2, "ATT", "CTT"); - new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele - - // triplets - new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG"); - new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go - new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go - - return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); - } - - - @Test(dataProvider = "ReverseClippingPositionTestProvider") - public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { - int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); - Assert.assertEquals(result, cfg.expectedClip); - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index b271d8c84..71fc1d464 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -39,6 +39,28 @@ public class VCFIntegrationTest extends WalkerTest { executeTest("Test reading and writing breakpoint VCF", spec1); } + @Test(enabled = true) + public 
void testReadingLowerCaseBases() { + String testVCF = privateTestDir + "lowercaseBases.vcf"; + + String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; + + String test1 = baseCommand + "-T SelectVariants -V " + testVCF; + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e0e308a25e56bde1c664139bb44ed19d")); + executeTest("Test reading VCF with lower-case bases", spec1); + } + + @Test(enabled = true) + public void testReadingAndWriting1000GSVs() { + String testVCF = privateTestDir + "1000G_SVs.chr1.vcf"; + + String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; + + String test1 = baseCommand + "-T SelectVariants -V " + testVCF; + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab")); + executeTest("Test reading and writing 1000G Phase I SVs", spec1); + } + @Test public void testReadingAndWritingSamtools() { String testVCF = privateTestDir + "samtools.vcf"; @@ -46,7 +68,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("38697c195e7abf18d95dcc16c8e6d284")); executeTest("Test reading and writing samtools vcf", spec1); } @@ -55,16 +77,16 @@ public class VCFIntegrationTest extends WalkerTest { String testVCF = privateTestDir + "ex2.vcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e8f721ce81e4fdadba13c5291027057f")); executeTest("Test writing samtools WEx BCF 
example", spec1); } - @Test(enabled = false) + @Test(enabled = true) public void testReadingSamtoolsWExBCFExample() { String testVCF = privateTestDir + "ex2.bcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("63a2e0484ae37b0680514f53e0bf0c94")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0439e2b4ccc63bb4ba7c283cd9ab1b25")); executeTest("Test reading samtools WEx BCF example", spec1); } diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java deleted file mode 100644 index 74d9420b2..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java +++ /dev/null @@ -1,218 +0,0 @@ -package org.broadinstitute.sting.utils.recalibration; - -import org.broadinstitute.sting.gatk.walkers.bqsr.*; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.collections.NestedIntegerArray; -import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - -import java.util.*; - -/** - * Unit tests for on-the-fly recalibration. 
- * - * @author Mauricio Carneiro - * @since 3/16/12 - */ -public class BaseRecalibrationUnitTest { - - private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager; - - private ReadGroupCovariate rgCovariate; - private QualityScoreCovariate qsCovariate; - private ContextCovariate cxCovariate; - private CycleCovariate cyCovariate; - - private GATKSAMRecord read = ReadUtils.createRandomRead(10000); - private BaseRecalibration baseRecalibration; - private ReadCovariates readCovariates; - - - @BeforeClass - public void init() { - GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("rg"); - rg.setPlatform("illumina"); - read.setReadGroup(rg); - - byte[] quals = new byte[read.getReadLength()]; - for (int i = 0; i < read.getReadLength(); i++) - quals[i] = 20; - read.setBaseQualities(quals); - - RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - List requiredCovariates = new ArrayList(); - List optionalCovariates = new ArrayList(); - - dataManager = new org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager(true, 4); - - rgCovariate = new ReadGroupCovariate(); - rgCovariate.initialize(RAC); - requiredCovariates.add(rgCovariate); - - qsCovariate = new QualityScoreCovariate(); - qsCovariate.initialize(RAC); - requiredCovariates.add(qsCovariate); - - cxCovariate = new ContextCovariate(); - cxCovariate.initialize(RAC); - optionalCovariates.add(cxCovariate); - cyCovariate = new CycleCovariate(); - cyCovariate.initialize(RAC); - optionalCovariates.add(cyCovariate); - - final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; - int covariateIndex = 0; - for (final Covariate cov : requiredCovariates) - requestedCovariates[covariateIndex++] = cov; - for (final Covariate cov : optionalCovariates) - requestedCovariates[covariateIndex++] = cov; - - readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); - - RecalibrationTables 
recalibrationTables = new RecalibrationTables(requestedCovariates); - final NestedIntegerArray rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); - final NestedIntegerArray qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); - - for (int i=0; i covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j); - covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j], EventType.BASE_SUBSTITUTION.index); - } - } - - dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE); - - List quantizedQuals = new ArrayList(); - List qualCounts = new ArrayList(); - for (byte i = 0; i <= QualityUtils.MAX_QUAL_SCORE; i++) { - quantizedQuals.add(i); - qualCounts.add(1L); - } - QuantizationInfo quantizationInfo = new QuantizationInfo(quantizedQuals, qualCounts); - quantizationInfo.noQuantization(); - baseRecalibration = new BaseRecalibration(quantizationInfo, recalibrationTables, requestedCovariates); - - } - - - @Test(enabled=false) - public void testGoldStandardComparison() { - for (int i = 0; i < read.getReadLength(); i++) { - int [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION); - Object [] objKey = buildObjectKey(bitKey); - byte v2 = baseRecalibration.performSequentialQualityCalculation(bitKey, EventType.BASE_SUBSTITUTION); - byte v1 = goldStandardSequentialCalculation(objKey); - Assert.assertEquals(v2, v1); - } - } - - private Object[] buildObjectKey(final int[] bitKey) { - Object[] key = new Object[bitKey.length]; - key[0] = rgCovariate.formatKey(bitKey[0]); - key[1] = qsCovariate.formatKey(bitKey[1]); - key[2] = cxCovariate.formatKey(bitKey[2]); - key[3] = cyCovariate.formatKey(bitKey[3]); - return key; - } - - /** - * Implements a serial recalibration of the reads using the combinational table. 
- * First, we perform a positional recalibration, and then a subsequent dinuc correction. - * - * Given the full recalibration table, we perform the following preprocessing steps: - * - * - calculate the global quality score shift across all data [DeltaQ] - * - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift - * -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual - * - The final shift equation is: - * - * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... ) - * - * @param key The list of Comparables that were calculated from the covariates - * @return A recalibrated quality score as a byte - */ - private byte goldStandardSequentialCalculation(final Object... key) { - - final byte qualFromRead = (byte) Integer.parseInt(key[1].toString()); - final Object[] readGroupCollapsedKey = new Object[1]; - final Object[] qualityScoreCollapsedKey = new Object[2]; - final Object[] covariateCollapsedKey = new Object[3]; - - // The global quality shift (over the read group only) - readGroupCollapsedKey[0] = key[0]; - final org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum globalRecalDatum = ((org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) dataManager.getCollapsedTable(0).get(readGroupCollapsedKey)); - double globalDeltaQ = 0.0; - if (globalRecalDatum != null) { - final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality(); - final double aggregrateQReported = globalRecalDatum.getEstimatedQReported(); - globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported; - } - - // The shift in quality between reported and empirical - qualityScoreCollapsedKey[0] = key[0]; - qualityScoreCollapsedKey[1] = key[1]; - final org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum qReportedRecalDatum = ((org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) 
dataManager.getCollapsedTable(1).get(qualityScoreCollapsedKey)); - double deltaQReported = 0.0; - if (qReportedRecalDatum != null) { - final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality(); - deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; - } - - // The shift in quality due to each covariate by itself in turn - double deltaQCovariates = 0.0; - double deltaQCovariateEmpirical; - covariateCollapsedKey[0] = key[0]; - covariateCollapsedKey[1] = key[1]; - for (int iii = 2; iii < key.length; iii++) { - covariateCollapsedKey[2] = key[iii]; // The given covariate - final org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum covariateRecalDatum = ((org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) dataManager.getCollapsedTable(iii).get(covariateCollapsedKey)); - if (covariateRecalDatum != null) { - deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality(); - deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); - } - } - - final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; - return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_RECALIBRATED_Q_SCORE); - - // Verbose printouts used to validate with old recalibrator - //if(key.contains(null)) { - // System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d", - // qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte)); - //} - //else { - // System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d", - // key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) ); - //} - - //return newQualityByte; - } - - public static double calcEmpiricalQual(final int observations, final int errors) { - final int smoothing = 1; - final double 
doubleMismatches = (double) (errors + smoothing); - final double doubleObservations = (double) ( observations + smoothing ); - double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); - return Math.min(QualityUtils.MAX_RECALIBRATED_Q_SCORE, empiricalQual); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java similarity index 89% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java index 553b7e237..2556448ad 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java @@ -1,5 +1,8 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.ContextCovariate; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java similarity index 90% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java index 3fa1e916d..c3d93b2cb 100644 --- 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java new file mode 100644 index 000000000..0ff2eaf03 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.recalibration; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +public class QualQuantizerUnitTest extends BaseTest { + @BeforeSuite + public void before() { + + } + + // -------------------------------------------------------------------------------- + // + // merge case Provider + // + // -------------------------------------------------------------------------------- + + private class QualIntervalTestProvider extends TestDataProvider { + final QualQuantizer.QualInterval left, right; + int exError, exTotal, exQual; + double exErrorRate; + + private QualIntervalTestProvider(int leftE, int leftN, int rightE, int rightN, int exError, int exTotal) { + super(QualIntervalTestProvider.class); + + QualQuantizer qq = new QualQuantizer(0); + left = qq.new QualInterval(10, 10, leftN, leftE, 0); + right = qq.new QualInterval(11, 11, rightN, rightE, 0); + + this.exError = exError; + this.exTotal = exTotal; + this.exErrorRate = (leftE + rightE + 1) / (1.0 * (leftN + rightN + 1)); + this.exQual = QualityUtils.probToQual(1-this.exErrorRate, 0); + } + } + + @DataProvider(name = "QualIntervalTestProvider") + public Object[][] makeQualIntervalTestProvider() { + new QualIntervalTestProvider(10, 100, 10, 1000, 20, 1100); + new QualIntervalTestProvider(0, 100, 10, 900, 10, 1000); + new 
QualIntervalTestProvider(10, 900, 0, 100, 10, 1000); + new QualIntervalTestProvider(0, 0, 10, 100, 10, 100); + new QualIntervalTestProvider(1, 10, 9, 90, 10, 100); + new QualIntervalTestProvider(1, 10, 9, 100000, 10, 100010); + new QualIntervalTestProvider(1, 10, 9, 1000000, 10,1000010); + + return QualIntervalTestProvider.getTests(QualIntervalTestProvider.class); + } + + @Test(dataProvider = "QualIntervalTestProvider") + public void testQualInterval(QualIntervalTestProvider cfg) { + QualQuantizer.QualInterval merged = cfg.left.merge(cfg.right); + Assert.assertEquals(merged.nErrors, cfg.exError); + Assert.assertEquals(merged.nObservations, cfg.exTotal); + Assert.assertEquals(merged.getErrorRate(), cfg.exErrorRate); + Assert.assertEquals(merged.getQual(), cfg.exQual); + } + + @Test + public void testMinInterestingQual() { + for ( int q = 0; q < 15; q++ ) { + for ( int minQual = 0; minQual <= 10; minQual ++ ) { + QualQuantizer qq = new QualQuantizer(minQual); + QualQuantizer.QualInterval left = qq.new QualInterval(q, q, 100, 10, 0); + QualQuantizer.QualInterval right = qq.new QualInterval(q+1, q+1, 1000, 100, 0); + + QualQuantizer.QualInterval merged = left.merge(right); + boolean shouldBeFree = q+1 <= minQual; + if ( shouldBeFree ) + Assert.assertEquals(merged.getPenalty(), 0.0); + else + Assert.assertTrue(merged.getPenalty() > 0.0); + } + } + } + + + // -------------------------------------------------------------------------------- + // + // High-level case Provider + // + // -------------------------------------------------------------------------------- + + private class QuantizerTestProvider extends TestDataProvider { + final List nObservationsPerQual = new ArrayList(); + final int nLevels; + final List expectedMap; + + private QuantizerTestProvider(final List nObservationsPerQual, final int nLevels, final List expectedMap) { + super(QuantizerTestProvider.class); + + for ( int x : nObservationsPerQual ) + this.nObservationsPerQual.add((long)x); + this.nLevels = 
nLevels; + this.expectedMap = expectedMap; + } + + @Override + public String toString() { + return String.format("QQTest nLevels=%d nObs=[%s] map=[%s]", + nLevels, Utils.join(",", nObservationsPerQual), Utils.join(",", expectedMap)); + } + } + + @DataProvider(name = "QuantizerTestProvider") + public Object[][] makeQuantizerTestProvider() { + List allQ2 = Arrays.asList(0, 0, 1000, 0, 0); + + new QuantizerTestProvider(allQ2, 5, Arrays.asList(0, 1, 2, 3, 4)); + new QuantizerTestProvider(allQ2, 1, Arrays.asList(2, 2, 2, 2, 2)); + + new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 0, 1000), 2, Arrays.asList(2, 2, 2, 2, 4)); + new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 1, 1000), 2, Arrays.asList(2, 2, 2, 4, 4)); + new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 10, 1000), 2, Arrays.asList(2, 2, 2, 2, 4)); + + return QuantizerTestProvider.getTests(QuantizerTestProvider.class); + } + + @Test(dataProvider = "QuantizerTestProvider", enabled = true) + public void testQuantizer(QuantizerTestProvider cfg) { + QualQuantizer qq = new QualQuantizer(cfg.nObservationsPerQual, cfg.nLevels, 0); + logger.warn("cfg: " + cfg); + for ( int i = 0; i < cfg.expectedMap.size(); i++) { + int expected = cfg.expectedMap.get(i); + int observed = qq.originalToQuantizedMap.get(i); + //logger.warn(String.format(" qq map: %s : %d => %d", i, expected, observed)); + Assert.assertEquals(observed, expected); + } + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java similarity index 92% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java index 37994cf12..dac26cb53 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java +++ 
b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -41,7 +43,7 @@ public class ReadCovariatesUnitTest { requestedCovariates[2] = coCov; requestedCovariates[3] = cyCov; - ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates); + ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); // check that the length is correct Assert.assertEquals(rc.getMismatchesKeySet().length, length); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java similarity index 88% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java index a83508353..78a74d259 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.ReadGroupCovariate; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import 
org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java new file mode 100644 index 000000000..33985e0ac --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.recalibration; + + +// the imports for unit testing. 
+ + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +public class RecalDatumUnitTest extends BaseTest { + + // -------------------------------------------------------------------------------- + // + // merge case Provider + // + // -------------------------------------------------------------------------------- + + private class RecalDatumTestProvider extends TestDataProvider { + int exError, exTotal, reportedQual; + + private RecalDatumTestProvider(int E, int N, int reportedQual) { + super(RecalDatumTestProvider.class); + + this.exError = E; + this.exTotal = N; + this.reportedQual = reportedQual; + } + + public double getErrorRate() { + return (exError + 1) / (1.0 * (exTotal + 2)); + } + + public double getErrorRatePhredScaled() { + return QualityUtils.phredScaleErrorRate(getErrorRate()); + } + + public int getReportedQual() { + return reportedQual; + } + + public RecalDatum makeRecalDatum() { + return new RecalDatum(exTotal, exError, (byte)getReportedQual()); + } + + @Override + public String toString() { + return String.format("exError=%d, exTotal=%d, reportedQual=%d", exError, exTotal, reportedQual); + } + } + + @DataProvider(name = "RecalDatumTestProvider") + public Object[][] makeRecalDatumTestProvider() { + for ( int E : Arrays.asList(1, 10, 100, 1000, 10000) ) + for ( int N : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) ) + for ( int reportedQual : Arrays.asList(10, 20) ) + if ( E <= N ) + new RecalDatumTestProvider(E, N, reportedQual); + return RecalDatumTestProvider.getTests(RecalDatumTestProvider.class); + } + + @Test(dataProvider = "RecalDatumTestProvider") + public void testRecalDatumBasics(RecalDatumTestProvider cfg) 
{ + final RecalDatum datum = cfg.makeRecalDatum(); + assertBasicFeaturesOfRecalDatum(datum, cfg); + } + + private static void assertBasicFeaturesOfRecalDatum(final RecalDatum datum, final RecalDatumTestProvider cfg) { + Assert.assertEquals(datum.getNumMismatches(), cfg.exError); + Assert.assertEquals(datum.getNumObservations(), cfg.exTotal); + if ( cfg.getReportedQual() != -1 ) + Assert.assertEquals(datum.getEstimatedQReportedAsByte(), cfg.getReportedQual()); + BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalQuality(), cfg.getErrorRatePhredScaled()); + BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalErrorRate(), cfg.getErrorRate()); + } + + @Test(dataProvider = "RecalDatumTestProvider") + public void testRecalDatumCopyAndCombine(RecalDatumTestProvider cfg) { + final RecalDatum datum = cfg.makeRecalDatum(); + final RecalDatum copy = new RecalDatum(datum); + assertBasicFeaturesOfRecalDatum(copy, cfg); + + RecalDatumTestProvider combinedCfg = new RecalDatumTestProvider(cfg.exError * 2, cfg.exTotal * 2, cfg.reportedQual); + copy.combine(datum); + assertBasicFeaturesOfRecalDatum(copy, combinedCfg); + } + + @Test(dataProvider = "RecalDatumTestProvider") + public void testRecalDatumModification(RecalDatumTestProvider cfg) { + RecalDatum datum = cfg.makeRecalDatum(); + datum.setEmpiricalQuality(10.1); + Assert.assertEquals(datum.getEmpiricalQuality(), 10.1); + + datum.setEstimatedQReported(10.1); + Assert.assertEquals(datum.getEstimatedQReported(), 10.1); + Assert.assertEquals(datum.getEstimatedQReportedAsByte(), 10); + + datum = cfg.makeRecalDatum(); + cfg.exTotal = 100000; + datum.setNumObservations(cfg.exTotal); + assertBasicFeaturesOfRecalDatum(datum, cfg); + + datum = cfg.makeRecalDatum(); + cfg.exError = 1000; + datum.setNumMismatches(cfg.exError); + assertBasicFeaturesOfRecalDatum(datum, cfg); + + datum = cfg.makeRecalDatum(); + datum.increment(true); + cfg.exError++; + cfg.exTotal++; + assertBasicFeaturesOfRecalDatum(datum, cfg); + + datum = 
cfg.makeRecalDatum(); + datum.increment(10, 5); + cfg.exError += 5; + cfg.exTotal += 10; + assertBasicFeaturesOfRecalDatum(datum, cfg); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java similarity index 95% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java index e4a77c016..387cc94d6 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java @@ -1,9 +1,10 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -72,7 +73,7 @@ public class RecalibrationReportUnitTest { final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE); int nKeys = 0; // keep track of how many keys were produced - final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates); + final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); final RecalibrationTables 
recalibrationTables = new RecalibrationTables(requestedCovariates); final NestedIntegerArray rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java index ed9805d19..65398c373 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java @@ -37,8 +37,6 @@ import org.testng.annotations.Test; // public Allele(byte[] bases, boolean isRef) { // public Allele(boolean isRef) { // public Allele(String bases, boolean isRef) { -// public boolean isNullAllele() { return length() == 0; } -// public boolean isNonNullAllele() { return ! isNullAllele(); } // public boolean isReference() { return isRef; } // public boolean isNonReference() { return ! isReference(); } // public byte[] getBases() { return bases; } @@ -49,13 +47,10 @@ import org.testng.annotations.Test; * Basic unit test for RecalData */ public class AlleleUnitTest { - Allele ARef, del, delRef, A, T, ATIns, ATCIns, NoCall; + Allele ARef, A, T, ATIns, ATCIns, NoCall; @BeforeSuite public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); - A = Allele.create("A"); ARef = Allele.create("A", true); T = Allele.create("T"); @@ -72,8 +67,6 @@ public class AlleleUnitTest { Assert.assertFalse(A.isReference()); Assert.assertTrue(A.basesMatch("A")); Assert.assertEquals(A.length(), 1); - Assert.assertTrue(A.isNonNull()); - Assert.assertFalse(A.isNull()); Assert.assertTrue(ARef.isReference()); Assert.assertFalse(ARef.isNonReference()); @@ -92,8 +85,8 @@ public class AlleleUnitTest { Assert.assertFalse(NoCall.isReference()); Assert.assertFalse(NoCall.basesMatch(".")); Assert.assertEquals(NoCall.length(), 0); - Assert.assertTrue(NoCall.isNonNull()); - 
Assert.assertFalse(NoCall.isNull()); + Assert.assertTrue(NoCall.isNoCall()); + Assert.assertFalse(NoCall.isCalled()); } @@ -103,16 +96,6 @@ public class AlleleUnitTest { Assert.assertEquals(ATCIns.length(), 3); Assert.assertEquals(ATIns.getBases(), "AT".getBytes()); Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes()); - - Assert.assertTrue(del.isNonReference()); - Assert.assertFalse(delRef.isNonReference()); - Assert.assertFalse(del.isReference()); - Assert.assertTrue(delRef.isReference()); - Assert.assertFalse(del.basesMatch("-")); - Assert.assertTrue(del.basesMatch("")); - Assert.assertEquals(del.length(), 0); - Assert.assertFalse(del.isNonNull()); - Assert.assertTrue(del.isNull()); } @@ -128,18 +111,6 @@ public class AlleleUnitTest { Assert.assertFalse(a1.equals(a4)); } - @Test - public void testDelConstructors() { - Allele a1 = Allele.create("-"); - Allele a2 = Allele.create("-".getBytes()); - Allele a3 = Allele.create(""); - Allele a4 = Allele.create("", true); - - Assert.assertTrue(a1.equals(a2)); - Assert.assertTrue(a1.equals(a3)); - Assert.assertFalse(a1.equals(a4)); - } - @Test public void testInsConstructors() { Allele a1 = Allele.create("AC"); @@ -156,7 +127,6 @@ public class AlleleUnitTest { public void testEquals() { Assert.assertTrue(ARef.basesMatch(A)); Assert.assertFalse(ARef.equals(A)); - Assert.assertFalse(ARef.equals(del)); Assert.assertFalse(ARef.equals(ATIns)); Assert.assertFalse(ARef.equals(ATCIns)); @@ -164,11 +134,6 @@ public class AlleleUnitTest { Assert.assertFalse(T.basesMatch(A)); Assert.assertFalse(T.equals(A)); - Assert.assertTrue(del.basesMatch(del)); - Assert.assertTrue(del.basesMatch(delRef)); - Assert.assertTrue(del.equals(del)); - Assert.assertFalse(del.equals(delRef)); - Assert.assertTrue(ATIns.equals(ATIns)); Assert.assertFalse(ATIns.equals(ATCIns)); Assert.assertTrue(ATIns.basesMatch("AT")); @@ -209,7 +174,6 @@ public class AlleleUnitTest { public void testExtend() { Assert.assertEquals("AT", 
Allele.extend(Allele.create("A"), "T".getBytes()).toString()); Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString()); - Assert.assertEquals("A", Allele.extend(Allele.create("-"), "A".getBytes()).toString()); Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString()); Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString()); Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString()); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java index fa5476150..50fbea708 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java @@ -50,12 +50,12 @@ public class VCFJarClassLoadingUnitTest { ClassLoader classLoader = new URLClassLoader(jarURLs, null); classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext"); -// TODO -- uncomment when we include BCF2 codec -// classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec"); + classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec"); classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec"); classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec"); classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter"); classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter"); + classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.BCF2Writer"); } /** diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java 
b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java index 7c522eadf..0e5522e3a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java @@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark { public void run(final VariantContext vc) { if ( samples == null ) samples = new HashSet(new ArrayList(vc.getSampleNames()).subList(0, nSamplesToTake)); - VariantContext sub = vc.subContextFromSamples(samples, true); + VariantContext sub = vc.subContextFromSamples(samples); sub.getNSamples(); } }; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index ca4cdf306..b95e589b7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -225,10 +225,10 @@ public class VariantContextTestProvider { add(builder()); add(builder().alleles("A")); add(builder().alleles("A", "C", "T")); - add(builder().alleles("-", "C").referenceBaseForIndel("A")); - add(builder().alleles("-", "CAGT").referenceBaseForIndel("A")); - add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A")); - add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A")); + add(builder().alleles("A", "AC")); + add(builder().alleles("A", "ACAGT")); + add(builder().loc("1", 10, 11).alleles("AC", "A")); + add(builder().loc("1", 10, 13).alleles("ACGT", "A")); // make sure filters work add(builder().unfiltered()); @@ -302,8 +302,8 @@ public class VariantContextTestProvider { sites.add(builder().alleles("A").make()); sites.add(builder().alleles("A", "C", "T").make()); - 
sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make()); - sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make()); + sites.add(builder().alleles("A", "AC").make()); + sites.add(builder().alleles("A", "ACAGT").make()); for ( VariantContext site : sites ) { addGenotypes(site); @@ -597,23 +597,41 @@ public class VariantContextTestProvider { } public static void testReaderWriter(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException { + testReaderWriter(tester, data.header, data.vcs, data.vcs, true); + } + + public static void testReaderWriter(final VariantContextIOTest tester, + final VCFHeader header, + final List expected, + final Iterable vcs, + final boolean recurse) throws IOException { final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension()); tmpFile.deleteOnExit(); - // todo -- test all options - - // write + // write expected to disk final EnumSet options = EnumSet.of(Options.INDEX_ON_THE_FLY); final VariantContextWriter writer = tester.makeWriter(tmpFile, options); - writer.writeHeader(data.header); - final List expected = data.vcs; - for ( VariantContext vc : expected ) - writer.add(vc); - writer.close(); + writeVCsToFile(writer, header, vcs); - final Iterable actual = readAllVCs(tmpFile, tester.makeCodec()).getSecond(); + // ensure writing of expected == actual + final Pair> p = readAllVCs(tmpFile, tester.makeCodec()); + final Iterable actual = p.getSecond(); assertEquals(actual, expected); + if ( recurse ) { + // if we are doing a recursive test, grab a fresh iterator over the written values + final Iterable read = readAllVCs(tmpFile, tester.makeCodec()).getSecond(); + testReaderWriter(tester, p.getFirst(), expected, read, false); + } + } + + private static void writeVCsToFile(final VariantContextWriter writer, final VCFHeader header, final Iterable vcs) { + // write + writer.writeHeader(header); + for ( VariantContext vc : vcs ) + if (vc != null) + 
writer.add(vc); + writer.close(); } /** @@ -722,6 +740,8 @@ public class VariantContextTestProvider { Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles"); assertAttributesEquals(actual.getAttributes(), expected.getAttributes()); + Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied"); + Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered"); BaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters"); BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual()); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 1d290118f..272166c68 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest { int snpLocStart = 10; int snpLocStop = 10; - // - / ATC [ref] from 20-23 + // - / ATC [ref] from 20-22 String delLoc = "chr1"; int delLocStart = 20; - int delLocStop = 23; + int delLocStop = 22; // - [ref] / ATC from 20-20 String insLoc = "chr1"; int insLocStart = 20; int insLocStop = 20; - // - / A / T / ATC [ref] from 20-23 - String mixedLoc = "chr1"; - int mixedLocStart = 20; - int mixedLocStop = 23; - VariantContextBuilder basicBuilder, snpBuilder, insBuilder; @BeforeSuite public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); + del = Allele.create("A"); + delRef = Allele.create("A", true); A = Allele.create("A"); C = Allele.create("C"); @@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest { @BeforeMethod public void beforeTest() { - basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, 
Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); - snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); - insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A'); + basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)); + snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)); + insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)); } @Test @@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testCreatingDeletionVariantContext() { List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); + VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make(); Assert.assertEquals(vc.getChr(), delLoc); Assert.assertEquals(vc.getStart(), delLocStart); @@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testMatchingAlleles() { List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); - VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make(); + VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make(); + VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make(); Assert.assertTrue(vc.hasSameAllelesAs(vc2)); Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2)); @@ -386,13 +381,13 @@ public class 
VariantContextUnitTest extends BaseTest { @Test public void testAccessingCompleteGenotypes() { - List alleles = Arrays.asList(Aref, T, del); + List alleles = Arrays.asList(Aref, T, ATC); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T)); - Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del)); - Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del)); + Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC)); + Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC)); Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)); VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles) @@ -408,7 +403,7 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(10, vc.getCalledChrCount()); Assert.assertEquals(3, vc.getCalledChrCount(Aref)); Assert.assertEquals(4, vc.getCalledChrCount(T)); - Assert.assertEquals(3, vc.getCalledChrCount(del)); + Assert.assertEquals(3, vc.getCalledChrCount(ATC)); Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL)); } @@ -416,7 +411,7 @@ public class VariantContextUnitTest extends BaseTest { public void testAccessingRefGenotypes() { List alleles1 = Arrays.asList(Aref, T); List alleles2 = Arrays.asList(Aref); - List alleles3 = Arrays.asList(Aref, T, del); + List alleles3 = Arrays.asList(Aref, T); for ( List alleles : Arrays.asList(alleles1, alleles2, alleles3)) { Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref)); @@ -438,7 +433,7 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testFilters() { - List alleles = Arrays.asList(Aref, T, del); + List alleles = Arrays.asList(Aref, T); Genotype g1 = GenotypeBuilder.create("AA", 
Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); @@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testRepeatAllele() { - Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true); - Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false); - Allele atc = Allele.create("ATC", false); - Allele atcatc = Allele.create("ATCATC", false); - Allele ccccR = Allele.create("CCCC", true); - Allele cc = Allele.create("CC", false); - Allele cccccc = Allele.create("CCCCCC", false); - Allele gagaR = Allele.create("GAGA", true); - Allele gagagaga = Allele.create("GAGAGAGA", false); + Allele nullR = Allele.create("A", true); + Allele nullA = Allele.create("A", false); + Allele atc = Allele.create("AATC", false); + Allele atcatc = Allele.create("AATCATC", false); + Allele ccccR = Allele.create("ACCCC", true); + Allele cc = Allele.create("ACC", false); + Allele cccccc = Allele.create("ACCCCCC", false); + Allele gagaR = Allele.create("AGAGA", true); + Allele gagagaga = Allele.create("AGAGAGAGA", false); Pair,byte[]> result; byte[] refBytes = "TATCATCATCGGA".getBytes(); @@ -497,15 +492,15 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7); - // -*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 + // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[1],4); Assert.assertEquals(result.getSecond().length,3); - // ATC*,-,ATCATC - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ATCref,nullA,atcatc)).make(); + // ATC*,A,ATCATC + vc = new VariantContextBuilder("foo", insLoc, 
insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[1],2); @@ -522,7 +517,7 @@ public class VariantContextUnitTest extends BaseTest { // CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9 refBytes = "TCCCCCCCAGAGAGAG".getBytes(); - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],7); Assert.assertEquals(result.getFirst().toArray()[1],5); @@ -532,7 +527,7 @@ public class VariantContextUnitTest extends BaseTest { // GAGA*,-,GAGAGAGA refBytes = "TGAGAGAGAGATTT".getBytes(); - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(gagaR, nullA,gagagaga)).make(); + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],5); Assert.assertEquals(result.getFirst().toArray()[1],3); @@ -564,27 +559,24 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testVCFfromGenotypes() { - List alleles = Arrays.asList(Aref, T, del); + List alleles = Arrays.asList(Aref, T); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T)); Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)); - Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del)); - 
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make(); + VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4).make(); VariantContext vc12 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true); VariantContext vc1 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName())), true); VariantContext vc23 = vc.subContextFromSamples(new HashSet(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true); VariantContext vc4 = vc.subContextFromSamples(new HashSet(Arrays.asList(g4.getSampleName())), true); VariantContext vc14 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true); - VariantContext vc5 = vc.subContextFromSamples(new HashSet(Arrays.asList(g5.getSampleName())), true); Assert.assertTrue(vc12.isPolymorphicInSamples()); Assert.assertTrue(vc23.isPolymorphicInSamples()); Assert.assertTrue(vc1.isMonomorphicInSamples()); Assert.assertTrue(vc4.isMonomorphicInSamples()); Assert.assertTrue(vc14.isMonomorphicInSamples()); - Assert.assertTrue(vc5.isPolymorphicInSamples()); Assert.assertTrue(vc12.isSNP()); Assert.assertTrue(vc12.isVariant()); @@ -606,17 +598,11 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertFalse(vc14.isVariant()); Assert.assertFalse(vc14.isBiallelic()); - Assert.assertTrue(vc5.isIndel()); - Assert.assertTrue(vc5.isSimpleDeletion()); - Assert.assertTrue(vc5.isVariant()); - Assert.assertTrue(vc5.isBiallelic()); - Assert.assertEquals(3, vc12.getCalledChrCount(Aref)); Assert.assertEquals(1, vc23.getCalledChrCount(Aref)); Assert.assertEquals(2, vc1.getCalledChrCount(Aref)); Assert.assertEquals(0, vc4.getCalledChrCount(Aref)); Assert.assertEquals(2, vc14.getCalledChrCount(Aref)); - Assert.assertEquals(0, vc5.getCalledChrCount(Aref)); } public void testGetGenotypeMethods() { @@ -664,13 
+650,12 @@ public class VariantContextUnitTest extends BaseTest { @DataProvider(name = "getAlleles") public Object[][] mergeAllelesData() { new GetAllelesTest("A*", Aref); - new GetAllelesTest("-*", delRef); new GetAllelesTest("A*/C", Aref, C); new GetAllelesTest("A*/C/T", Aref, C, T); new GetAllelesTest("A*/T/C", Aref, T, C); - new GetAllelesTest("A*/C/T/-", Aref, C, T, del); - new GetAllelesTest("A*/T/C/-", Aref, T, C, del); - new GetAllelesTest("A*/-/T/C", Aref, del, T, C); + new GetAllelesTest("A*/C/T/ATC", Aref, C, T, ATC); + new GetAllelesTest("A*/T/C/ATC", Aref, T, C, ATC); + new GetAllelesTest("A*/ATC/T/C", Aref, ATC, T, C); return GetAllelesTest.getTests(GetAllelesTest.class); } @@ -678,7 +663,7 @@ public class VariantContextUnitTest extends BaseTest { @Test(dataProvider = "getAlleles") public void testMergeAlleles(GetAllelesTest cfg) { final List altAlleles = cfg.alleles.subList(1, cfg.alleles.size()); - final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make(); + final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make(); Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles"); Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size"); @@ -845,7 +830,6 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError()); Assert.assertEquals(sub.getFilters(), vc.getFilters()); Assert.assertEquals(sub.getID(), vc.getID()); - Assert.assertEquals(sub.getReferenceBaseForIndel(), vc.getReferenceBaseForIndel()); Assert.assertEquals(sub.getAttributes(), vc.getAttributes()); Set expectedGenotypes = new HashSet(); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java 
b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index b09a10d07..95e8458c8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -39,7 +39,7 @@ import java.io.FileNotFoundException; import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, C, delRef, Cref, ATC, ATCATC; + Allele Aref, T, C, Cref, ATC, ATCATC; private GenomeLocParser genomeLocParser; @BeforeSuite @@ -56,7 +56,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { // alleles Aref = Allele.create("A", true); Cref = Allele.create("C", true); - delRef = Allele.create("-", true); T = Allele.create("T"); C = Allele.create("C"); ATC = Allele.create("ATC"); @@ -99,7 +98,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { private VariantContext makeVC(String source, List alleles, Collection genotypes, Set filters) { int start = 10; int stop = start; // alleles.contains(ATC) ? start + 3 : start; - return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make(); + return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make(); } // -------------------------------------------------------------------------------- @@ -156,28 +155,23 @@ public class VariantContextUtilsUnitTest extends BaseTest { Arrays.asList(Aref, C), Arrays.asList(Aref, T, C)); // in order of appearence - // The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant. - // The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference. 
- new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(Cref)); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, ATC), + Arrays.asList(Aref, ATC)); - new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATC)); - - new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, ATC, ATCATC), + Arrays.asList(Aref, ATC, ATCATC)); // alleles in the order we see them - new MergeAllelesTest(Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATCATC, ATC)); + new MergeAllelesTest(Arrays.asList(Aref, ATCATC), + Arrays.asList(Aref, ATC, ATCATC), + Arrays.asList(Aref, ATCATC, ATC)); // same - new MergeAllelesTest(Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + new MergeAllelesTest(Arrays.asList(Aref, ATC), + Arrays.asList(Aref, ATCATC), + Arrays.asList(Aref, ATC, ATCATC)); return MergeAllelesTest.getTests(MergeAllelesTest.class); } @@ -661,4 +655,52 @@ public class VariantContextUtilsUnitTest extends BaseTest { // test alleles are equal Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat); } + + // -------------------------------------------------------------------------------- + // + // basic allele clipping test + // + // -------------------------------------------------------------------------------- + + private class ReverseClippingPositionTestProvider extends TestDataProvider { + final String ref; + final List alleles = new ArrayList(); + final int expectedClip; + + private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... 
alleles) { + super(ReverseClippingPositionTestProvider.class); + this.ref = ref; + for ( final String allele : alleles ) + this.alleles.add(Allele.create(allele)); + this.expectedClip = expectedClip; + } + + @Override + public String toString() { + return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip); + } + } + + @DataProvider(name = "ReverseClippingPositionTestProvider") + public Object[][] makeReverseClippingPositionTestProvider() { + // pair clipping + new ReverseClippingPositionTestProvider(0, "ATT", "CCG"); + new ReverseClippingPositionTestProvider(1, "ATT", "CCT"); + new ReverseClippingPositionTestProvider(2, "ATT", "CTT"); + new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele + + // triplets + new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG"); + new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go + new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go + + return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); + } + + + @Test(dataProvider = "ReverseClippingPositionTestProvider") + public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { + int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); + Assert.assertEquals(result, cfg.expectedClip); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java index 6f5756bdc..8f03f1d38 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java @@ -56,7 +56,7 @@ public class VariantJEXLContextUnitTest extends BaseTest { Allele A, Aref, T, Tref; - Allele del, 
delRef, ATC, ATCref; + Allele ATC, ATCref; // A [ref] / T at 10 GenomeLoc snpLoc; @@ -84,9 +84,6 @@ public class VariantJEXLContextUnitTest extends BaseTest { @BeforeMethod public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); - A = Allele.create("A"); Aref = Allele.create("A", true); T = Allele.create("T"); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java index a7fff4559..5876efa12 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java @@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest { Map attributes = new HashMap(); GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size()); - alleles.add(Allele.create("-",true)); - alleles.add(Allele.create("CC",false)); + alleles.add(Allele.create("A",true)); + alleles.add(Allele.create("ACC",false)); attributes.put("DP","50"); for (String name : header.getGenotypeSamples()) { @@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest { genotypes.add(gt); } return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles) - .genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make(); + .genotypes(genotypes).attributes(attributes).make(); } diff --git a/public/packages/AnalyzeCovariates.xml b/public/packages/AnalyzeCovariates.xml deleted file mode 100644 index 27a72eabd..000000000 --- a/public/packages/AnalyzeCovariates.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - diff --git a/public/packages/GATK-Picard.xml b/public/packages/GATK-Picard.xml deleted file mode 100644 index 29057d398..000000000 --- a/public/packages/GATK-Picard.xml +++ /dev/null @@ -1,51 +0,0 @@ - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml index 78f3f0cea..2de0273f3 100644 --- a/public/packages/GATKEngine.xml +++ b/public/packages/GATKEngine.xml @@ -48,11 +48,11 @@ - - - - - - + + + + + + diff --git a/public/packages/GenomeAnalysisTK.xml b/public/packages/GenomeAnalysisTK.xml index d57d12f64..e95c992b6 100644 --- a/public/packages/GenomeAnalysisTK.xml +++ b/public/packages/GenomeAnalysisTK.xml @@ -33,12 +33,8 @@ - - - - - - + + diff --git a/public/packages/GenomeAnalysisTKLite.xml b/public/packages/GenomeAnalysisTKLite.xml index aa03670ae..b2f73434c 100644 --- a/public/packages/GenomeAnalysisTKLite.xml +++ b/public/packages/GenomeAnalysisTKLite.xml @@ -33,11 +33,7 @@ - - - - - + diff --git a/public/packages/Queue.xml b/public/packages/Queue.xml index c74ef9912..621a549d5 100644 --- a/public/packages/Queue.xml +++ b/public/packages/Queue.xml @@ -32,12 +32,10 @@ - - - - + + diff --git a/public/packages/QueueLite.xml b/public/packages/QueueLite.xml index d91a86c9c..0ead68fb0 100644 --- a/public/packages/QueueLite.xml +++ b/public/packages/QueueLite.xml @@ -32,11 +32,9 @@ - - - + diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 677d973f5..56f6460fb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -258,8 +258,8 @@ class DataProcessingPipeline extends QScript { // Accessory files val targetIntervals = if (cleaningModel == ConsensusDeterminationModel.KNOWNS_ONLY) {globalIntervals} else {swapExt(bam, ".bam", ".intervals")} val metricsFile = swapExt(bam, ".bam", ".metrics") - val preRecalFile = swapExt(bam, ".bam", ".pre_recal.csv") - val 
postRecalFile = swapExt(bam, ".bam", ".post_recal.csv") + val preRecalFile = swapExt(bam, ".bam", ".pre_recal.table") + val postRecalFile = swapExt(bam, ".bam", ".post_recal.table") val preOutPath = swapExt(bam, ".bam", ".pre") val postOutPath = swapExt(bam, ".bam", ".post") val preValidateLog = swapExt(bam, ".bam", ".pre.validation") @@ -281,9 +281,7 @@ class DataProcessingPipeline extends QScript { dedup(cleanedBam, dedupedBam, metricsFile), cov(dedupedBam, preRecalFile), recal(dedupedBam, preRecalFile, recalBam), - cov(recalBam, postRecalFile), - analyzeCovariates(preRecalFile, preOutPath), - analyzeCovariates(postRecalFile, postOutPath)) + cov(recalBam, postRecalFile)) cohortList :+= recalBam @@ -345,11 +343,12 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".clean" } - case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { + case class cov (inBam: File, outRecalFile: File) extends BaseRecalibrator with CommandLineGATKArgs { this.knownSites ++= qscript.dbSNP - this.covariate ++= Seq("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") + this.covariate ++= Seq("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate") this.input_file :+= inBam - this.recal_file = outRecalFile + this.disable_indel_quals = true + this.out = outRecalFile if (!defaultPlatform.isEmpty) this.default_platform = defaultPlatform if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString) else if (qscript.intervals != null) this.intervals :+= qscript.intervals @@ -358,14 +357,13 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outRecalFile + ".covariates" } - case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs { + case class recal (inBam: File, inRecalFile: File, outBam: File) extends PrintReads with CommandLineGATKArgs { 
this.input_file :+= inBam - this.recal_file = inRecalFile + this.BQSR = inRecalFile this.baq = CalculationMode.CALCULATE_AS_NECESSARY this.out = outBam if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString) else if (qscript.intervals != null) this.intervals :+= qscript.intervals - this.no_pg_tag = qscript.testMode this.scatterCount = nContigs this.isIntermediate = false this.analysisName = queueLogDir + outBam + ".recalibration" @@ -379,13 +377,6 @@ class DataProcessingPipeline extends QScript { ****************************************************************************/ - case class analyzeCovariates (inRecalFile: File, outPath: File) extends AnalyzeCovariates { - this.recal_file = inRecalFile - this.output_dir = outPath.toString - this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" - this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" - } - case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs { this.input :+= inBam this.output = outBam diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala index 46587c5b6..a4a6636fe 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala @@ -80,8 +80,8 @@ class PacbioProcessingPipeline extends QScript { // BAM Steps val mqBAM: File = swapExt(bamBase, ".bam", ".mq.bam") - val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.csv") - val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.csv") + val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.table") + val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.table") val recalBam: File = swapExt(bamBase, ".bam", ".recal.bam") val path1: String = recalBam + 
".before" val path2: String = recalBam + ".after" @@ -102,9 +102,7 @@ class PacbioProcessingPipeline extends QScript { add(cov(bam, recalFile1, resetQuals), recal(bam, recalFile1, recalBam), - cov(recalBam, recalFile2, false), - analyzeCovariates(recalFile1, path1), - analyzeCovariates(recalFile2, path2)) + cov(recalBam, recalFile2, false)) } } @@ -162,36 +160,29 @@ class PacbioProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".rg" } - case class cov (inBam: File, outRecalFile: File, resetQuals: Boolean) extends CountCovariates with CommandLineGATKArgs { + case class cov (inBam: File, outRecalFile: File, resetQuals: Boolean) extends BaseRecalibrator with CommandLineGATKArgs { if (resetQuals) this.DBQ = dbq this.knownSites :+= dbSNP - this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") + this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate") this.input_file :+= inBam - this.recal_file = outRecalFile + this.disable_indel_quals = true + this.out = outRecalFile this.analysisName = queueLogDir + outRecalFile + ".covariates" this.jobName = queueLogDir + outRecalFile + ".covariates" this.scatterCount = threads this.read_filter :+= "BadCigar" } - case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs { + case class recal (inBam: File, inRecalFile: File, outBam: File) extends PrintReads with CommandLineGATKArgs { this.DBQ = dbq this.input_file :+= inBam - this.recal_file = inRecalFile + this.BQSR = inRecalFile this.out = outBam - this.no_pg_tag = testMode this.isIntermediate = false this.analysisName = queueLogDir + outBam + ".recalibration" this.jobName = queueLogDir + outBam + ".recalibration" this.read_filter :+= "BadCigar" this.scatterCount = threads } - - case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { - this.recal_file = 
inRecalFile - this.output_dir = outPath - this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" - this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" - } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index 7a22e700b..041e84a8c 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -205,8 +205,7 @@ class QCommandLine extends CommandLineProgram with Logging { private def createQueueHeader() : Seq[String] = { Seq(String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp), "Copyright (c) 2012 The Broad Institute", - "Please view our documentation at http://www.broadinstitute.org/gsa/wiki", - "For support, please view our support site at http://getsatisfaction.com/gsa") + "Fro support and documentation go to http://www.broadinstitute.org/gatk") } private def getQueueVersion : String = { diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala index 20458c7c4..3fb9e0efa 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala @@ -41,7 +41,7 @@ class DataProcessingPipelineTest { " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf", " -test ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "0de95b5642e41e11ecd6fa1770242b88" + spec.fileMD5s += testOut -> "60d39ae909fdd049920b54e0965b6d3c" PipelineTest.executeTest(spec) } @@ -60,7 +60,7 @@ class DataProcessingPipelineTest { " -bwa /home/unix/carneiro/bin/bwa", " -bwape ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "72beeb037bfc5a07599630a23d8b325b" + spec.fileMD5s += 
testOut -> "61ca3237afdfabf78ee27a5bb80dae59" PipelineTest.executeTest(spec) } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala index 0989f8d24..74e947377 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala @@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest { " -blasr ", " -test ", " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString - spec.fileMD5s += testOut -> "cf147e7f56806598371f8d5d6794b852" + spec.fileMD5s += testOut -> "61b06e8b78a93e6644657e6d38851084" PipelineTest.executeTest(spec) } } diff --git a/public/testdata/testfile.sam b/public/testdata/testfile.sam new file mode 100644 index 000000000..7345c03d1 --- /dev/null +++ b/public/testdata/testfile.sam @@ -0,0 +1,450 @@ +@HD VN:1.0 GO:none SO:coordinate +@SQ SN:chrM LN:16571 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d2ed829b8a1628d16cbeee88e88e39eb SP:Homo sapiens +@SQ SN:chr1 LN:247249719 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:9ebc6df9496613f373e73396d5b3b6b6 SP:Homo sapiens +@SQ SN:chr2 LN:242951149 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:b12c7373e3882120332983be99aeb18d SP:Homo sapiens +@SQ SN:chr3 LN:199501827 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:0e48ed7f305877f66e6fd4addbae2b9a SP:Homo sapiens +@SQ SN:chr4 LN:191273063 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:cf37020337904229dca8401907b626c2 SP:Homo sapiens +@SQ SN:chr5 LN:180857866 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:031c851664e31b2c17337fd6f9004858 SP:Homo 
sapiens +@SQ SN:chr6 LN:170899992 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:bfe8005c536131276d448ead33f1b583 SP:Homo sapiens +@SQ SN:chr7 LN:158821424 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:74239c5ceee3b28f0038123d958114cb SP:Homo sapiens +@SQ SN:chr8 LN:146274826 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:1eb00fe1ce26ce6701d2cd75c35b5ccb SP:Homo sapiens +@SQ SN:chr9 LN:140273252 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:ea244473e525dde0393d353ef94f974b SP:Homo sapiens +@SQ SN:chr10 LN:135374737 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:4ca41bf2d7d33578d2cd7ee9411e1533 SP:Homo sapiens +@SQ SN:chr11 LN:134452384 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:425ba5eb6c95b60bafbf2874493a56c3 SP:Homo sapiens +@SQ SN:chr12 LN:132349534 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d17d70060c56b4578fa570117bf19716 SP:Homo sapiens +@SQ SN:chr13 LN:114142980 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:c4f3084a20380a373bbbdb9ae30da587 SP:Homo sapiens +@SQ SN:chr14 LN:106368585 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:c1ff5d44683831e9c7c1db23f93fbb45 SP:Homo sapiens +@SQ SN:chr15 LN:100338915 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:5cd9622c459fe0a276b27f6ac06116d8 SP:Homo sapiens +@SQ SN:chr16 LN:88827254 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:3e81884229e8dc6b7f258169ec8da246 SP:Homo sapiens +@SQ SN:chr17 LN:78774742 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:2a5c95ed99c5298bb107f313c7044588 SP:Homo sapiens +@SQ SN:chr18 LN:76117153 AS:HG18 
UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:3d11df432bcdc1407835d5ef2ce62634 SP:Homo sapiens +@SQ SN:chr19 LN:63811651 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:2f1a59077cfad51df907ac25723bff28 SP:Homo sapiens +@SQ SN:chr20 LN:62435964 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:f126cdf8a6e0c7f379d618ff66beb2da SP:Homo sapiens +@SQ SN:chr21 LN:46944323 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:f1b74b7f9f4cdbaeb6832ee86cb426c6 SP:Homo sapiens +@SQ SN:chr22 LN:49691432 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:2041e6a0c914b48dd537922cca63acb8 SP:Homo sapiens +@SQ SN:chrX LN:154913754 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d7e626c80ad172a4d7c95aadb94d9040 SP:Homo sapiens +@SQ SN:chrY LN:57772954 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:62f69d0e82a12af74bad85e2e4a8bd91 SP:Homo sapiens +@SQ SN:chr1_random LN:1663265 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:cc05cb1554258add2eb62e88c0746394 SP:Homo sapiens +@SQ SN:chr2_random LN:185571 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:18ceab9e4667a25c8a1f67869a4356ea SP:Homo sapiens +@SQ SN:chr3_random LN:749256 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:9cc571e918ac18afa0b2053262cadab6 SP:Homo sapiens +@SQ SN:chr4_random LN:842648 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:9cab2949ccf26ee0f69a875412c93740 SP:Homo sapiens +@SQ SN:chr5_random LN:143687 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:05926bdbff978d4a0906862eb3f773d0 SP:Homo sapiens +@SQ SN:chr6_random LN:1875562 AS:HG18 
UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d62eb2919ba7b9c1d382c011c5218094 SP:Homo sapiens +@SQ SN:chr7_random LN:549659 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:28ebfb89c858edbc4d71ff3f83d52231 SP:Homo sapiens +@SQ SN:chr8_random LN:943810 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:0ed5b088d843d6f6e6b181465b9e82ed SP:Homo sapiens +@SQ SN:chr9_random LN:1146434 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:1e3d2d2f141f0550fa28a8d0ed3fd1cf SP:Homo sapiens +@SQ SN:chr10_random LN:113275 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:50be2d2c6720dabeff497ffb53189daa SP:Homo sapiens +@SQ SN:chr11_random LN:215294 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:bfc93adc30c621d5c83eee3f0d841624 SP:Homo sapiens +@SQ SN:chr13_random LN:186858 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:563531689f3dbd691331fd6c5730a88b SP:Homo sapiens +@SQ SN:chr15_random LN:784346 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:bf885e99940d2d439d83eba791804a48 SP:Homo sapiens +@SQ SN:chr16_random LN:105485 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:dd06ea813a80b59d9c626b31faf6ae7f SP:Homo sapiens +@SQ SN:chr17_random LN:2617613 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:34d5e2005dffdfaaced1d34f60ed8fc2 SP:Homo sapiens +@SQ SN:chr18_random LN:4262 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:f3814841f1939d3ca19072d9e89f3fd7 SP:Homo sapiens +@SQ SN:chr19_random LN:301858 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:420ce95da035386cc8c63094288c49e2 SP:Homo sapiens +@SQ SN:chr21_random LN:1679693 
AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:a7252115bfe5bb5525f34d039eecd096 SP:Homo sapiens +@SQ SN:chr22_random LN:257318 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:4f2d259b82f7647d3b668063cf18378b SP:Homo sapiens +@SQ SN:chrX_random LN:1719168 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:f4d71e0758986c15e5455bf3e14e5d6f SP:Homo sapiens +@RG ID:20FUK.1 PL:illumina PU:20FUKAAXX100202.1 LB:Solexa-18483 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20FUK.2 PL:illumina PU:20FUKAAXX100202.2 LB:Solexa-18484 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20FUK.3 PL:illumina PU:20FUKAAXX100202.3 LB:Solexa-18483 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20FUK.4 PL:illumina PU:20FUKAAXX100202.4 LB:Solexa-18484 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20FUK.5 PL:illumina PU:20FUKAAXX100202.5 LB:Solexa-18483 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20FUK.6 PL:illumina PU:20FUKAAXX100202.6 LB:Solexa-18484 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20FUK.7 PL:illumina PU:20FUKAAXX100202.7 LB:Solexa-18483 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20FUK.8 PL:illumina PU:20FUKAAXX100202.8 LB:Solexa-18484 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20GAV.1 PL:illumina PU:20GAVAAXX100126.1 LB:Solexa-18483 DT:2010-01-26T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20GAV.2 PL:illumina PU:20GAVAAXX100126.2 LB:Solexa-18484 DT:2010-01-26T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20GAV.3 PL:illumina PU:20GAVAAXX100126.3 LB:Solexa-18483 DT:2010-01-26T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20GAV.4 PL:illumina PU:20GAVAAXX100126.4 LB:Solexa-18484 DT:2010-01-26T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20GAV.5 PL:illumina PU:20GAVAAXX100126.5 LB:Solexa-18483 DT:2010-01-26T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20GAV.6 PL:illumina PU:20GAVAAXX100126.6 LB:Solexa-18484 DT:2010-01-26T00:00:00-0500 
SM:NA12878 CN:BI +@RG ID:20GAV.7 PL:illumina PU:20GAVAAXX100126.7 LB:Solexa-18483 DT:2010-01-26T00:00:00-0500 SM:NA12878 CN:BI +@RG ID:20GAV.8 PL:illumina PU:20GAVAAXX100126.8 LB:Solexa-18484 DT:2010-01-26T00:00:00-0500 SM:NA12878 CN:BI +@PG ID:BWA VN:0.5.7 CL:tk +20GAVAAXX100126:8:21:18798:198557 83 chr1 9999918 60 101M = 9999594 -424 CAAGCTCCGCCTCCCAGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCCAATTTTTTTGTATT 7A9AAA79;;.@=BAA;60@C=BCBBAC@@@5;A>A??@B6?BA;BB9BB@BBABBAABBBBBBBBBBBCBBBBBBBCBBBCBCBBBCCBBCCB@CCBB UQ:i:0 +20GAVAAXX100126:8:26:17788:24663 99 chr1 9999965 60 101M = 10000321 441 GAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATAGGGTTTCACCATATTAGCCAGGATGGTCTTGATC CDBACCCDBCCDADCACCCACABBBABC;BCA;BBCBCBBCDCDDDDDA@BBCDDDABCACCDCDCCCCCAD@BCABCCCCDCCBCCCCDCCCCCDDCDDD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHGHHHHIHHHDHHHHHHHHHHHHHHHHHHHGGGGHHHHGGHGHHHHHHHHHHFHGHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:47:10796:84088 99 chr1 9999998 60 101M = 10000328 430 CCTGGCCAATTTTTTTGTATTTTTAGTAGAGATAGGGTTTCACCATATTAGCCAGGATGGTCTTGATCTGCTGACCTCATGACCCACCCGCCTCGGCCTTC CBC@CCCCDCDDDDDAA@BCDDDAACBCCDCDCC@CCBBDBBABCCCCDCCBBCCCCC>C;BDDBDCBDBBDBDABDBCCBDACCC@AA9CCDC;DCCDED MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHGGEGHHHHGGHHHHHHHHHGHHHIHHIHHHHHHHHHHHHHHGHDHCHHHHHHHHHHHHHHHHHHHHHHHHHIGGGHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:62:7317:185100 675 chr1 10000001 60 78M23S = 10000382 415 GGCCAATTTTTTTGTATTTTTAGTAGAGATAGAGTTTCACCATATTAGCCAGGATGGCCTTAATCTGCTGACCTCATAATCACACAGCCTCGTCCTTCAAA >?78+*?88CBC24B.BBCA>6=846;8C-2@(?@10AB++7C>A/C9<>A,@?.B,()C6*B3:D8@C*+92A:BA######################## MD:Z:32G24T3G15G0 PG:Z:BWA RG:Z:20GAV.8 AM:i:25 NM:i:4 SM:i:25 MQ:i:60 OQ:Z:==76**=76BCA44A.ACC?;4:43273B,1=*=A34BB--8B7BCDB;CCCDDCCBE>D@DD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:HHHHHHHGGGGHHHHGGHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHEEHHHHHHHHHHHHGHBHGHH UQ:i:0 +20GAVAAXX100126:8:64:9003:28381 83 chr1 10000026 60 101M = 9999704 -422 GAGATAGGGTTTCACCATATTAGCCAGGATGGTCTTGATCTGCTGACCTCATGACCCACCCGCCTCGGCCTTCCAAAGTGCTGGGATTACAGGTGTGAGCC DCCBDD?A2ABB@BCBCCCDC@BC@CA@AB@ABCDCBCCCCBACAACCDBCCB@BCB@CC;BCCD;BBCCDDCBDDDACBCCBBBCDCCBDBACACBBBCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HFHFHHDG9GGGGGHHHHHHHDHHGIGGFFFHGHHHHHGHHHGHGFHHHHHHHEGHHFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:66:8049:3241 83 chr1 10000031 60 101M = 9999623 -508 AGGGTTTCACCATATTAGCCAGGATGGTCTTGATCTGCTGACCTCATGACCCACCCGCCTCGGCCTTCCAAAGTGCTGGGATTACAGGTGTGAGCCACCGC ?DCBEEDCCCCCCCD=DBCBDCBCCBADCDCBCCCCBCCBBCCDB@CB9BCBBCC;BCCD;BBCCDDCBDDDACBCCBBBCDCBBDBACACBDCCCCA:BC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BHHHHHHHIHHHHHHBHHHHHHHHHHHHHHHHHGHHHHHHGHHHHFHHBGHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:65:18189:17218 163 chr1 10000038 60 101M = 10000302 364 CACCATATTAGCCAGGATGGTCTTGATCTGCTGACCTCATGACCCACCCGCCTCGGCCTTCCAAAGTGCTGGGATTACAGGTGTGAGCCACCGCGACCGGC @CABAAABCCBBCBCCCCCCBBDCBCCBDCBDBA@BB?B@B@AC?CACC;@CD@:BBCBB>CB>=A;ACC@:CA@@@>A<@B9@;B?C4BB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCCBCCCBCCCCBCCBBCB@BBBBBBBBBBBB>BBBABAB@BBABBBBABBAB@?AB@@@B@>9?=@BAA8A@?>@@=;?6?4?=?@6@??<@?@==8=A UQ:i:0 +20GAVAAXX100126:8:27:11272:197846 611 chr1 10000110 60 79M22S = 10000329 297 GATTACAGGTGTGAGCCACCGCGACCGGCCTGCTCAAGATAATTTTTAGGGCTAACTATGACATGAACCCCAAAATTCCTGTCCTCTAGATGGCAGAAACC 1;>5;;5?9>>;:;02<=<<5>5@;<4@9<<=:><<>=A6;*<;:=B8?)78:9::;9;####################### MD:Z:79 PG:Z:BWA RG:Z:20GAV.8 AM:i:25 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:5555551455<==@014544EEEEA>>F@>>E>AE>AAF:4+454AAF=A/=AA50=9==A7A=AF;F-;44544=5?####################### UQ:i:0 +20GAVAAXX100126:8:7:13574:93999 163 chr1 10000133 60 101M = 10000392 359 
ACCGGCCTGCTCAAGATAATTTTTAGGGCTAACTATGACATGAACCCCAAAATTCCTGTCCTCTAGATGGCAGAAACCAAGATAAAGTATCCCCACATGGC @AB:AABCBBDBCBCCCCDCCCCCCCCCCDCDADB=>A@BAACD=BCCCDBBC@CCCCABBCB@CC@@AA@CBBAB?BC=ABC@CDB??A?B>AC?CCABAB?BAA?@?AA@A?@AA@<@@@@A@?;@=?A9AA@??:>B UQ:i:0 +20GAVAAXX100126:8:65:18208:188053 163 chr1 10000137 60 101M = 10000358 321 GCCTGCTCAAGATAATTTTTAGGGCTAACTATGACATGAACCCCAAAATTCCTGTCCTCTAGATGGCAGAAACCAAGATAAAGTATCCCCACATGGCCACA @CCC?@BBBDBBCAEDBBCCCCDDCDDEBECCBC?@AAB@?CCDADDDCC@AD@BCADADBCCBCC8CCCBDBABCABABBBDABDCB@C@?C@CACD?BC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCCCCCCCCCCCCCCCCCCBCCCCBCCCCCBBCCCBBBCCBBCBBBBBBBCBBBCCBCBBBBBBACBBBABBBBBBB?BB@B>BBBBABBBBBA@ABBBB UQ:i:0 +20GAVAAXX100126:8:65:17424:133143 163 chr1 10000184 60 101M = 10000444 360 ATTCCTGTCCTCTAGATGGCAGAAACCAAGATAAAGTATCCCCACATGGCCACAAGGTTAAGCTCTTATGGACACAAAACAAGGCAGAGAAATGTCATTTG @CBA@BAABCDBDACCBBCBC=CDDACCDCCCCDCB;BBABCCC?CCBCC@BAADCBBCBBBBBBD@CBB@DBA?BBBB?@BBABCCC=BAAA@A@CBAAB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBCBBABBBBBBBB@BBBB9BBBBBBBBBBBBBB?BBBBBBBBBAABBAABBAAA:AAAABABAAB@A@ABA@A@A>@?A?@A@@@>>A>>@9@@=?;A UQ:i:0 +20GAVAAXX100126:8:24:16681:115751 83 chr1 10000185 60 101M = 9999853 -432 TTCCTGTCCTCTAGATGGCAGAAACCAAGATAAAGTATCCCCACATGGCCACAAGGTTAAGCTCTTATGGACACAAAACAAGGCAGAGAAATGTCATTTGG 6EA6DADD?DCCD@CCCB?DBDDCCBDDBCCDDDACCDCCCBBBCCBB@BBBDDBADCDCBCDCDCCCBBBBC=>DDBBDDBBBDBDBDD@CADCCDBCBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:23 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:5HG;HHHHEHHHHGHHHHDHHHHHHHHHHHHHHHHHHHHHHHGHHHHHFHHHHHHHHHHHHHHHHHHHHHHHHEEHHHHHHHHHHHHHHHFHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:65:2449:158320 147 chr1 10000187 60 24S77M = 9999832 -431 AACTATGACATGACCCCCAAAATTCCTGTCCTCTAGATGGCAGAAACCAAGATAAAGTATCCCCACATGGCCACAAGGTTAAGCTCTTATGGACACAAAAC #########################@<=?>?A@67C@?A@>?4BACB=D?>A==@;?BBCBACBCCDCADCCDBCACDACCCBBBBAA@BA@ MD:Z:77 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:#########################?:?::?><68B>?@??;1??@@@A>@@;@@@@@?>A@@A?AB@AA@BABBBBBBBBBBB?BBBBBBBABABBCBBB UQ:i:0 +20GAVAAXX100126:8:66:6889:6402 163 chr1 10000196 60 74M27S = 10000514 418 TAGATGGCAGAAACCAAGATAAAGTATCCCCACATGGCCACAAGGTTAAGCTCTTATGGACACAAAACAAGGCAGAGAAATGTCATTTGGCATTGGTTTCA @C<B>?A@BDD?<=?DD>-A=C?=?;97A7B<=7>532;C><9B7D=8<2:B############################ MD:Z:74 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BC;9B:=>:B>=>;=B;B@>7;;-08B9@7A6B:562)8B##################################################################################### MD:Z:13C21 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:1 SM:i:37 MQ:i:29 OQ:Z::B5BCB=6,A4:1)4@##################################################################################### UQ:i:8 +20GAVAAXX100126:8:65:18189:17218 83 chr1 10000302 60 101M = 10000038 -364 CCATAGCAACATTTGTAAATGACCAGCCTGATGGGCTGGCTTGAAAACTTGGCTTATAGGCATCCTAAACCCACGTTCTATCCCCTGATACTCCCCTCTTC DDCDDCCDCCCDDCACDDCBBCCBDBCCCBCCBBBCCCBBDCBDDDBCDCBBCDCCCCBBBCDCCCDD@CCBA;ADDCCCDCCCCCBCCBCDCCCCDADDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHIHHHHHHHFHHHHHHHHHHHHHHHHHHHIHHHHHHGHHHHHHHHHHHHHHHHHHHHHFHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:4:6560:50417 163 chr1 10000312 60 76M25S = 10000637 425 ATTTGTAAATGACCAGCCTGATGGGCTGGCTTGAAAACTTGGCTTATAGGCATCCTAAACCCACCTTCTATCCCCTGATACTCCGCTTTTCATTACAGCAC ?>9?=7;BA;:B;/5BCD?CCC>)A8=:$;30B474@>8<8?*B)BE7?@+?<.-/+39978BCAA########################## MD:Z:64G11 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:?;9=:18A?7:???B:B<4-4;,6ACB8?>'=60A627@:6<8A*@*>B58>+?;+-,)69259@CB@########################## UQ:i:14 +20GAVAAXX100126:8:26:17788:24663 147 chr1 10000321 60 15S86M = 9999965 -441 AGCAACATTTGTAAATGACCAGCCTGATGGGCTGGCTTGAAAACTTGGCTTATAGGCATCCTAATCCCACGTTCTATCCCCTGATACTCCCCTCTTCATTA ################B=?@@@B@>>4:CB5;C??BDB@@3>BBB8&4BCB;?DCCCB@CCDCBACA@?@=>2:?>77@>B@BB??=@?BA@B?AA>>>AAB=;2?BBB<(9BBAB@BBBBA=BBCCAABA7@BBBBBBBBBBCBCB UQ:i:5 
+20GAVAAXX100126:8:3:21049:178760 163 chr1 10000325 60 95M6S = 10000650 425 CAGCCTGATGGGCTGGCTTGAAAACTTGGCTTATAGGCATCCTAAACCCACGTTCTATCCCCTGATACTCCCCTCTTCATTACAGAACAACAAAGAAAGAC @CBAABACCBCCBCBCBDC?CDDEADCCCBDCAAB=?==@@CBCBD8CCC?;CAAD=C@BBC?>BA=?C>?=CA@CB?B4>?####### UQ:i:0 +20GAVAAXX100126:8:47:10796:84088 147 chr1 10000328 60 101M = 9999998 -430 CCTGATGGGCTGGCTTGAAAACTTGGCTTATAGGCATCCTAAACCCACGTTCTATCCCCTGATACTCCCCTCTTCATTACAGAACAACAAAGAAAGACAAA ;?@B::A5;?8B:9@?=@DB@B@97@>4':8>B>?>?@>:BC>CCAC6AC>C@CCCABBCBB?>A3>CCCBCBCBD@B?BEBCACCAACCDBCBCA@ABC@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z::7>@55<37=6=;6:9A;78?>3'4::>>A;;@<;AA8AB>>?AB@A@AABB@BBBBA@B3@BBB@B?ABC=A:ACBCBCBBBBBBBBBBBBBBBB UQ:i:0 +20GAVAAXX100126:8:27:11272:197846 659 chr1 10000329 60 22S79M = 10000110 -297 GCAACCTTTGTAACAGACCTGTCTGATGGGCTGGCTTGAAAACTTGGCTTATAGGAATCCGAAACCTACGTACTATCCCCTGATACTCCCCTCTTCATTAC #######################@1>:@C3ACBB1*?CA7(-)87)'>%.(-65;B&@<765@=B3@2C5;C7,*?76@A; MD:Z:33C4T5C4T29 PG:Z:BWA RG:Z:20GAV.8 AM:i:25 NM:i:4 SM:i:25 MQ:i:60 OQ:Z:#######################?0=8??0>?839350@@);A?0)@1*?BBC@-,ACA@)/,6=)):*-)(128@)>;333?;@1=7B6;B3+*@33AB7 UQ:i:34 +20GAVAAXX100126:8:5:4250:100581 147 chr1 10000330 60 9S92M = 10000006 -415 TGACCAGCCTGATGGGCTGGCTTGAAAACTTGGCTTATAGGCATCCTAAACCCACGTTCTATCCCCTGATACTCCCCTCTTCATTACAGAACAACAAAGAA ##########?A>@==??B79=@819@;>8@BBA=@;5?A>@=::BA;B?:>>B?A;5@>?@=BBBBCCCCCCBCDCBCDCBBCBCABCAAAACBC@ MD:Z:92 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:##########?>>>7:>==77<<817?9:1@?A@??86<@@><4:??=?>?A@7@@@?@A>6A@AA?AAA@B@BBBB@BBABBBB@BBBBBBBBBBBBBBB UQ:i:0 +20GAVAAXX100126:8:65:18208:188053 83 chr1 10000358 60 101M = 10000137 -321 TAGGCATCCTAAACCCACGTTCTATCCCCTGATACTCCCCTCTTCATTACAGAACAACAAAGAAAGACAAATTCTTAGCATAAAGTACACCAGATTTGCTA DECCCCDDCCA@>BBAC9ADDCCCDCCCCCBCCCCDCCCCDCDDBCDCBBCBDBBDB@@DDBDDCBBBDDCDDCDCDBBCCDDDACBBCCBDBCDDC@BCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:HHHHHHHHHHGGAGGFHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:62:7317:185100 595 chr1 10000382 60 66S35M = 10000001 -415 TTAAATGTCCAGCATGATGGGCTGGCTTGAAAACTTGGCTTATAGGCATCCTAAACCCACGTTCTATCCCCTGATACTCCCCTCTTCATTCCAGAACAACA ##################################################################################################### MD:Z:24A10 PG:Z:BWA RG:Z:20GAV.8 AM:i:25 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:##################################################################################################### UQ:i:2 +20GAVAAXX100126:8:7:13574:93999 83 chr1 10000392 60 101M = 10000133 -359 CTCCCCTCTTCATTACAGAACAACAAAGAAAGACAAATTCTTAGCATAAAGTACACCAGATTTGCTACAGCCTAAGACTGGTCTGACAAATCCTTTTTTTC DEDDDCDCDDCCDCCBDBDCBDC@ADDBDDDBCBDDCDCCDCDBBCCDDDACBBBCBCBCDDCBCCBBCBCCCDDBBCCBADCCBBBDDCDCAADDDBDDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHGHHHHHHHHHHHHHIHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHH UQ:i:0 +20GAVAAXX100126:8:25:6601:158705 99 chr1 10000400 37 101M = 10000802 436 TTCATTACAGAACAACAAAGAAAGACAAATTCTTAGCATAAAGTACACCAGATTTGCTACAGCCTAAGACTGGTCTGACAAATCCTTTTTTTCTACTAATC 8;>7=;A?@5>>=>>;3>6=5=4328<::9;;=;:<6198=*7;9>A>??;<:>>?>B6==:41>A:4;, MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:37 MQ:i:29 OQ:Z:@CCEE@CCC;44444A7A;<7<863@<===55444=:1===,:@@FFFFD@==CAE88CE>9@0 UQ:i:0 +20GAVAAXX100126:8:4:15177:54134 99 chr1 10000435 60 101M = 10000750 415 GCATAAAGTACACCAGATTTGCTACAGCCTAAGACTGGTCTGACAAATCCTTTTTTTCTACTAATCAGACCCTCGCAGAGAAGACAAATAGTGGCATTTAC CBCACDDC@CACAB-CDCDDBBCCACCBBDCDCDACBCABDBDACDDCBBCDDDDDA>CAADBDCBBCDABBDB;BCCDCDDCDACDDCCCBCDCDCDBDB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHIHHHHH-HHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHGEGGHHIHHHFHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHIFHH UQ:i:0 +20GAVAAXX100126:8:65:17424:133143 83 chr1 10000444 60 101M = 10000184 -360 
ACACCAGATTTGCTACAGCCTAAGACTGGTCTGACAAATCCTTTTTTTCTACTAATCAGACCCTCGCAGAGAAGACAAATAGTGGCATTTACCGTTTACAC DDCDCDCCDDCBCCCBDBCCCDDBBCCBADCCBCBDDCDCBAAADDDDCCBCCDCDBCBBCCCD;BBCBCBDDBBBDDCCDACBBBCDDCCC;ADDCABCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGGHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:61:14621:35929 163 chr1 10000470 60 101M = 10000818 448 TGGTCTGACAAATCCTTTTTTTCTACTAATCAGACCCTCGCAGAGAAGACAAATAGTGGCATTTACCGTTTACACAACATATACAGAGAGAGAGAGACCAG =BC@ABAAAC=DCADDBBCCCCBDCBDCEDCC@C?B>C<:@CAC?BDB@@@BDACCACCCAC6?AA>8BCACA?>AB?CBA9C?BDDB>C@>C@DACB>@7 MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:CCBBB=A?ACCB?DDBC>5@@?>:?BBC@DBC@C@?;;7=>9>=>;@?AACCBACDDBDCDAD@?D:=?=><===?? UQ:i:0 +20GAVAAXX100126:8:21:13663:85034 163 chr1 10000518 60 86M15S = 10000794 376 ACAAATAGTGGCATTTACCGTTTACACAACATATACAGAGAGAGAGAGACCAGAAACTTGGCTGGTAAGAATTTCTTCCTCTGGCCAGGAGCGGTGGCTCA @ABCBABBBBCBCABCCAC;ACCCACBCDACCCCB>ABBBAABC@CCCBA@CC@DDADCA?BB?CB=CABACCA@CB@A=@@A@@@A@=;@@>???################ UQ:i:0 +20GAVAAXX100126:8:68:13621:42696 595 chr1 10000556 29 66S35M = 10000250 -340 TTCTACAAATCAGACCCTCGCAGAGAAGACAAATAGTGGCATTTACCGTTTACCCAACATATACAGAGAGAGAGAGACCAGAAACTTGGCTGGTAAGCATT ##################################################################################################### MD:Z:31A3 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:1 SM:i:0 MQ:i:37 OQ:Z:##################################################################################################### UQ:i:2 +20GAVAAXX100126:8:4:6560:50417 83 chr1 10000637 60 101M = 10000312 -425 TTGGGGAGGTTGAGGCGTGTGGATCAGAAGGTCAAGAGATCCAGACCATCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCTG A37:<;++7+::2%848-8:909::>9;;;:9<:<;6,:A>;<<==;6>=<@BB:B@@=>;=AC=D;;7778A@=D>CBA@>DD>C39@>=@>;;=:8:6: MD:Z:2T6C7G83 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:3 SM:i:37 MQ:i:60 
OQ:Z:E84<@<-05-443(444/4553<<=<@@=@>544446-=DA?@?A?=:A?=EHH=HFFDDBEHHEH55445FHDDHCHHGCCHHEH11444FDBBDBFEBE UQ:i:44 +20GAVAAXX100126:8:3:21049:178760 83 chr1 10000650 60 101M = 10000325 -425 GGCGGGTGGATCAGAAGGTCAAGAGATCCAGACCATCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCTGGACATGGTGGTGG DD;CCADCCCDCDBDDCADBDDBDBCDCBDBBCBCDCCCBBCBDCBCCBACBDDBCCC;ADCDCCBCA@DDDCAABDDDDCDCDBCCBBBBCCCACC?CBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHGGHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:4:15177:54134 147 chr1 10000750 60 101M = 10000435 -415 GGCGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAACCCAGGAGGTGGAGGTTGCAGTGAGCCTAGATCACGCCACTGCACTCCA CA8@1@B?BC>D?BCDBBCAB>CBBACABDABEC?CDBBDACCCCCCAC@@ACDCDACDC@BAABB@DCBCDACBDBCCCDBCCBA;ACA,ACBA@ABCB@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:A<>;2>=?A@?@@AAAAAA?B=BABBABABABBBBBBBBBBBB@BBBBBCA;BBBBBBBBBBBBBBBBBBBBBBBBBABBBBBBBBBBB@)@BBB@BBBBB UQ:i:0 +20GAVAAXX100126:8:21:13663:85034 83 chr1 10000794 60 101M = 10000518 -376 TGCTTGAACCCAGGAGGTGGAGGTTGCAGTGAGCCTAGATCACGCCACTGCACTCCAGCCTGGCGACACAGCGAGACTCCGTCTCAAAAAAAAAAATAATA =0;BC@D>>>>>>>=DDCCBBCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:@8@FFFH@FDHHHHHHHHHHIHHHHHHHHDHHHIHHEHHGHEHHHHGHGBHFHFHHHHFHHHHFHHHHHHHHHEHHHEHHGHHHEEEEEEEEEHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:25:6601:158705 147 chr1 10000802 29 66S35M = 10000400 -436 TGTACATGGTGGTGGGAGCATGGAGTCCCACCTACTCAGGAGGCTGAGGCAGGAGAATTGCTTTAACCCAGGAGGTGGAGCTTGCAGTGAGCCTAGATCAC ##########################################################################??=9;5*BBA6:?9'@5;096;>8?B? MD:Z:14G20 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:1 SM:i:0 MQ:i:37 OQ:Z:##########################################################################@;9654)@@@35@<'<5;-:/<<5@?? 
UQ:i:9 +20GAVAAXX100126:8:61:14621:35929 83 chr1 10000818 60 101M = 10000470 -448 TGCAGTGAGCCTAGATCACGCCACTGCACTCCAGCCTGGCGACACAGCGAGACTCCGTCTCAAAAAAAAAAATAATAAATAAGAAAAGGAAAAAAAAGAAT 7D2DAD@AAA?@DBC??@9;=>AAC?BBCDCADABCCCB;BCB@BDB;BDB?CDC;ADADA@@@@A@@>@DCCDCCDDCBADBDDDDB@AAAAADDD@CCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:5H8HHHGDFFEFHHHCDDFBBDFFHEHHHHHFHFGHHHHHHHIDHHHHHHHDHHHHHHGHGGGGGGGGEGHHHHHHHHHGGHHHHHHHGGGGGGHHHHHHH UQ:i:0 +20GAVAAXX100126:8:22:20335:91038 163 chr1 10000847 29 68M33S = 10001097 350 TCCAGCCTGGCGACACAGCGAGACTCCGTCTCAAAAAAAAAAAAAATAAAAAAAAAAAGAAAAAAAAAAAAAAAAATAAAGGAAAAGCCAAATGCAGAGAA @BBBAABCBCA;A@B@CBA:CBB?CBC;BBDB9ACC?<@A??2)<4';C72<8)AC;%7(=CCBCD=################################## MD:Z:43T6T2G5G8 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:4 SM:i:0 MQ:i:37 OQ:Z:BBBBBBBBABABABAABAAAAA??ABBB?ABA7?BBA>AA@?2(?4'8A22;3'AAAAA?################################## UQ:i:40 +20GAVAAXX100126:8:65:4263:123939 99 chr1 10000877 60 101M = 10001182 403 CAAAAAAAAAAAATAATAAATAAGAAAAGGAAAAAAAAGAATACAACTCAGGAACAGCCAAATGGAGGAGATGCATGGGACAAGGTTTAGTGGGGGGCTGC CCDBDDD>>>??>A>?A?DDCCDCDDD?=CDDDDDD??>@A@C>CDADBCCCBDACCBACDDCBCDCCDCA:;9:B@@ABACBACAB@>;::555@?>DD5 MD:Z:0T0C99 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHEEEEEECEECFHHHHHHHHHEEHHHHHHHEECECCHFHHHHHHHHFHHHHHGHHHHHHHHHHHE@B@=GGFFGHHFGHDFDD?54444CCCHH5 UQ:i:68 +20GAVAAXX100126:8:48:15961:32254 675 chr1 10000919 29 27M74S = 10001301 416 ACAACTCAGGAACAGCCAAATGGAGGATTTGCGCGGGCCTTGGTTTAGTGGGCGGCGGCGGAGCTTTCTCGCCCTCCGCAAGTGAATCACCCTTCCAGTGC 8=1==>?4A############################################################################################ MD:Z:27 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:3=BCBBAC;@::3;>78;5A@############################################### MD:Z:55 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BB=5@BA@<;>ABBAA@7@A:/9@=55@<4@9)9@84@>69=70(=9:56>5;A############################################### UQ:i:0 
+20GAVAAXX100126:8:22:20335:91038 83 chr1 10001097 37 101M = 10000847 -350 AAATTGAAAAGAAAATCCTAACTTTCCAAGCCTAAGTAACAAAAGGACCAGAGGCTACCCCTTTGCAAACCCCTACCTTTTCTGTGGCAGATGGGAAATTG EECED?@?ADCDDB@CCCCD;CDDDCBDDBCCCDDABAABDDDDBBBCBDBCBBCC@CCCCDDCBBDD:CCCCCACCDDDDCCACBBBDBCCBCBDDADCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:37 MQ:i:29 OQ:Z:HHHHHEGEGHHHHFEFHHHHBHHHHHHHHHHHHHHHGGFHHHHHHHGHHHHHHHHHFHHHHHHHHHHHCHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:48:6949:111164 99 chr1 10001106 60 101M = 10001431 425 AGAAAATCCTAACTTTCCAAGCCTAAGTAACAAAAGGACCAGAGGCTACCCCTTTGCAAACCCCTACCTTTTCTGTGGCAGATGGGAAATTGTAAGTACCT CCCBDDCBBDCDADDDBBCDCBBDCDCBCDACDD@ACDABCCDCCBCCABBBDDDBBCDDABBCDCABDDDDBDBCBCBBCA?A@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGH@HHHHBGGEFFFFFGAFFFA UQ:i:0 +20GAVAAXX100126:8:21:7038:31450 99 chr1 10001134 60 101M = 10001404 359 AACAAAAGGACCAGAGGCTACCCCTTTGCAAACCCCTACCTTTTCTGTGGCAGATGGGAAATTGTAAGTACCTCTAATTAATTAATTAATTGCCTTTTTTT CD@ADDAACDABCCDCCBDCABBBDDDBBCDDABBBCCABDDDDBCBBBCBCCDCBCC;DDCDBBCDC=CABDBDCDCDCDCDCDCDCDCDCCADDDDDE@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHBHHHHHGHHHAHHHHHHHHHHHHHHHHHHHIHHHHGHHHHHHE UQ:i:0 +20GAVAAXX100126:8:65:4263:123939 147 chr1 10001182 60 2S99M = 10000877 -403 GTTGCAGATGGGAAATTGTAAGTACCTCTAATTAATTAATTAATTGCCTTTTTTTTTTTTTGAGACAGAGTCTCTCTCTGTTGCCCAGGCTGGAGTGCAGT ###B;?C2%CCD@=1?CC??ACCDACCCCABCB?ACBBCCBCA?CABBC@AAAB@? 
MD:Z:0G98 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:###@5;B/%9>.>A>@:6:>?99>A8-A?B7;@0<98=>;@=B4B@D=<>9>,1)@7A;:*3;3;,A2=-:+:A4A=AD=-=AB8:2C?8@.ABC=>8;<*/)=:A=>+5@4=+>.:14*9@1>>>B=,:A@6>1A?8?CB=3+@@- +20GAVAAXX100126:8:25:1952:6902 147 chr1 10001276 60 14S87M = 10000924 -438 GCCCAGGCTGGAGTGCAGTGGCACCATCTCAGCTTGCTGCAACCTCCACCTCCTGCGTTCAGGTGATTCCCCCACACTTACTGGGCTGCATTCCCAGAAGG ###############BB:;?9ABA?>A=C=4?CBC4DC@DCC@;=DCACB@B@BCC=CBCCBBBCDCACDDCBCCABCCBBBAA?@CD@ MD:Z:16G70 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:###############??:4>:B=@?=?>?6;BA>ABABAB4859A@B4BBABBA>BABBABBBBBBBA=BABCBB@CBBBBCCBBCBCBCBCCCCCCCCCB UQ:i:27 +20GAVAAXX100126:8:48:15961:32254 595 chr1 10001301 29 66S35M = 10000919 -416 TTTTATGAGACAGAGTCTCTCTCTGTTTCCCAGGCTGTAGTGCAGCGCCACCATCTAAGCTTGCTGCAACCTACACCTCCTGCGTTCAGGTGATTCCCCCA ##################################################################################################### MD:Z:6C28 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:1 SM:i:29 MQ:i:29 OQ:Z:##################################################################################################### UQ:i:2 +20GAVAAXX100126:8:22:20896:187194 83 chr1 10001303 60 101M = 10000959 -444 ACCTCCACCTCCTGCGTTCAGGTGATTCCCCCACACTTACTGGGCTGCATTCCCAGAAGGTTAAGGCATTCTTAGTCACAGGATGAGATAGGAGGACAGCA 7ADDAC8A;.?C>B;ADDCDBABB@D=:BB@AAAAAABBCCB=ACAABCAA:CBCBDCB@GGFFFFFFGGGHGHEGHEGHHGF@HHHHHHHAHHHHHEFHHGHHHHHHHHHHHHHDHHFHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:63:14360:55959 163 chr1 10001309 60 101M = 10001646 437 ACCTCCTGCGTTCAGGTGATTCCCCCACACTTACTGGGCTGCATTCCCAGAAGGTTAAGGCATTCTTAGTCACAGGATGAGATAGGAGGACAGCACAAGAC @ABCAACBB;ACBBCC@BCDCBCCCCCACADCCBCAA@ACBBCCABCCCCABCACDCDBCACCBBD@CCB@DBAAB?ACBBBCABDBB@C>BBAC?C=@EA MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBBBBBBBBBBB=BBCBBBBBBBBBBBBBCBBBABBBBBBBBBBBBBABB?BBBABBBBBBBBBBABBBB@B>AB@B@AABA>BA?@ABAAAA5>@B UQ:i:0 +20GAVAAXX100126:8:23:14390:98082 99 chr1 10001338 60 101M = 10001748 510 
CTTACTGGGCTGCATTCCCAGAAGGTTAAGGCATTCTTAGTCACAGGATGAGATAGGAGGACAGCACAAGACACAGGTCACAAAGACCTTGCTAATAAAAC CDCAADCCCBDBBCCDBBBCCDDCCBDCDCCBCCDBCDCCBBCACCCDCBDCDCCCCDCCDACCBCACDCDACACCCCBCACADCDAADDCCDCACDE@@> MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHGHHHHGHHHHHHGHHHGGE UQ:i:0 +20GAVAAXX100126:8:42:20734:56938 633 chr1 10001343 23 66S35M = 10001343 0 ACAGACATCTGAACCAGAGAAACCAAAGGTTGTATAGAATCTGTGTAAAATGAAGTTGATGCCGAATGGGGTGCATTCCCAGAAGGTTAAGGCATTCTTAG ################################################################################################948>8 MD:Z:4C30 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:1 SM:i:23 OQ:Z:################################################################################################@@@G@ UQ:i:2 +20GAVAAXX100126:8:42:20734:56938 693 chr1 10001343 0 * = 10001343 0 CGTTTCATGGAGCTGGAAAGCTAGAGCTTTCCTCTAAAAGGCAGGAAGATGGGTGCTAGTCACAGCTCTAATCCTAAATTGTGAAACCTTAAGCAATTCAT ##########################################################################################AB%6/>:)*?> PG:Z:BWA RG:Z:20GAV.8 OQ:Z:##########################################################################################@?'3-?6))>= +20GAVAAXX100126:8:63:8225:63647 163 chr1 10001369 60 69M32S = 10001736 467 CATTCTTAGTCACAGGATGAGATAGGAGGACAGCACAAGACACAGGTCACAAAGACCTTGCTAATAAAACAGGTTGCGGTAAAGAAGCCAGGAAAAACCAC @AAA>?>BB=@ACCC@@52B,B47>48A?>?B:A76:<<>5@>.C/?B8C;1?<)?0B################################# MD:Z:69 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:@=A@>?;BC6=A>B??B@=:>>BBB?=13@,@27@8=AA@AA>>9959;=6?8-@-==6B>.?;)>0@################################# UQ:i:0 +20GAVAAXX100126:8:21:7038:31450 147 chr1 10001404 60 11S90M = 10001134 -359 GGAGGACAGCACAAGACACAGGTCACAAAGACCTTGCTAATAAAACAGGTTGTGGTAAAGAAGCCAGGAAAACCCACCAAAACCAAGATGGTGATGAGAGT ############ACACB>ABA?@@;=BBDB>CB?7>C?CA=CCDBCCAAA@ABC@BACBABCABACBBCCCBCCCADBCCCBDCCDCBCCACBCA@BACA@ MD:Z:90 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 
SM:i:37 MQ:i:60 OQ:Z:############A@>>A:>A?@;@9<@AA?>B@=9B78D?*7DB?B@ABBBCC;@CB>CBBCCCCBDACC@CACBCACDBC@BB>B@CCCCACCECCCCBBCCBCCBBCBCCBAB@A@BD@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:A:->@:/;3?5;6?8=77B>)7A@?A@@@@AA@6BAA>BABBA?BABABBAABBABBBBBBBBBABABBBBBBBCBBBBB@CBBBBBBBCBCACACBCCBB UQ:i:0 +20GAVAAXX100126:8:41:12744:193967 163 chr1 10001443 60 101M = 10001607 264 TGTGGTAAAGAAGCCAGGAAAACCCACCAAAACCAAGATGGTGATGAGAGTGGCCTCTGGTCTTCCTCACTGCTCATCATACACTAATTATAATGCATTAG @CAA?B=ABCDDDADCCACCDDD@CBAAA8?>9<7:>BB)@8=A9CBBC@>6:@=?9AAB=BB>AA;@AABBBBCBBBBBBBBABBABA3>>>?*<6>>=?=A??=2:>=:@@99@;@@@=9?@@A?=:=<1=?<4@:7:<6:?672 UQ:i:0 +20GAVAAXX100126:8:67:20277:111625 163 chr1 10001509 37 90M11S = 10001528 88 TCACTGCTCATCATACACTAATTATAATGCATTAGCATTAGCACCCACCACACCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGGGTAGATCGGAAGAG @CB>B>AB>B;A:C@ADDC@DBC<=A7=<=@>@AA@B@>D60@ACD?AC?>CCBCA7>B?=7CA:A>BAB::@D,;C4@6;;@9C;C= MD:Z:51G35T1T0 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:3 SM:i:37 MQ:i:37 OQ:Z:BCC@B?BABBBCA<:<<7B?ABCC?CAA<@B<@?@C?@BB??@>C;-@BBB>>A>?BBBBC::A=>1@@=A=C==BBBA:=B06B1=1:BB4>7<: UQ:i:60 XT:i:90 +20GAVAAXX100126:8:2:15613:129653 99 chr1 10001516 29 101M = 10001879 396 TCATCATACACTAATTATAATGCATTAGCATTAGCACCCACCACGCCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTTACCATGTTGGCCAGGAT CBCABCCCACADBDCDCCCDCBBCCDCCBCCDCCBCABBCABCA;BBBCCBDCDCDDDDBBBCDDDAACCCCBCDA;CCC;DDDCACCCCCDCC>CDDDE= MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:HHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHGGHHHHGHHHHHHHCHHHHHHHHHIHHICHHHHH@ UQ:i:0 +20GAVAAXX100126:8:6:3004:12551 675 chr1 10001521 29 74M27S = 10001879 391 ATACACTAATTATAATGCATTAGCATTAGCACCCACCACGCCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTTACCATGTTGTCCAGGATGGTCT @B6ACD@@CC?;A:9>C9=/;>9=@.7?9C############################ MD:Z:74 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:B@7=ABCBBB@>=?@8:@:@AC>?BB=:@:6=:@BB;??8;3@5=@;(3@3=B=;;4@+4B;A############################ UQ:i:0 +20GAVAAXX100126:8:67:20277:111625 83 
chr1 10001528 37 31S70M = 10001509 -88 CTCTTCCGATCTTCACTGCTCATCATCCACTAATTATAATGCATTAGCATTAGCACCCACCACACCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGGTT ################################=?:69;;3:-;;9>-2;<:=/=.78=48>195?@9?==B<-48>==02=?>?AB?:>A>?<<8=?7<=< MD:Z:32G37 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:37 OQ:Z:################################AA>655515/555502555514'4442440@3CC@DADG?08:A44114DADGG55444DD:FDFFGGG UQ:i:20 XT:i:90 +20GAVAAXX100126:8:21:3663:199637 99 chr1 10001543 60 101M = 10001860 376 GCATTAGCACCCACCACGCCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTTACCATGTTGGCCAGGATGGTCTCCATCTCTTGACCTCATCATCC CBCADCCCCABBCABCA;BBBCCBDCDCDDDDB@ACDDD@BCBCCDCDA;CCCBDDDCABCCBCDBCBBCCCDCBCBBDBBCCBDBDDCDACDCDCCDCDD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEGHHHHGGHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:63:4634:186518 99 chr1 10001553 23 71M30S = 10001924 425 CCACCACGCCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTTACCATGTTGGCCAGGATGGTCTCCATCTCTTGACCTCATCATCCGCCCACCTCA :<>6=@>8@??;=:?>>5;;@AA@;3::83;=;:4B>=@@<<;9;9:;;;=7:;;=7;;<@;>?=############################### MD:Z:71 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:23 MQ:i:29 OQ:Z:EFCFDFFFFFFBABBF@AAAEC;@>FFFF@6=@=844445GEEEF44455:>=@=4555555555CCCCA############################### UQ:i:0 +20GAVAAXX100126:8:26:17894:31587 163 chr1 10001582 60 97M4S = 10001891 407 TAGTAGAGACGGGGTTTTACCATGTTGGCCAGGATGGTCTCCATCTCTTGACCTCATCATCCGCCCACCTCTGCTTTCCGCAACCAATCGGACTGATTACA @CBAAABBCA:CCBBCCCCACCCBACBCCCC@@BBA?>?C8C@C@BCDCA@>CACCAB@A@C9>CA9@BC@B?@=<@BCC>;48?@BBB##### MD:Z:97 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBAABABBBABBBABBBBBBBBBBBBABBB>?@BBA@AB;B@BB@BAA@A@AABA@?>?AA@?B>=?9????<=5:;??=?##### UQ:i:0 +20GAVAAXX100126:8:41:12744:193967 83 chr1 10001607 60 101M = 10001443 -264 TGGCCAGGATGGTCTCCATCTCTTGACCTCATCATCCGCCCACCTCTGCTTTCCGCAACCAATCGGACTGATTACAGGCCACTACTTCACCTCATTTACAT 
A>CACBC>CC@AC?CCBCAAD@ABBCA>B@CDB@D>;?CCBACCCCCBC?D:C;BB@BCBDCD;BBBCCBCDCBBDBBCBCCCBCDDB@CCCCBBC;=:?<:;===DAADD?B9:?8C>AB::+09@A=A>=:9:>A>HFFHHDFB@D:HEEH==26:CFDFFA@@@=H@HEHHAHHHHHHHHA4DDAHHHHHHHHHHGCHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:23:14390:98082 147 chr1 10001748 60 101M = 10001338 -510 ATTTGGACTGGAGAAAATTCTGTATCCAGGGCCCTTGAGTGGCTGCTGGGGCCCGCTCCCACCTGGTGAAATGTACTTTCATTTTCAATAACTCTCTGCTT BE@DC@CAC?@D??DA>BCAB@CABB@ACB@;BA@CBAACBBCDBCDACC?DB9CB@CAB>=AB7@BBCCDCACBCDECBCDEECCBABBABCBB@AABD@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:@@BBA???@=A@@=@<A:?@B@A@BBBABBBBBBBBABA@BAABAB?@BB>=7>@?A?@C?@A44555GEFGF####################################################################### UQ:i:4 +20GAVAAXX100126:8:28:18116:51320 99 chr1 10001794 60 101M = 10002126 432 TGGGGCCCGCTCCCACCTGGTGAAATGTACTTTCATTTTCAATAACTCTCTGCTTTTGTTGTTTCATTTTTTTCTTGGTTGTGTGTTTTCTTAAACTCTTT CBCACCCC;BDBBBBABDBCCBDDDCBBCADDDBCCDDDBCDCCDACBCBCBBDDDDBC@ACADBCCDDDDD@@BA>CBDBCCCBCDDDBDA<A?B@@AA@BA?>>@@?@@A@?AA?@?@@?AAB UQ:i:0 +20GAVAAXX100126:8:25:15890:153544 1187 chr1 10001807 60 101M = 10002149 442 CACCTGGTGAAATGTACTTTCATTTTCAATAACTCTCTGCTTTTGTTGTTTCATTTTTTTCTTGGTTGTGTGTTTTCTGATACTCTTTTTTTTTTTTTTTT @@ABB@A6BCBC;>@?ADCCABCCCC=CDCACAD@C@C5CC@BCCACABCACDCDCCADCACC@BC>7AAAB(A?(5(5ABB@DC>B>5A@CACAACA MD:Z:78T1A20 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:B>CABA?4BAAA>A8?BBBAAABBCC;ABBAABBCBBB@A?5BBBABCBBBBBBCBBBBABBABBABB?;3>'5)5ABB:@AB>=3=>>??;?<> UQ:i:14 +20GAVAAXX100126:8:24:3855:143240 163 chr1 10001827 60 85M16S = 10002054 327 CATTTTCAATAACTCTCTGCTTTTGTTGTTTCATTTTTTTCTTGGTTGTGTGTTTTCTTAAACTCTTTTTTTTTTTTTTTTTTTTTTCTTTGATTTTTCCT @CB@AAD>BBE@CCBECCCCACCBCBBBA@>BC@C=BBADDABCCBDACB/5;D>:BD3=BBA??@AD@################# MD:Z:85 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCCBB?B?BABABC=BBBBBBB+7:B;;?A3<@BB;>?BB?################# UQ:i:0 +20GAVAAXX100126:8:21:3663:199637 147 chr1 10001860 60 41S60M = 10001543 -376 
TGTGCTTTCATTTTTAATAACTCTCTGCTTTTGTTGTTTTTTTTTTTTCTTGGTTTTGTGTTTTTTTAAACTCTTTTTTTTTTTTTTTTTTTTTGATGGAG ##########################################BCDCD8(;8/;>>)0.2-9A>?(>:3=89+)CDDDDDDDDDDDDDBCDCCCBABABBD@ MD:Z:14G8C36 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:##########################################@@B@B8'::->:9(/-1/;A@@*@>3<16+(ABBBBBBBBBBBBBBBBBBBBCBBBBBB UQ:i:15 +20GAVAAXX100126:8:2:15613:129653 147 chr1 10001879 29 64S34M3S = 10001516 -396 GGAATGTCGTTTCATTTTCAATAACTCTCTTCTTTTTTTTTTTCATTTTTTTCTTGGTTTTGTTTTTTCTTAAACTCTTTTTTTTTTTTTTTTTTTTTTGA #################################################################>C;'79AA94)*ADEEEEEEDEBCDDCCCBABCBC@ MD:Z:34 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:#################################################################@B8&39>>40*)?BCCCCCCBCCBCCBBBCCBBCCB UQ:i:0 +20GAVAAXX100126:8:6:3004:12551 595 chr1 10001879 29 68S10M1D23M = 10001521 -391 TATGAGATTGTTTCTTTTTTTTTTCATTCCGCGAGTTTTTTTTTTTTTCTTTTTTTTCTTTGTTGGTTTTTTTTTATATATTTTTTTTTTTTTTTTTTTTT ##################################################################################;;;<;;;;=;;;;=?428? 
MD:Z:4C3A1^C1C21 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:4 SM:i:29 MQ:i:29 OQ:Z:##################################################################################@@@@@@@@>@@@@EA@2@A UQ:i:6 +20GAVAAXX100126:8:26:17894:31587 83 chr1 10001891 60 2S99M = 10001582 -407 CTCTTTTTTTTTTTTTTTTTTTTTGATGGAGTCTTGCTCTGTCACCCAGGCTGGAGTACAGTGGCGTGATCTCGGCTCACTGCAACCTCTGCCTCCCAGGT ###@>BA;A@AAA@AB=ADDDDDCBCCBBDADCDCBCDCCADBBCCBDBBCCBBCACBBCACBB;ACBCCCD;BBCDBBCCBBDBCCDCCBCCDCCCBBAC MD:Z:99 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:###GBGG?GEEGGEGG>GHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:63:4634:186518 147 chr1 10001924 29 46S55M = 10001553 -425 TGTTTTTTACATTTTTTTTTTTTTTTTTTTTTTTTGATGGAGTCTTGCTCTGTCACCCAGGCTGGAGTACAGTGGCGTGATCTCGGCTCACTGCAACCTCT ###############################################@C:5==8->548?7027@84B,-B2**(%6A;>))B4B:A4-B9<==A@7>A@? MD:Z:55 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:0 MQ:i:23 OQ:Z:###############################################A?95;<3'>:08?;029B88A)+@2(**)6?8<))@7@9@5-@88;=?A7>@<> UQ:i:0 +20GAVAAXX100126:8:65:13649:138985 147 chr1 10002037 60 26S75M = 10001701 -410 GCCCGCCGAAGAGCTGGGACCACAGGCACCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACAGTGTTTCACTATGTTAGCCAGGATGGTC ###########################ADB3?8>9DC@;=BC;B>CC9DBBDCDBAACCB879C4BCBDAACC?B>DCCB?AADDBEADBCBCBAA@BAC@ MD:Z:75 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:###########################@B>;>8>9BBBB=BAB>>CBCCABBBB?BBBCA:9>B>784B6A14:35D4:*C4A:6:B@<;79AC667=<$87B:3>3C896+8A9<@9)6)A7*,+).>B*/5>9:A>777:??> MD:Z:39A23A21 PG:Z:BWA RG:Z:20GAV.8 AM:i:25 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:#################@1>>942A4?//353B2=*@5?;95B9;519BB;168:(;5C;7@8B>=5*8@79=7)2*@0)+*)0;A*+9;7AC=2527?== UQ:i:12 +20GAVAAXX100126:8:24:3855:143240 83 chr1 10002054 60 101M = 10001827 -327 GGCTAATTTTTTGTATTTTTAGTAGAGACAGTGTTTCACTATGTTAGCCAGGATGGTCTCAATCTCCTGACCTCGTGATCTGCCCGCCTCGGCCTCCCAAA 
AD@=CA@;DBCBDBBCC=A>9:??>6=;==?B@CCD;?CBC=CCABB5>=CD;BBC=D=<4:<< MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:FHBHHEEEEHHHEEDHHHHHHHGHGHHGHHHHHGHF@BCCBHHCBHHHHHHHHHEHCBBCDD;A@DEEHFHHHHGHHH>HHGGGEEEHHHHHHEHDEEEGG UQ:i:0 +20GAVAAXX100126:8:48:18256:122283 163 chr1 10002059 60 101M = 10002396 437 ATTTTTTGTATTTTTAGTAGAGACAGTGTTTCACTATGTTAGCCAGGATGGTCTCAATCTCCTGACCTCGTGATCTGCCCGCCTCGGCCTCCCAAAGTGCT @CA@??@ABBCBB@BCCBCCCCCACCBCBCCBCACBAA?BBCBCACDCCCA@CBCDDDBDACC?CA@DC;@CA<@BAAB?8ACBB:DA@D>ABAC@@<@@AA@>@A@@B@A?A@>BB??=;1:B> UQ:i:0 +20GAVAAXX100126:8:61:16672:156327 163 chr1 10002072 60 101M = 10002410 438 TTAGTAGAGACAGTGTTTCACTATGTTAGCCAGGATGGTCTCAATCTCCTGACCTCGTGATCTGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCG @CBBA@BCBCACB>BACCBCBDCCBACCCCCBCCABAB>ACBCCABDBCD?BA@DC;CCB?BC?AB49ACAB:AAAB?CB>9BA=CC>?C1BBA@<=C>C: MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBB@CBBBBBBBABABBBBCBBBBBBBBBBABBABBB@BBBBABBBABBAABAABB@B@@AA@@@8@@B@A@AA@A?AB=7=@6AA;@A4@@?<;7@=?@ UQ:i:0 +20GAVAAXX100126:8:28:18116:51320 147 chr1 10002126 60 101M = 10001794 -432 TCGTGATCTGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCTGGCCCTTGTTAAATTCTTTGTTCAAAATGCCAAAAACCT 85=C@;CA75=9/=ABADA@ABB@B?@CA>A@C@:A@BC@CABC;;C@CABAA?=BCCADCCCCDCCEECADCCCB@CCBCBBBAAACC@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:45==<8??14=84<>6@B@>@=B@@AAAABAA>ABCB@BABBBA@BABBBBBBBBBBCCBBBBCBCCCBBBBBBBBABCB UQ:i:0 +20GAVAAXX100126:8:65:9835:166164 163 chr1 10002137 60 101M = 10002441 404 CCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCTGGCCCTTGTTAAATTCTTTGTTCAAAATGCCAAAAACCTGGACACCCTCC @C:AABA:CBCDBACCDDB?BBDBCCCBCCABBC?8>A=B;CAA=7@C=C;?A?CCBB@@BBAABA?BDA>CB@B?=?CAAACACDB@@C@4(9C?7D>@B MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBBBBBBBBBBBBBABAA?>>A?B=?>AAB???;AA?@A?A@A????A@=<>AAB?@?@@=B@?A5'9?A3?::? 
UQ:i:0 +20GAVAAXX100126:8:26:12389:180500 99 chr1 10002138 60 101M = 10002456 418 CGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCTGGCCCTTGTTAAATTCTTTGTTCAAAATGCCAAAAACCTGGACACCCTCCA C;B@DB;CBBDBBBCDDCB@BDBCCDCDCACCCA;B@DCBBCAB;BCABCBCBBBDDBC@CDDCDBDD@BCDBCDDDCBBBBDDDBA+ACCDADABABCDE MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHIHHGHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHGHHHHHHHHHHHHGHHHFH0DHHHHHHGGDHHH UQ:i:0 +20GAVAAXX100126:8:23:10396:142336 163 chr1 10002141 60 101M = 10002456 415 CTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCTGGCCCTTGTTAAATTCTTTGTTCAAAATGCCAAACACCTGGACGCCATCCACTG @DB:BA@CBCCCCBA;'7<49/A;.A5;<)#8?):>=C?E9 MD:Z:81A8A2C7 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:3 SM:i:37 MQ:i:60 OQ:Z:BBBAAB=BABBBBB@7@AAABABBBBBBBABBA@@>B9?@.@A@ABA=:6;AA=BA??BA>;:;ABB=B@=??1>:*6737/<;-;0;<*&6;'4=8??@4 UQ:i:24 +20GAVAAXX100126:8:25:15872:153534 83 chr1 10002149 60 101M = 10001807 -442 CCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCTGGCCCTTGTTAAATTCTTTGTTCAAAATGCCAAAAACCTGGACACCCTCCACTGGTACTGAA DE?DDDADCCCCCBCDCACDB>;ACBBBCBBC;BBBCCCBBCCCDCADCDDCDDCDDCADDBDDDCCBCBDDDDBCCCBBCBBCCCDCBBCCBACCCABDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHGHHHHHHHHHHHHHHEHHHCHHHHGHHHIHHHHIHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:25:15890:153544 1107 chr1 10002149 60 101M = 10001807 -442 CCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCCCCTGGCCCTTGTTAAATTCTTTGTTCAAAATGCCAAAAACCTGGACACCCTCCACTGGTACTGAA ?7=BADCADCAD@AA@ADCADD=DDD?CBC=D@DDACAAA@A=7CCCBCBBCCBACCCABDC MD:Z:35A65 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BBFFDF2CFBF4,555;FFFFFF5F@5444/0464'AA@A9DGGHHHHHGHFFFFFHHHHHEHHHEHHHEHGHHGHGFGGFD8HHHFHHHHHHHHHHHHHH UQ:i:4 +20GAVAAXX100126:8:47:1669:10922 99 chr1 10002166 60 101M = 10002512 444 ACAGGCGTGAGCCACCGCACCTGGCCCTTGTTAAATTCTTTGTTCAAAATGCCAAAAACCTGGACACCCTCCACTGGTACTGAAGCTCACTGAACCTCCTT CACACC;C:DCBBCAB5BCABDBCBBBD@BBD=DD;@=??DB:D?CD?D?@BBCDDDD9BA?CA@@B@CB=>7=<9CDCCAE 
MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHBHHHHHHHEHHHHHHHHHHHGHGHEHHBFEDDHHCHFHHEHEFHHHHHHHBHEEHEEFAAFDHFHDFHHEHHFFFFFCFCFD?;@@@HHHHDH UQ:i:0 +20GAVAAXX100126:8:21:8551:122892 163 chr1 10002192 60 101M = 10002500 408 CTTGTTAAATTCTTTGTTCAAAATGCCAAAAACCTGGCCACCCTCCACTGGTACTGAAGCTCACTGAACCTCCTTGTTCAGGAGTTTTCATAGAGCTCCAT @DAA;?ABCC=AD@BB?BCDEEEDBCDCDDCC>CCA4)7B@CCD@DCBDCAAC?BCCB?ABACBA8D=8)@@=B=?<7>?DC@DC MD:Z:37A63 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BCCCCCAAAC=ACCCBCCCCCCCCBCCBBBAA?BBB;,;BBBBBBCBCBCB@BB<7@BBCBBBBB>@7>:>BAA>A@BBA@9@;2);>B>=>748B?A>BA UQ:i:8 +20GAVAAXX100126:8:25:5802:132383 99 chr1 10002223 60 101M = 10002536 413 ACCTGGACACCCTCCACTGGTACTGAAGCTCACTGAACCTCCTTGTTCAGGAGTTTTCATAGAGCTCCATCTCCAGCCCCCTTCCCTCCCAGGAGGTTGGG CABBCCDACABBDBBCADBCCCADBDDABDBCACBDDABCBBCDBBDBCCCDABDDDBCCCCDCBDBBCCBDBBCCBCBCBDDB=CDBCABCCBC=6??>A MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHGEHHEH?7ADEG UQ:i:0 +20GAVAAXX100126:8:21:10727:109276 99 chr1 10002227 60 101M = 10002534 407 GGACACCCTCCACTGGTACTGAAGCTCACTGAACCTCCTTGTTCAGGAGTTTTCATAGAGCTCCATCTCCAGCCCCCTTCCCTCCCAGGAGGTTGGGGTGG CCC?CACCDBBCADBCBCADBDDCBDBCADBDDABCBBDDBBDBCACDCADDDBCCCCCCBDBCCCBDBBCCBBBBBDDBBCDBCBBACDCCCDCDDD@DA MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHGHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHGHG UQ:i:0 +20GAVAAXX100126:8:28:14388:67948 163 chr1 10002344 60 101M = 10002661 417 TAATCAAAGTCCTCTAATTTGGTCTTTCTGGTGAGCAGCCCCAACCCTGAGTCACATCATTAGCATAGACTCTGGTGTGTTCTAAAGGGGCTCCTTATGAA @CCBABCCBBBCDADCDCCCBCBADCCBDCCBAC=AAB>39@:B:46?@@@@A<@@A;< UQ:i:0 +20GAVAAXX100126:8:48:18256:122283 83 chr1 10002396 60 101M = 10002059 -437 CACATCATTAGCATAGACTCTGGTGTGTTCTAAAGGGGCTCCTTATGAATAGCAAAAGACAATCCTATCACTCAGGAAATTCCAAGGAATTTAGGAGCCCT 
DDCCDCCDCDCCCCDBCCDCCCACACADDCCDDDBBCBCDCCDCCCBDCCCBBDDDCBBBDCDCCCCDBBCDBDBBDDCDDCBDDBBDCDDCDCBDCACCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:61:16672:156327 83 chr1 10002410 60 101M = 10002072 -438 AGACTCTGGTGTGTTCTAAAGGGGCTCCTTATGAATAGCAAAAGACAATCCTATCACTCAGGAAATTCCAAGGAATTTAGGAGCCCTGTGCCAGAACCAGG EDCDDCDCACACADDCCDDDBCABCDCCDCCCBDCCDBBDDDDBCBDCDCBCCDBACDBCBBDDCDDCBDDBBDCDDCCBBDBCCCCACBCBDBDCC@CBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHIHHHHHHHIHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:65:9835:166164 83 chr1 10002441 60 101M = 10002137 -404 TGAATAGCAAAAGACAATCCTATCACTCAGGAAATTCCAAGGAATTTAGGAGCCCTGTGCCAGAACCAGGGAAAAAGACCAAATATATATTTCATTAAACT EDDCBDCCDDDDBCCD?DCCCCDBBCDBDBBDDCDDCBDDBBDCDDCDBBCBCCCCACBCBCBDBCBCBB@@DDDDBBCBDDCCCCCCCDDDBCDCDBBCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHGHHHHHHHHHHHEHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:23:10396:142336 83 chr1 10002456 60 101M = 10002141 -415 AATCCTATCACTCAGGAAATTCCAAGGAATTTAGGAGCCCTGTGCCAGAACCAGGGAAAAAGACCAAATATATATTTCATTAAACTTTACTACATGATACA EDDA?DC>CCCBCDCADDCBAABDDBBDBADCDB>D?>CC@ACBCBDB>BCBCBBA@DDDD?BC@DDCACCCCCDDDBADCDDBCDDCCCCCBCCBC:@BB6C@<<>?A?BCC49C?BC?B?CCC6D@D>7BC>C@BBABBBAAAB@B=<@A9B?:C>??8BB@4=@>BB>B@BBB1ABA=D>>@?;=773/65608?DC?9:78CBAB>6=C=ACDAB@3BBC@B7)/3B>CA?AAA)?DA@CA56'>A>=>;;<251-236.9?B@@5576BA@B>6;B:>BBBA@5?B@?@9'/4BCBBBBC>(>B?>B@9>?5BCB9DAAACBB@=BBBCCECBCCDCDDCDCDDBCBBBDCACBBABCBB@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:C>CBB@BABBACB-CC@CBBABBA@CBBBBBAACC9BBBCBBC@>BCCCBCCBCCBA@CCCBCBCBCCCCCCCBCCBCCCCBCCCCCBCCCCBBCCBBBCB UQ:i:0 +20GAVAAXX100126:8:25:5802:132383 147 chr1 10002536 60 101M = 10002223 -413 
TAAACTTTACTACATGATACAAACTACAGTTTGGAAAGACATTTAGGAATGGTAGAACAAAACAAGTGAGAAAATATAATATGTAAAAGGTCTTTTTGTCA BDBD@@D@AA>DB>AB8BBB:ABB;ADBCCB@CCACDCBBBDDDDCDDDDBDBBBBCAABDCBA@@AC@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:CB@@CBBBBB=BBCBCCBBCBBBCBABCCBBBBBBBACBCCBBCCCBCCCCCCBCCBCCBBBABCCCCCCCCCCCCCCCCCCCCCCCCCB@BCCCCCCCCB UQ:i:0 +20GAVAAXX100126:8:46:6526:26315 99 chr1 10002542 60 101M = 10002857 415 TTACTACATGATACAAACTACAGTTTGGAAAGACATTTAGGAATGGTAGAACAAAACAAGTGAGAAAATATAATATGTAAAAGGTCTTTTTGTCATTTCAA CDC?DCACCBDCCACDDADCACCBDDBCDDDCDACCDDCCCDDCBCBCCDDACDDB@CAABBDCDDDDCCCCDCCCBCCDDDCCABDDDDAAA@DCDDCEE MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHGGHGGIHHHHHHHHHHHHHHHHHHHHHHHGHHHHHGGEGHHHHHHH UQ:i:0 +20GAVAAXX100126:8:61:8680:44720 163 chr1 10002550 60 101M = 10002899 449 TGATACAAACTACAGTTTGGAAAGACATTTAGGAATGGTAGAACAAAACAAGTGAGAAAATATAATATGTAAAAGGTCTTTTTGTCATTTCAAAATACAAG @AA@7;B?D@=@;9A@BAACCEECB:CCBCCCC9=B7/8?A09?ADDAAA=B:C= MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:@?@@4=A?CA@?@BBCCB@9BBCBBBA7?B=59CA/:@BBB=B=?A4?>?9;@>5>/;?B@B;AA9:98@@A<<: UQ:i:0 +20GAVAAXX100126:8:28:14388:67948 83 chr1 10002661 60 101M = 10002344 -417 GCATGGTGGCTCACACCTGTAATCCTAGCATTTTGGGAGGCTGAGGCAGGAGGATCCCTTGAGCCCAGGAGTTGGAGACCAGCCTGAACAACATACTGAGA DDCDCADCCCDCCCBCCCACDCDCCCDABCDDDCBBBDBBCCBCBBBDBBCBBCDCCCDCBCBCCBCBBCADCBBDBACBDBCCCBDBBDCBCCCCC@CBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHFHHHHHHHHHHHHGHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:5:7404:40226 163 chr1 10002668 37 91M10S = 52476437 42473869 GGCTCACACCTGTAATCCTAGCATTTTGGGAGGCTGAGGCAGGAGGATCCCTTGAGCCCAGGAGTTGGAGACCAGCCTGAACAACATACTGAGACCCCGTC @CBB?A?BACAA@<>ACDED@>@CCCCCBAC@?BB;6<8?AC=B76A38AAD=6A8/A:@7CC=:D7@BA7'B38ABBCB>7B@:BC72D########### MD:Z:91 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:37 
OQ:Z:BCBACA@ACC@@A@:@CCCC?=>BBCCB@@B>>BC>:?=ACB>?99=08=BB97;2.=7=8AA=2A9?=?6(>16BBAAA=8=>:>@73@########### UQ:i:0 +20GAVAAXX100126:8:43:16629:51559 163 chr1 10002681 60 100M1S = 10002869 288 AATCCTAGCATTTTGGGAGGCTGAGGCAGGAGGATCCCTTGAGCCCAGGAGTTGGAGACCAGCCTGAACAACATACTGAGACCCCGTCTCTACAAAAAATA @DCA@BBBBCCBBBCCCCCCBDBCCCBCCCBBA=:7;B@B;?;>=C;ACB::@8B?==?C>==@AA8A<=?>@;>BA?;@9:B<8>5:9AAA## MD:Z:100 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBCBBBBBCCCBCBBBBBBBBABBBBBB@A@:=:@BAB>?<B?>6<:?::6@B?<?:?:9?:;<<@61-9=?=?=?669?:7=145;8?## UQ:i:0 +20GAVAAXX100126:8:45:10866:114173 99 chr1 10002696 60 101M = 10003041 434 GGAGGCTGAGGCAGGAGGATCCCTTGAGCCCAGGAGTTGGAGACCAGCCTGAACAACATACTGAGACCCCGTCTCTACAAAAAATAGAAAATTAGTTGGGG CCCACCDCDCCBCCCDCCDCBBBDDBDCBBBCCCDCADBCDCDABCCBBCBDDACDACCCADBDCDABBB;BBDBDCACDDDAABBADDDDCDCCBDC>E# MD:Z:100T0 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGGGHHHHHHHHGHHCH# UQ:i:2 +20GAVAAXX100126:8:65:6494:152486 99 chr1 10002718 60 101M = 10003049 431 CTTGAGCCCAGGAGTTGGAGACCAGCCTGAACAACATACTGAGACCCCGTCTCTACAAAAAATAGAAAATTAGTTGGGTGTGGTGGTCCATGCCTGTAGTC CDC@DCCCBCCCDCBDBCDCDABCCBBDBDDACDACCCACBDCDABBB;BBDBDCACDDD@@BBADDD@CDCCBDBCCABCBCCBCCBCCCCCCDCCADDD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEEGGGHHHDHHHHHHHHHFHHHHHHHHHHHHHHHHHHGHHH UQ:i:0 +20GAVAAXX100126:8:65:4031:65332 163 chr1 10002722 60 101M = 10003065 443 AGCCCAGGAGTTGGAGACCAGCCTGAACAACATACTGAGACCCCGTCTCTACAAAAAATAGAAAATTAGTTGGGTGTGGTGGTCCATGCCTGTAGTCCCAG @C@BABBCCB@CBBCCD@8CC>BCBA5A?@D>AACABC@B=BAAB@CBCC=?B MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBAABBABAB?BBBAB8BA=@BBBABBBCBCBCBBCBB>;BB7AB@B0B>@B;B@@ABABBCCBDACCCCC@CC@CCCCAC>AAAA??A?8AC@?@?98C;@@?A?8B@64AA=@@>76>7?<:@;?@8?@####### UQ:i:0 +20GAVAAXX100126:8:27:6582:134282 163 chr1 10002742 60 94M7S = 10003068 426 
GCCTGAACAACATACTGAGACCCCGTCTCTACAAAAAATAGAAAATTAGTTGGGTGTGGTGGTCCATGCCTGTAGTCCCAGCTACTCCAGAGGCTGAGGGG @BCCABB@CCACCAADBCCCBCCC;ABDCECACDCCBBBB@@CCB?CCCCABD@CCBCCBA;8>B=AAC@=A???>=B?A64B######## MD:Z:94 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCCBBCBBBBBCBBBBBBACBBBCBBBCCBBBBBBBBBBC@BAB?BBB>BBBA?B@AA?BA??=@@8A@A?B;>6>A=@AB=@B>>>AA<==:BB@=BC>@>>BB:@C@BBA@AAAC??CBCC@CBCBC@CCCACCCA@BA;BBBBA@BB@ MD:Z:79G21 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:>?:1=<=7<3=><>==:?@;BABBABAABA=BBBABBBBABABBBABBBBBB UQ:i:31 +20GAVAAXX100126:8:43:16629:51559 83 chr1 10002869 60 101M = 10002681 -288 AGGCTGCAGTGAGCCAAGATTGTGCCAGTGCACTCCAGACTGGACAACAGAGCAAGACCCTGTCTCAGGGGGCTGGCCACGCATGGTGACACATCCCTGTA :DCDDCCDACCDCCCDDBCDCACBCBDACBB@CDCBDBCCCBBCBDBBDBCBBDCBBCCCCADCDBCBBBBBCCBBCBB;BBCCBACBCBCBCDCCCA@CC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:;HHHHHHHHHHHHHHHHHHHGHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:63:6936:45669 611 chr1 10002875 29 35M66S = 10003208 424 CAGTGAGCCAAGATTGTGCCAGTGCACTCCAGACTGGACAACAGAGCAAGACCCTGTCTCAGGGGGCTGGCCAAGCATGGTCACACATCCCTGTAGTCTGT ##################################################################################################### MD:Z:35 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:0 MQ:i:37 OQ:Z:##################################################################################################### UQ:i:0 +20GAVAAXX100126:8:61:8680:44720 83 chr1 10002899 60 101M = 10002550 -449 CACTCCAGACTGGACAACAGAGCAAGACCCTGTCTCAGGGGGCTGGCCACGCATGGTGACACATGCCTGTAGTCTGAGGAGCTCAAGGTTGCAACAGTGAG 646:>:CD9A=>@B=DDBA=CCCA?C>B<:98;B:CBABBBBBB?CBCCC?ACADACBDBBDBACBADBADCBBDCCDAABDC MD:Z:64C36 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:47;<@A?@@A>EECHHCF?DDHAHHHGEHHHEEHHHBFB9DGCGEG?BBFBHBHHHGHHHHHCHHHHHGGGHHGHHHHHHHGGHGHHHHHHHHHHHHHHHH UQ:i:33 +20GAVAAXX100126:8:28:18332:173597 163 chr1 10002902 60 97M4S = 10003158 356 
TCCAGACTGGACAACAGAGCAAGACCCTGTCTCAGGGGGCTGGCCACGCATGGTGACACATGCCTGTAGTCTGAGGAGCTCAAGGTTGCAACAGTGAGCCA @BCBAB@CBCCACBACCCCBCDBBACDDCBBDBC@B?@BABA?B=C>:AC:?B6BC=B:A?@=@BA;=?>A@BA875:BACA##### MD:Z:61C35 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BBCBBBBBBBBBBBBBBBBBBBA@BBCBBBBBBAABAABBAA@A?B=?AA>??3??=>:?@?=@??;=:7>?=:5=17:494>=?6>?:13:?@=?##### UQ:i:31 +20GAVAAXX100126:8:62:6496:91010 99 chr1 10002902 60 101M = 10003193 391 TCCAGACTGGACAACAGAGCAAGACCCTGTCTCAGGGGGCTGGCCACGCATGGTGACACATCCCTGTAGTCTGAGGAGCTCAAGGTTGCAACAGTGAGCCA CBBACDADBCDACDACCDCBCDCDABBDBBBCBCCCCCC@DBCBBCA;BCCBCBBDACACCBBCDBBCCCBCBDCCDCBDBCDCC>DCCCB@BABCDD@D> MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHAHHHHFGGFFHHHDHB UQ:i:0 +20GAVAAXX100126:8:45:20504:142467 163 chr1 10003007 60 90M11S = 10003337 430 CGCACCATTCCACTTCATCCTGGGCAACAGAGCAAAACCCTATCTCTATGAAAAAAAAAAAAAAGCAAGAGGCTATCTTCTCTGAGGTTTACTATAGTTAA @;BB>AACABCCACCBDCCDDBDCCCDBCCCCBCCCB@A?C?CCBCDCACACD@DDDDDDBDCB=A@B;/<6688.5B?4:>8:?@B>C5BA1>A<;################################################################ MD:Z:38 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:=7@BABA@;@;@28CB;:6A;:39>@@<@5A@/;A>?################################################################ UQ:i:0 +20GAVAAXX100126:8:45:10866:114173 147 chr1 10003041 60 11S90M = 10002696 -434 GCAACAGAGCAAAACCCTATCTCTATGAAAAAAAAAAAAAAGCAAGAGGCTATCTTCTCTGAGGTTTACTATAGTTAACTTTTACAAGTATTTAATCATAA ############?8.D=4.,9388?<3CCACCB@BCCBC@BB=B@BCBACBBBAA@BCBABDACCDDCCCCCDCCDDCBCDADCCCCBBBBB:C@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:2;901225?A>@AAAAA@A@@AB@AABBBABA@BB>ABABABBABB?B@BBBBBAABB@BBBBBABBBBBBBBBBBBCCBCCBBBBBBBCBBBBBBBBBBB UQ:i:0 +20GAVAAXX100126:8:45:5977:75469 163 chr1 10003052 60 71M30S = 10003336 384 CTATGAAAAAAAAAAAAAAGCAAGAGGCTATCTTCTCTGAGGTTTACTATAGTTAACTTTTACAAGTATTTAATCATAATTGCTTAGATTGAATTCGGTTC 
@DBBABCCCCCDCBDDDA=.7@@4)@@397:6?:241'-%366C6/0?-:37A6>=3@BC303<=45';B############################### MD:Z:71 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBBBBBBBBBBBB?9,6==1*?>13683=9224'-%4:5A90.:-55688;729@A402;902'7@############################### UQ:i:0 +20GAVAAXX100126:8:63:2945:69121 83 chr1 10003053 60 5S96M = 10002736 -412 TTCTCTATGAAAAAAAAAAAAAAGCAAGAGGCTATCTTCTCTGAGGTTTACTATAGTTAACTTTTACAAGTATTTAATCATAATTGCTTAGATTGAATTCG ######@A;AA@A?AA@DDDDDDBBDDBDBBCCCDCDDCDCCBDBADDCBCCCCCADCDBCDDDCBBDCACCDDCDCDBCCDCDCBCDCDBCDCBDCBC;C MD:Z:96 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:######DD@GGGGEGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:65:4031:65332 83 chr1 10003065 60 101M = 10002722 -443 AAAAAAGCAAGAGGCTATCTTCTCTGAGGTTTACTATAGTTAACTTTTACAAGTATTTAATCATAATTGCTTAGATTGAATTCGGTTCAGTATATATTCCT EEDDAD=?=A>DCBCCCDCDDCDCCAD@A?DCCCCCC6ADCACADDDCBB?CACCDDCDCDBCCDCDCBCDADBCDCBDCDD;BADDBBACCCCCCDBCCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHGHAD@DAHHHHHHHHHHHHHHGHFHEHHHHHHH7HHHFHGHHHHHHEHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHIHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:27:6582:134282 83 chr1 10003068 60 101M = 10002742 -426 AAAGCAAGAGGCTATCTTCTCTGAGGTTTACTATAGTTAACTTTTACAAGTATTTAATCATAATTGCTTAGATTGAATTCGGTTCAGTATATATTCCTGGA EDDCCDDCACCCCCDCDCCDCCBDBAD@CBCCCCDADCDCCDDDCBBDDACCDDCDCDBCCDCDCBCDCCBCDCBDCDD;BADDBDACCCCCCDDCCABBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HIHHHHHHEHHHHHHHHIHHHHHHHHHDHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:26:6455:142223 163 chr1 10003070 60 101M = 10003403 433 AGCAAGAGGCTATCTTCTCTGAGGTTTACTATAGTTAACTTTTACAAGTATTTAATCATAATTGCTTAGATTGAATTCGGTTCAGTATATATTCCTGGAAG @CBBBABBCBDCCADCCDCDBBCCBCCCADCCBC>BAB;CBBBC?CDC@B?CC@DCBDBABCCABD@CCB@CCABBB@;B@BCBBAB>>C;BC?CBBCBDB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:BCBBBBBBBBBBBBBBCBCBA@BB@BCBBBBBAC@BBB@BBAABBBBB?AABBBAABBA@ABBAABABB@@@A@@B@@BA>@BAA9@A?@@@>@ UQ:i:0 +20GAVAAXX100126:8:68:8704:167789 99 chr1 10003123 60 101M = 10003414 361 AATCATAATTGCTTAGATTGAATTCGGTTCAGTATATATTCCTGGAAGTTCACTATTATTATTATTACTAGGTCCTGAGAATATGAAACAACCATAAAAAT CDB@CCCDCABBDDCCDCDBDDCDB;CBDBCCAC>CC>CDBBC9CDDC@DBCADCCDCCDCCDCCDCADCACCBBDBBCDDCCCBDADACDACDCDE=E@; MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHGHEHHEHHHHH@HHHHEHHHHHHHHHHHHHHHHHHHHHGHHHHHHGHHHHHHHHGHHHHHHHHHHBHG> UQ:i:0 +20GAVAAXX100126:8:28:18332:173597 83 chr1 10003158 60 101M = 10002902 -356 ATATTCCTGGAAGTTCACTATTATTATTATTACTAGGTCCTGAGAATATGAAACAACCATAAAAATGAGTGATGAATTCAATATATATTATTCTGACACTG BDCEBDCBCCDD?DDBCACCDCCDCCDCCDCCCCDBADCCCBDBDCCCCBDDBBDBC@BCDDDDCCBCACBCCBDCDDBDCCCCCCCDCCDDCCBCC:BCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:FHHHEHHGHHHHGHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHH UQ:i:0 +20GAVAAXX100126:8:25:10200:154913 147 chr1 10003177 60 101M = 10002841 -436 ATTATTATTATTACTAGGTCCTGAGAATATGAAACAACCATAAAAATGAGTGATGAATTCAATATATATTATTCTGACACTGAGAAGTATTGTAACAAAAA =>455>8=@B@B@5>=8AD>?DBABB@CCDCCC?D@>BBCBCAC>AABAB>BBCDCCCCCCBBBBCCBDBCDAACDCABBBAABAC@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:9<<6>????A>@:<97340>6<=@@@?6:69?AAABBBABBBBBBABBBB@B@CBBBBBBBBBBBBBBABBBCBB UQ:i:0 +20GAVAAXX100126:8:62:6496:91010 147 chr1 10003193 60 101M = 10002902 -391 GGTCCTGAGAATATGAAACAACCATAAAAATGAGTGATGAATTCAATATATATTATTCTGACACTGAGAAGTATTGTAACAAAAAACAAGTTTCTAGGTCA CA=2CAAB@B>>;;>AA5BB>BBAACBACDABEA@BCBBCBCCCCDCACCADCCDDACCB@A?BAACBCDACCDBBCCABCCCBBACBDACCCBBB@@CB@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:B=81??>?>@>;<9:884:>9'>>351>;77?9< MD:Z:33T24A12 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 
OQ:Z:###############################FFFAA43.-,64<>D551-1.421=09:>7::)@:55444/+444/43+4>::3B>=+@ACA>=A=ACB?AC@BBB>BB@CCCA=CB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBCBBA;BBBBBBBBBBBB@@ABA?BB@BBBBBBBBBBABBB@BB@BBBABBABAAAB<@AA@@><>A????ABA?B;@?@@A@??;6:=A UQ:i:0 +20GAVAAXX100126:8:63:6936:45669 659 chr1 10003208 37 9S92M = 10002875 -424 GAGAATATGAAACAACCATAAAAATGAGTGATGAATTCAATATATCTTATTCTGACACTGAGAAGTATTGTAACAAAAAACAAGTTTCTAGGTCATACCAA ##########B@19:97;=>;C?@43?;B@9(0<>5>BD8(28C=3B97C>?C,5B?>?08;3@*>A@;?CBCA@?-C'9:BA;.3<7@B5'.:>7AA+6AA@@6<>1>+>?85=C@B>:=-@)6:AC86-)=A>5@589@ UQ:i:7 +20GAVAAXX100126:8:68:17460:23672 99 chr1 10003272 60 101M = 10003550 378 CAAAAAACAAGTTTCTAGGTCATACCAAAAGCCGGGCAAAGTGGCAGATTCCTGTAATCCCAGCTACTCAGGAGGCTGAGGGGGGAGGATTGCTTGAACCC CCD:@DDA@A@BD>BDCCCCBCCC?BCDDDAB=;C@?B@@=;===?A;BABB@ABC?:=BBCCADCAD@>C>DCC=@?@ACCC>>???>?DACAD<=:>?= MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHGGHHHCCCGHDHHHHHHHHHHGHHHHHGHDHHCCGCGD@DDDDG=GGHHEGHHE=EHHHHGHHHHGCHDHHH?DDDFHHHCCCCCCCHGHDHADDCDCCDCCCB@CBAACCBD@BCDCBCDCBDACC;CBBDBBBDBDBADCBBDACBCBBDBDBCCCB;>CBCCBCBCDCBBBCDCBDBCCCBCCCDABDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BGHHHHHHHHHHGGGGGGHHHHFHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:45:20504:142467 83 chr1 10003337 60 101M = 10003007 -430 ACTCAGGAGGCTGAGGGGGGAGGATTGCTTGAACCCGGGAGGCAGAGGTTGCAGTGAGCAGAGATAGCGCCATGCCATTGCACTCCAGCCTGGGCAACAAG DDD@DCCDCCCB@CABACC@DCBCDCBCDCBD@CC;CCBDBBBDBDBADCBBCACBCBBCBCBCCCB;BCBCCBCBCDCBB@CDCBDBCCCBBCCDC@CDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHDHHHHHHHGGGGGGHHGHHHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:26:6455:142223 83 chr1 10003403 60 101M = 10003070 -433 GCGCCATGCCATTGCACTCCAGCCTGGGCAACAAGAGTGAAACTCCATCTCAAAAAAAAAAAAATCCTTTCATGAAATTACTACCAGATGTTGGCCCTTCA 
>6=@>=;A@>AB@BACBDDC>BCBCDC@?AAAAA@ADDDDDCDCCDDDACCBDDCDCBCCBCBDBCCADCBCCCCBCBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BAADA?BDDBD55444DBFBFHHIHHFDBFDEFFFFHHHHHHEFHHHHHDCGGGGGGGHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:68:8704:167789 147 chr1 10003414 60 30S71M = 10003123 -361 GTTGCAGTGAGCAGAGATAGCGCCATGCCATTGCACTCCAGCCTGGGCAACAAGAGTGAAACTCCATCTCAAAAAAAAAAAAATCCTTTCATGAAATTACT ###############################B@<347:@::>>A@CC=B>7A@@C>9A;<:;7>;28>>>CCCCCCCCCCCCBCCCDBCBCCBBBBBBAC@ MD:Z:71 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:###############################?=;002AB@?=:=?>?@<@<=<>:@@39<<5?;:B71?A8>:07B49;9-6.5?A9B@;A#################### MD:Z:82 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:???AC@:BCCCBCCCC@CACB?CCCCB=ACCC@4808@7>=4@4-==9=;14@3875,2+7>?;A<:A#################### UQ:i:0 +20GAVAAXX100126:8:45:3960:5771 163 chr1 10003525 60 101M = 10003872 447 CATCCTTAGCAAAGAAAGATCTTCAAAGCAGGCACTTCCATGATCCACATACTTTGGATGCCCTTAAAGGGGGGGAAAAAGGGAGAGGAAGGGAGGAGAGG @CBA@BBBBBCDCABEECCCBECBDEECBACCCD@CAAA?CBCC?CCACDAADADCCDCBACCCCB?DCCADDAB>@BB=BBCBB0CB@DABC@AACC?DB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCBCBBBBCBCBBCCCBBBBCCBCCCBA?BBCCBBBBBCBBBBABBBBBBBBBBBBABCBBBBBA@BBBABBBB;?A?<@?@;@@ UQ:i:0 +20GAVAAXX100126:8:26:5030:62604 99 chr1 10003529 60 101M = 10003851 422 CTTAGCAAAGAAAGATCTTCAAAGCAGGCACTTCCATGATCCACATACTTTGGATGCCCTTAAAGGGGGGGAAAAAGGGAGAGGAAGGGAGGAGAGGAAGA CDCACCCDDCDDAADCBDDBCDDCBCCCBCACDBBCCBDCBBCACCCACDDBCDCBBBBDDBDDCCCCC>B>DDDDCCCDCBCCDDCCCDCCCCCCDDED5 MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHEGCHHHHHHHHHEHHHHHHHIHHGHFHHIHH4 UQ:i:0 +20GAVAAXX100126:8:26:15631:70339 83 chr1 10003538 60 101M = 10003203 -435 GAAAGATCTTCAAAGCAGGCACTTCCATGATCCACATACTTTGGATGCCCTTAAAGGGGGGAAAAAAGGGAGAGGAAGGGAGGAGAGGAAGAGGAGGAGAA 
DEDDCCDCD?CDDDABDCBBCCDDCBCCBCDCACBCCBCADCBBCCBCC@DCDDCBBBBBBDDDDDCBBBDBDBBDDBBBDBBDBDBBDDBDBBDCBBBDC MD:Z:61G39 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHCHHIHGHHHHHHHHHHHHHHHHHIHHHHGHFHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:35 +20GAVAAXX100126:8:68:17460:23672 147 chr1 10003550 60 101M = 10003272 -378 AAGCAGGCACTTCCATGATCCACATACTTTGGATGCCCTTAAAGGGGGGGAAAAAGGGAGAGGAAGGGAGGAGAGGAAGAGGAGGAGAAATAGCATTTCAA CE@CBA?1B>)8.>1%94=096A;>->88CAA=:4=.?7A0CAC<>B78=A=CCDC8C7?==?:?@9@BCC;.=<'23<-%616384?;?'=74?B?997<0?3@2A>B99>94>B9BBBB;B6@>@A=AA;;BB@8ABB?@?>>?BB@B;?BBB>>ACB:BA?982;8A>=?DDD==?CBBBADDCDA>=>A@=;;@?A@=;8<13=,<;<78)=7<=2=72A>B@>B;>*<@==@CDACHHHDDDHHHHGHHHHCADAGCD>4CEFCE?44234245554+44441443FAFFCF=C,?254444-5559F?DDCC.?A654'5:GCGE4 UQ:i:0 +20GAVAAXX100126:8:25:18450:91287 163 chr1 10003652 60 101M = 10003979 427 TCTGCTTTAATATATATATATTTTTTGAGACAGAGTCTCACTCTGTCATCCAGGCTGGAGTGCAGTGGTGCAATCTCCGCTCACTGCAACCTCCACCTCCC @CCA?A@ABCCCCACCCCCCDCCCCCBCCC@CACB>@C?B@DBD@BCCCAACB=BDBCBA@=A9AA<=>ABBB>=@BAAB>BA?AB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCBBCCCCBBBBBBBBBBBBCCCBBBAABBAB@BB?BBABBBBBBABBB@BB??AAAA?@;A?<@>@@9A@?>@>>>@>B?<6?>@@@>:???@>>>9>:@ UQ:i:0 +20GAVAAXX100126:8:44:17629:79062 1187 chr1 10003678 60 99M2S = 10004001 423 GAGACAGAGTCTCACTCTGTCATCCAGGCTGGAGTGCAGTGGTGCAATCTCCGCTCACTGCAACCTCCACCTCCCGGGTTTAAGCGATTCTCCTGCCTCAG =B:6:=::9ABD61?DDDCCCD97@@>374=48?AA@<;@BD8B46CB5>C<?@2=6@5;@B@B;6=BA;A4:@A1;A:CA9;@B>85/:2?7*'5)4>8':4&3=?:8BABA07B@### UQ:i:4 +20GAVAAXX100126:8:2:19645:190338 83 chr1 10003700 60 101M = 10003434 -366 TCCAGGCTGGAGTGCAGTGGTGCAATCTCCGCTCACTGCAACCTCCACCTCCCGGGTTTAAGTGATTCTCCTGCCTCAGCCTCCCTAGTAGCTGGGATTAC 5ECACCC?;CD?BBCD=A>=:?9==?AA@D=DDBHHGHHHFHHHHHHHHHHHHHHHHEH UQ:i:0 +20GAVAAXX100126:8:25:1734:195910 99 chr1 10003819 60 101M = 10004149 430 TGGCCAGCTAATTTTTGTATTTTTAGTATTTTTGTATTTTTGTTTCGCCATGTTGGTTGGGCTGGTCTCCAACTCCTGGCCTCAAGTGATCCACCCACCTC 
CBC@CCCCDCDCDDDDBBBCDDDAACBCCDDD@@ABCDDDA@AA@=;BBCCBBDBCCDBCCBDBCCBDBBCDADBBDBCBBDBCDCCCDC@CC:CCDBAED MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHGGHHHHGGHHHHHHHGGGGHHHHGGGGGEHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHIHHHFHHBHHHHFHH UQ:i:0 +20GAVAAXX100126:8:26:5030:62604 147 chr1 10003851 60 101M = 10003529 -422 TGTATTTTTGTTTCGCCATGTTGGTTGGGCTGGTCTCCAACTCCTGGCCTCAAGTGATCCACCCACCTCAGCCTACCAAAATGCTCGGATTACAAGTGTGG C@B;)?9)8=@BAC@C?CB?=7ABCCBCC@C@@CDA>?=DCB3BBA6BBCBDBCCCBDBCCCCCBCC;DACDCABAB?@@BD@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:@<=AA?AA@@@;2A?ABAABAB?BBBB9?@BBB4BBB6BBABBBBCB@CBBCCBBBCBCCCBCCBCCCCCCCCB UQ:i:0 +20GAVAAXX100126:8:45:3960:5771 83 chr1 10003872 60 101M = 10003525 -447 TGGTTGGGCTGGTCTCCAACTCCTGGCCTCAAGTGATCCACCCACCTCAGCCTACCAAAATGCTCGGATTACAAGTGTGGCCCCTTGTAATAAAAATTTAA EA?A,@CCCCCADCDCBDCCDCCCBBCADBDDACBCDCBACCBBCCDBDBCCCBCBDDDCCBCD;BBCDCBBDDACACBBCCCCDCABABCDDDDCDBCDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HFFD.CHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHFHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:4:16802:116090 611 chr1 10003895 60 101M = 10004234 439 TGGCCTCAAGTGATCCACCCACCTCAGCCTACCAAAATGCTCGGATTACAAGTGTGGCCCCTTGTAATAAAAATTTAATTTTTTGGAATATGACTCTTGGA 1.)4=???3/>@,=<<30+->;>6<<<:;>:+:6;;;<<:9*31=;;99=97=:<@>:==AA7=<9::9:>765198:9+=;:5:A/<>:::86<9@=:.B MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:5..54544/.44+55473+0B@BBBCB;8@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBCBCCBB@BBBBCCBBAABBBBBBB@BBBBB>AABBBBBBBBBBB@BAB@B=@BAAABB@A@@@@??<;1= UQ:i:0 +20GAVAAXX100126:8:47:13004:90024 147 chr1 10003910 60 60S41M = 10003567 -383 TTGTATTTTTGTTTCGCCATGTTGGTTGGGCTGGCATCCAACTCCTGGCCTCAAGTAATCCACCCACCTCAGCCTACCAAAATGCTCGGATTACAAGTGTG #############################################################>:;6=5@6BB7;;A8(@CCC>;AAC;>?A=AB>A@B@ MD:Z:41 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 
MQ:i:60 OQ:Z:#############################################################@>><>9>5A@6:6A1,>BBB=7A?BBAA?;B7=?B@@BAB UQ:i:0 +20GAVAAXX100126:8:2:13260:81311 163 chr1 10003923 60 90M11S = 10004222 399 CTACCAAAATGCTCGGATTACAAGTGTGGCCCCTTGTAATAAAAATTTAATTTTTTGGAATATGACTCTTGGAGTTTTTCTTTTTCTTTTTTTACTTTTGG @C4:97@:CB>??>9=;<@:??2?-4?BACACC:<=6:<:16>@9BAD9@889:?BBB3?96>;?=;BDB:?4877?(?;C?D>78D6@############ MD:Z:90 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:B@4;66=7@A?=@A@=79=9@<.;'1:A?B@BA7>@6>?>47><B?0<;7><<<=BA@;<0755>);;B>B<53@4@############ UQ:i:0 +20GAVAAXX100126:8:1:17413:63912 1187 chr1 10003958 60 101M = 10004229 371 GTAATAAAAATTTAATTTTTTGGAATATGACTCTTGGAGATTTTCTTTTTATTCTTTTACTTTTGGACTCTGTTGGACTTCAGTTGTTTTTCTTTAATGGC @BBCBACCCCCCCADCCCCCCBCCDCCCCCADBDB?*34->CCC6DDCCB&:C/CDCD4;8C6BB@0@EB*CC2>$8?5>4 MD:Z:39T10C2T26G20 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:4 SM:i:37 MQ:i:60 OQ:Z:BABBBBBBBABBBBBBBBBBBBBBBBBBBABBBBBA355-@BBA:BABB?';@.>BBB3;7A4BA>/ABA)BA1=(6>B:(0B1;B?7-869?6??991:2 UQ:i:38 +20GAVAAXX100126:8:7:11540:56171 163 chr1 10003958 60 101M = 10004229 371 GTAATAAAAATTTAATTTTTTGGAATATGACTCTTGGAGTTTTTCTTTTTCTTTTTTTACTTTTGGACTCTGTTGGACTTGAGTTGTTTTTCTTTAATGGC @ABCBACCCCCCCAACCCCCCBCBCCACCBADBDBAAD=ABBA?CA@C? 
MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:ABBBBBBBBBBBBB?BBBBBCBB?ABABB@BBBBBBB>B?BBBBAAAAAA@BABBAA@AAAA@?AA?ABA?A>?@?B8>>@@<>>< UQ:i:0 +20GAVAAXX100126:8:7:11542:56193 1187 chr1 10003958 60 101M = 10004229 371 GTAATAAAAATTTAATTTTTTGGAATATGACTCTTGGAGTTTTTCTTTTTCTTTTTTTACTTTTGGACTCTGTTGGACTTGAGTTGTTTTTCTTTAATGGC @ABBBACCCCCCCACCCCCCCBCBBCACCBADBDBAA7AEABCCAAA=9DBA>C?ACB@>B@=ABB?CA?D# MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBABBBBAABBBBABBBBBCBB?@BABB@BBABBBB9B>BABBABBAA@?A@BBAAAA?B@?@@@>?B?AA>@A@<;A@A>=?<<@?>><>?# UQ:i:0 +20GAVAAXX100126:8:25:18450:91287 83 chr1 10003979 60 101M = 10003652 -427 GGAATATGACTCTTGGAGTTTTTCTTTTTCTTTTTTTACTTTTGGACTCTGTTGGACTTGAGTTGTTTTTCTTTAATGGCTTTATTGAGGTATATTTTATG >DABA><0*(?BC:BCA;B9(BDCBCCBC@6;96ADC?DD7+>65=8'=D@C################ MD:Z:86 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:B@6=AA:@CB00)?BA6>B<>B/)=BBAABBB>679:?BB@BB6+=9-;8(=B?A################ UQ:i:0 +20GAVAAXX100126:8:44:17629:79062 1107 chr1 10004001 60 101M = 10003678 -423 TCTTTTTCTTTTTTTACTTTTGGACTCTGTTGGACTTGAGTTGTTTTTCTTTAATGGCTTTATTGAGGTATATTTTATGTACCATAAAATGCACCTAAGTG =::6<=<<=BBB@<=>A@@B;@?ABA@:ABA@B:=:9<<=AC>ABAABABAAAABAA@@C@??@B>???=====<><=;?AA@<<>< MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:?><:>DGGB5444445554DCFGGGGFGF UQ:i:0 +20GAVAAXX100126:8:25:9891:26690 99 chr1 10004020 60 101M = 10004321 387 TTGGACTCTGTTGGACTTGAGTTGTTTTTCTTTAATGGCTTTATTGAGGTATATTTTATGTACCATAAAATGCACCTAAGTGTACAATTTGATAATTTCTA CDBADADBDBCDBCDADDBDCBDBBDDA@@C@@ADCBCBCDDCCDBCCCBCCCCDDDCCBCCACCCCDDDCBBCABDCDCABCCACDCDDCDCCDCDDCED MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHGGGGGGGHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:21:17746:177127 163 chr1 10004141 60 101M = 10004479 438 AACCATTCCTACAATCCAGTTTCAGAACAAGTCAATCACTCCAAAAAGTTCCCTAGAGACCATGTGCTGACAACTATTATTTTATAGGCTTTTCTTTTTCT 
@DAB@A@BCDCACBDCDDCACCCDCCEACCCBBDC@=B@@@CBDBD?CAC?CDBD:CCCABCBABC8DCC?BB>CBBBB>BAD@BBDB=CAAC>D@CCBCD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCCCCCCCCCCCCCCCCBBCCCCCCCBBACBBCBCCBBCCBCBBC:BABABCBC4BCBBBBABBBCBBBB>>@BBBA@>BB@@A@BBB=@BB=ABB?A@A UQ:i:0 +20GAVAAXX100126:8:21:17763:177132 1187 chr1 10004141 60 101M = 10004479 438 AACCATTCCTACAATCCAGTTTCAGAACAAGTCAATCACTCCAAAAAGTTCCCTAGAGACCATGTGCTGACAACTATTATTTTATAGGATTTTTTTTTTCT >C>A@AB?@D9:@BA?CC?*;CBBCAC8@B)=6B4@BC;=:<@=7CC@CCC?BB@AB=>;71BD MD:Z:88C4C7 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:=A@@?CB?=C;9A?CB)7A@CB?B4@@)@@>CAACBB:@?BC=872;=?>C?<(A7AA)BA@B<CEABCABCAA@BAACCACCA@CCBDABBCDCBE@D@?DBDCAACBCB@A>??ADDCDDDDDDDEDBCDEECCDBDDCBCCBABACD@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:B??A>A?@>>6<:;AB@@AA@BA?@@@@B?BBA@BBABBBB@BBBABBBCABBCBBBBBBBBBCBCCCBCCBBBCCCCCBCBCCCCBCCCBCBCCCBCBBB UQ:i:0 +20GAVAAXX100126:8:42:7969:44162 1187 chr1 10004170 60 101M = 10004491 421 AGTCAATCACTCCAAAAAGTTCCCTAGAGACCATGTGCTGACAACTATTATTTTATAGGCTTTTCTTTTTCTTTTTCTTTTTTTTCTATTTCTGTTTGAGA @CAA@ABBCADCCAEEEECBBCDDDCDCCCBBC?@>AACAA@CD:DCDCC;CCACDCCCCCC?BBD@CCC@DDABCABCBBBBABCDB>CAADABBDCBB> MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:@CBCCCCBBCCCCCCCCCC@@CCCBBCBCBCAA>A@BBBBACBBCBBBBBCBBBBBBBBBBB=BBAAB@BA?BBBABB@BAA=BBB@ABB@BBA:@@A@<: UQ:i:0 +20GAVAAXX100126:8:42:7986:44148 163 chr1 10004170 60 101M = 10004491 421 AGTCAATCACTCCAAAAAGTTCCCTAGAGACCATGTGCTGACAACTATTATTTTATAGGCTTTTCTTTTTCTTTTTCTTTTTTTTCTATTTCTGTTTGAGA @CAA@ABBCADCCBEEEDCBCCDDECCCCCBCCCB>=AC>AACD?DCDCBACCACDBCCCCCCBBD@CDC@EDABB?BCBBADABCE=>CBAD@CBCC5B; MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCBCCCCCBCCCCBCCCBCACCCCCBBBBBCBBBB@CBBCABBBBBBCBABBBBBBABBBBBBBABBBBBBBBBBB?BB@BBBBBBB=BBA@A??AB?1<6 UQ:i:0 +20GAVAAXX100126:8:28:13507:25203 99 chr1 10004176 60 101M = 10004453 376 
TCACTCCAAAAAGTTCCCTAGAGACCATGTGCTGACAACTATTATTTTATAGGCTTTTCTTTTTCTTTTTCTTTTTTTTCTATTTCTGTTTGAGACAGAGT CBC?DBCCDDDDCBDBBBDCCDCDABCCBBBBCBDACDACCCDCCDDABCCCCBDDDDBDAAAA@CA@@@@C@@@AA@@@C MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHGHHHHHHHHHHHHHHHHHHHHHHHA UQ:i:0 +20GAVAAXX100126:8:2:13260:81311 83 chr1 10004222 60 101M = 10003923 -399 TTATAGGCTTTTCTTTTTCTTTTTCTTTTTTTTCTATTTCTGTTTGAGACAGAGTCTCTTTCTGTCACCCAGGGTGGAGTGGAGTGGCACAATCTCAGCTC ?<@AC>C>@AA>>@@A;<;?=?@>>A??9DDADCBC?AAACADDCB=;>==B?A-CD=DADC??DBB=C>DBBAC=BDA@BBDACBBBCB@CDCD@D@>DC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBEGFBHCCCCCCCCC@@@A@CCCCCCC5HHGHHGHEGEGHHHHHH????@HDH.HHEHGHHEGHHHEHCHHHHHDHHHEHHHHHHHHHHFHHHHGHHFHH UQ:i:0 +20GAVAAXX100126:8:45:4354:79437 99 chr1 10004223 60 101M = 10004540 414 TATAGGCTTTTCTTTTTCTTTTTCTTTTTTTTCTATTTCTGTTTGAGACAGAGTCTCTTTCTGTCACCCAGGGTGGAGTGGAGTGGCACAATCTCAGCTCA CCBACCCDDDDBDAAAAACAAAAB=ACC?9C??C?=>@ABC6>>BB@C?:@C6A@?C? 
MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:B@@BCBBCCCBCCBCCCCCABCCCCCCBBBBBBBBBAA@A:A:@@:>;@@@A5>>@@A?89>=-29?>2<=;>< UQ:i:0 +20GAVAAXX100126:8:44:5789:104432 163 chr1 10004228 60 101M = 10004558 430 GCTTTTCTTTTTCTTTTTCTTTTTTTTCTATTTCTGTTTGAGACAGAGTCTCTTTCTGTCACCCAGGGTGGAGTGGAGTGGCACAATCTCAGCTCACTGCA @BCB??ACCBBBBBBBBBBECCCCCCCCECDBBCBA?B>ACCCA@CCB@DC@@B?ABAA@D@CDCAACA>=BB@AD@?C MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCBCCBCBCCCCCCCCCBCCCCCCCCCCCCCCCCBBBCBBBBBCA>AABCBBBBBBBBBBBBBA:BA>B@AB?@B>A=A@BBAB@@AABA<:@@BA@?:A UQ:i:0 +20GAVAAXX100126:8:1:17413:63912 1107 chr1 10004229 60 101M = 10003958 -371 CTTTTCTTTTTCTTTTTCTTTTTTTTCTATTTCTGTTTGAGACAGAGTCTCTTTCTGTCACCCAGGGTGGAGTGGAGTGGCACAATCTCAGCTCACTGCAA @@@@CAAAAACAAAABBAAABADDDDCCCDDDCCADDCBDBCBDBDADCDCDDDCCADBBCCBCBBACBBDACBBDACBBBCBDCDCDBDBCDCCCC@BDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:7:11540:56171 83 chr1 10004229 60 101M = 10003958 -371 CTTTTCTTTTTCTTTTTCTTTTTTTTCTATTTCTGTTTGAGACAGAGTCTCTTTCTGTCACCCAGGGTGGAGTGGAGTGGCACAATCTCAGCTCACTGCAA @@@@CAAAAACAAAABBAAABADDDDCCCDDDCCADDCBDBCBDBDADCDCDDDCCADBBCCBCBBACBBDACBBDACBBBCBDCDCDBDBCDCCCC@BDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:7:11542:56193 1107 chr1 10004229 60 101M = 10003958 -371 CTTTTCTTTTTCTTTTTCTTTTTTTTCTATTTCTGTTTGAGACAGAGTCTCTTTCTGTCACCCAGGGTGGAGTGGAGTGGCACAATCTCAGCTCACTGCAA @@@@CAAAAACAAAABBAAABADDADCCCDDDCCADDCBDBCBDBDADCDCDDDCCADBBCCBCBBACBBDACBBDACBBBCBDCDCDBDBCDCCCC@BDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GGGGGGGGGGGGGGGGGGGGGGHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:4:16802:116090 659 chr1 10004234 
60 101M = 10003895 -439 CTTTTTCTTTATCTTTTTTTTCTATTACTGTTTGAGACAGAGTCTCTTTCTGTCACCCAGGGTGGAGTGGAGTGGCTCAATCTCAGCTCACTGCAACCTCT CEAB?%B*A<&A>)D?;@9<9<@;43,@B(=9C>8AA3<85?;*A47?CC>3)@*6,44BA:@?71B@@;>4B=A9; MD:Z:10T15T49A24 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:3 SM:i:37 MQ:i:60 OQ:Z:BB>;8%=)<;'3:7C>;42)@?)=5A>9?==B+A84ACCA=,A<008ABABBC?8?;;@@*?13@AB*3+01A>9=A8+C>?9;-A;@37 UQ:i:24 +20GAVAAXX100126:8:25:13133:61877 147 chr1 10004291 60 101M = 10003984 -407 CAGGGTGGAGTGGAGTGGCACAATCTCAGCTCACTGCACCCTCTGTCTCCTGGGTTCAAGGGATTCTCCTGCCTCAGCTTCCCAAGTAGCTGGGATTACAG BBA>AAC9A:2>?@@=>=A7<><9>9@;?B7=@>=?B>,=ABAC=4@>B?;CB@C=8A?AB<;?0:ACBCBCCCBDBCDCCCCCEACCBCCCBBBBA@BD@ MD:Z:38A62 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:A:?8A??6<:/9@=?7<<<5<=96>6?:<>6:8=?>A>/=A?@@=/>:<@?@B?BBB=BB?B@>CCC>BC=A@@DCCBC?CBD@BBB5BBCDACDCCCCCBCEDABCCAB;BDCACBBBA@:B@ MD:Z:87 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:###############?67<7<97>::????AAB@35??@==>>>>B=B@>A==9?5<=> MD:Z:85 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:#################A??>552449>;9A3455544515<8<><55440455554445555554<=@9=A>CCC89CCC54455AGDGFAFAD@DDDDD UQ:i:0 +20GAVAAXX100126:8:8:8310:17522 147 chr1 10004327 60 17S84M = 10003989 -421 ACAATCTCAGCTCACTGCAACCTCTGTCTCCTGGGTTCAAGGGATTCTCCTGCCTCAGCTTCCCAAGTAGCTGGGATTACAGGCACTTGCCACCACGCGCA ##################B<@AA@>147?874@C8BB>CAAB?@CCA9ABACBCCB=CB?C@@A?C@CDBBCCCB?DBBBCCBBBCDBBCBBCB@9A:BB@ MD:Z:84 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:##################?6?@=A@>:>ABBABAB?BB?BAABABBBBAABBBB>BAAAABBBABBBBBBABBBBBBBBB UQ:i:0 +20GAVAAXX100126:8:21:4413:182078 99 chr1 10004409 60 101M = 10004700 382 CAGCTAATTTTTGCATTTTGAGTAGACAGGGGGTTTCACCATGTTGGCAAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCTGCCCACCTTGGCCTGCTG CCB@DCDCDDDDBBCCDDDBDCBCCDACCCCCCADDBCABCCBBDBCBCDCCBDBCBBDB;DD@DBBDBCABDBCCCA@DCBDCBCCCACDDCD?CDCCED MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:23 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHIHHHHHHIHHHHHIHHHHHHHGGHHHHHHHHHHHHHHHEHHHHHH UQ:i:0 +20GAVAAXX100126:8:1:11745:83846 163 chr1 10004442 37 101M = 10004719 377 TTTCACCATGTTGGCAAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCTGCCCACCTTGGCCTGCTGGAGGTATGTGACAAACTGATGGCAAATGGATTA @BAA@?BBCBACBBCCDCCCDBC@BBB;CDADBCC;7@ACBCBC;BD?BD=BC@?BBCCAABBCAC@B8A?C?;:>A;C>BAC?@BA;?C=;C>=A;?AA; MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:37 MQ:i:29 OQ:Z:BCCCCCCCBCBBBBCBBCBCBBB:B@BBABBBABB>@A6=9=?:??B??B=?;:?@=CB;7?97@:8 UQ:i:0 +20GAVAAXX100126:8:45:9158:12274 633 chr1 10004442 0 62S39M = 10004442 0 CTGGGATTACAGGTGCCCACCACCACACTTGGCTAAGTTTTGTATTTTTAGTAGAGTCGGGGTTTCACCATGTTGGCAAGGCTGGTCTCGAACTCCTGACC ###############################################################>>=;+<<=9;<:9=;?;;?=<<=?<6==?@>9@>5>*1 MD:Z:39 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:0 OQ:Z:###############################################################AA?51544555455545545544455544445455515 UQ:i:0 +20GAVAAXX100126:8:45:9158:12274 693 chr1 10004442 0 * = 10004442 0 CAAGAATTTCAGTTGCAGTCAGTACTCGGTTCAGGAACACTCCACCCAGGTTTGTAGCTTAGGAGCAATAGACTTTTTCCTATGCAATGTAGGTGTGTGGT ##################################################################?1AC5516*B*C'BA*A&@-<-0;4;6B.@)41@< PG:Z:BWA RG:Z:20GAV.8 OQ:Z:##################################################################@/@A30-2)@*A*A@*?'?-8,29074A,@*02A8 +20GAVAAXX100126:8:28:13507:25203 147 chr1 10004453 60 1S100M = 10004176 -376 TTGGCAAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCTGCCCACCTTGGCCTGCTGGAGGTATGTGACAAACTGATGGCAAATGGATTATGCCCCCAAT ##A:AAC?:?@C:CBB1@B>A>@A@A=ABC@CC@@BBBBB@AAC>BC@CC?CBCCC=CBA5>ABA@BBBCCCBCCBCBCBBCCCCCCBCCCCBCBBAABC@ MD:Z:100 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:##@4=?<=6=?A=@==4B=;@==A?B8A@?AABAAA@AA@A@@A8@AA@BAAAABB@AB@7ABBBBBBABBBABBBB@BBBBCBBBCBBBBBBBBBBBBCB UQ:i:0 +20GAVAAXX100126:8:21:17746:177127 83 chr1 10004479 60 101M = 10004141 -438 
CCTCAGGTGATCTGCCCACCTTGGCCTGCTGGAGGTATGTGACAAACTGATGGCAAATGGATTATGCCCCCAATGGACAGTGTTCTCAAACCGTGATCTGA DDDCAC?DCCDCCBCCBCCAD;BBCCCBCCABDBACCCACBCBDDBCCBCCBBBDDCCBBCDCCCBCCCABDCCBBBBCACADDCDBDDBC;ACBCDACBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHDHGHHHHIHHHHHHHGHBHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:21:17763:177132 1107 chr1 10004479 60 101M = 10004141 -438 CCTCAGGTGATCTGCCCACCTTGGCCTGCTGGAGGTATGTGACAAACTGATGGCAAATGGATTATGCCCCCAATGGACAGTGTTCTCAAACCGTGATCTGA BACCDC=D@CD??>?@;45::@A>B?B>>DDA=A:@@B@BADCCBBCDCCC@CCCCBDCCBB@BCCCB>BB=?AABBBABABBA@?BABBB@BBA@A@A@AA@CBA>ABBABA@CA?ABC<>>8@@@AAC@BBBB@AABCA@@?BB@>BBBB=AB>A@:AB?B?AB@C@A@AB9@BABCAC@CA@?BB:@@:==< MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:5GGDG;GCEGGGEGGGGGGGGGGGGGCAC9CGGEGGGGGGGGGGGGGGCDGGECGGGGEGGAGGCGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGG UQ:i:0 +20GAVAAXX100126:8:42:7986:44148 83 chr1 10004491 60 101M = 10004170 -421 TGCCCACCTTGGCCTGCTGGAGGTATGTGACAAACTGATGGCAAATGGATTATGCCCCCAATGGACAGTGTTCTCAAACCGTGATCTGAGGACCCGGGGAT 89A4@9@BCB@C@@>B>BBAABB=?B7>CBABB@BCBBCD>:C=CBDCB@;BDB>=BBCCCCDCCACCCCCACBDBCCBCBDBCBCCCBBABC@ MD:Z:98 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:####?>778?6=9>??A???>??@@@@@@=>:@>BAAA@A@BA@B?B@@B@ABBAAAABBB@@BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCBB UQ:i:0 +20GAVAAXX100126:8:2:15114:83430 147 chr1 10004542 60 101M = 10004220 -422 ATGCCCCCAATGGACAGTGTTCTCAAACCGTGATCTGAGGACCCGGGGATCCCGAGACCTTTTAAGGGTATCTGTGAGATCAAAGCTATTTTCATAATAAT >D=@B<=ACAAC?BCD@BABBBDAB@=A;BABDC?CBCC>;CC;CCC?DC@C;CDB>BBCCBABBBBACCCCCACBDCCCCCCDBCCBCDDCBBB?@ABC@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:;?;8AABABBBABBBB>:BABBBAABBAABBBB@@BBBBBBBBBBBBBBBBBBBCBBCBBBBBBBBBCBBCBCCCBCB UQ:i:0 +20GAVAAXX100126:8:4:5529:91628 83 chr1 10004550 60 101M = 10004227 -423 AATGGACAGTGTTCTCAAACCGTGATCTGAGGACCCGGGGATCCCGAGACCTTTTAAGGGTATCTGTGAGATCAAAGCTATTTTCATAATAATCCTGAGAT 
EDDCCCCDAC@DDCDBDDBC;ACBCDCCBDBBBCC;CCBBCDC>;BDBBCCDDDCDCBBACCDCCACBCBCDBDDDBACCDDDDBCCDCCDCDCCCBBBCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHIHHHHHHHGHHHHHHHHHHHHHIHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:44:5789:104432 83 chr1 10004558 60 101M = 10004228 -430 GTGTTCTCAAACCGTGATCTGAGGACCCGGGGATCCCGAGACCTTTTAAGGGTATCTGTGAGATCAAAGCTATTTTCATAATAATCCTGAGATGATATTTG ?EBEDCCCDDCC;ACBCDCCBBBB@CC;BBBBCDCC8BDBBCCDDDCDDBBACCDCCACBDB@DBDDCBCCCDDDDBCCDCCDCDCCCBDBCCBCCCBDCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GHHHHHFHHHHHHHHHHHHHHGHHEHHHHHHHHHHHFHHHIHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:25:3804:30880 163 chr1 10004579 60 101M = 10004891 412 AGGACCCGGGGATCCCGAGACCTTTTAAGGGTATCTGTGAGATCAAAGCTATTTTCATAATAATCCTGAGATGATATTTGCTCTTCATTCTCTCTCTCTCA @CCB?AB:CCCCCACC;CCCACDCCCCDCCCACCACA>AB@CCCADDCBDACCADCCDBDBCCBACABDCADBABAB@?AABCCCCDB>:AAD?D?DBBD@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:ABBBBBBBBBBBBBBBBBBBBBBBBBBBBBB;BBBBB@BBCBBBBBBBBBBBBBBCBBCBBBABCABBBB@BCBBAB?;AB@BA@BBB>4??BB?B@@@A< UQ:i:0 +20GAVAAXX100126:8:42:4321:53069 99 chr1 10004580 60 101M = 10004795 315 GGACCCGGGGATCCCGAGACCTTTTAAGGGTATCTGTGAGATCAAAGCTATTTTCATAATAATCCTGAGATGATATTTGCTCTTCATTCTCTCTCTCTCAT CCC?CC;ACCDCBBB;DCDABDDDDCDCCCBCCBCBB@DCDCBCDDCBCCCDDDBCCCDCCDCBCDBACDCBDC=CDDBBDBDDBCCD@DADCDCDCDCED MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHHHHDHHHHHHHHHHHHHGHGHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:41:10214:75276 163 chr1 10004590 60 101M = 10004899 409 ATCCCGAGACCTTTTAAGGGTATCTGTGAGATCAAAGCTATTTTCATAATAATCCTGAGATGATATTTGCTCTTCATTCTCTCTCTCTCATGAGCATATGG @CBBA8BBCACDB@CCDCDCBCCCDBBCCCCCBCCCAACBCCCC@CDCCDABD@CEBDCCBBCBCD@CCBBCEAABAAAC@BBBADCC?C@AC@BAC@ACD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:BBCCBCBBBBBCCCBBBCCB@BBCBBABABBBBBBBBBBBBBBBBBBBBBBABBBBBABBBBBBBBAABAABBBBBAB@B@B@B@ABB@@BA@B@?@<@@B UQ:i:0 +20GAVAAXX100126:8:5:7628:108971 99 chr1 10004623 60 101M = 10004891 362 AAAGCTATTTTCATAATAATCCTGAGATGATATTTGCTCTTCATTCTCTCTCTCTCATGAGCATATGGGAGTTTTCCAGAGACTTCCTGCCAGGCCACATC CDDACDCCDDDBCCCDCCDCBBCBDCDCBDCCCDDBBCBCDBCCDBCBCBCBDBDBCCBDCBCCCCBCCACBDDDBBCCDCDADDBCDCACCCDCCDBDDD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:5:7649:108990 1123 chr1 10004623 60 101M = 10004910 362 GAAGCTATTTTCATAATAATCCTGAGATGATATTTGCTCTTCATTCTCTCTCTCTCATGAGCATATGGGAGTTTTCCAGAGACTTCCTGCCAGGCCACATC *DDACBCADBD?ACBCC?ACBABBDCDCBDCCADBBBCBCDBCCDBCBC?CBDBDBCCBDCBCC>CBCBACBDDDABC@DCDADDBCDCA@CADCCD?DDD MD:Z:0A100 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:8HHHHGHIHIHIIHGIHFGHHGIHHHHHHHHHIHIHHHHHHHHHHHHHHFHHHHHHHHHHHHHHCHHHGDHHHHHGHHGHHHHHHHHHHGFHFHHHHGHHH UQ:i:9 +20GAVAAXX100126:8:42:11066:118124 163 chr1 10004633 60 101M = 10004938 405 TCATAATAATCCTGAGATGATATTTGCTCTTCATTCTCTCTCTCTCATGAGCATATGGGAGTTTTCCAGAGACTTCCTGCCAGGCCACATCACAACTGACC @CBB@ABBDCBCDACCCCCCCCDCCBBDBDCBCCBABACACBDCBBCDBDABCABDBDCCBACACB@CCCACABC@BBB@ABBB?CC@@C?B?>D?DCBBA MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCCCCCCCBCCCCBCBBCBBBCCCBBBBBBBBBBBBBBBBBBBBBBBBBBBBBABBBAAB?A@BAABBAB@AAA@BA@?@A?A>A@A@@@@><@??@@@> UQ:i:0 +20GAVAAXX100126:8:66:6823:31942 83 chr1 10004641 37 20S81M = 10004294 -427 TCAAAGCTATTTTTATAATAATCCTGAGATGATATTTGCTCTTCATTCTCTCTCTCTCATGAGCATATGGGAGTTTTCCAGAGACTTCCTGCCAGGCCACA #####################<>6=><99:26:>@=<:=<=;,<<<+5<;9=<=<===BB?B??AAB8AA>@A@=CBAAB9CACBAB@AC@CDABA?C@CB?@AA@CCBCBBCACADC=CADDCDBCADBABCCBA?CBD@ MD:Z:92 PG:Z:BWA RG:Z:20GAV.8 AM:i:23 NM:i:0 SM:i:23 MQ:i:60 OQ:Z:##########?364??8@9-<>;>@AA8;;?=@>@B>A?A6A@?A:@A@ABBBAABAA@BBAABBABBB@BABBB@BB:B@BCBBBBCBBBCCBBBCCBBB UQ:i:0 
+20GAVAAXX100126:8:1:11745:83846 83 chr1 10004719 29 101M = 10004442 -377 ACATCACAACTGACCTGACCCAGAAGTACATCTGAGAATTCAGCTGTCTTCTATTAAGCCAGACATTAAAGAGATTTGCAAAAATGTCAAACAAGGCCACT DDCBCCCDCCCBCCCCBCCCBDBDDACBBCDCCBDBDCDDBDBCCADCDDCCCDCDCBCBDBBBCDCDDCBDBCDDCBBDDDDCCADBDDCBDDCCC@BCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:0 MQ:i:37 OQ:Z:HHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:26:5797:83894 99 chr1 10004750 60 101M = 10005102 425 CTGAGAATTCAGCTGTCTTCTATTAAGCCAGACATTAAAGAGATTTGCAAAAATGTCAAACAAGGCCACTCTGCATGTAATAGGCTTATCTTAAAAATGAA CDBBCDDCDBCCBDBBBDDBDCCDCDCBBCCCABCDCDDCDCDCDDBBCADDDCBBBCDDACDCCBBCADBDBBCCBCCBCC>CBADC@BDDCDB?>BAC6 MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHIHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHEHHEHHEHHHHHFBBFFF5 UQ:i:0 +20GAVAAXX100126:8:43:4132:159191 675 chr1 10004784 29 95M6S = 10005128 377 TTAAATAGATTTGCAAAAATGTCAAACAAGGCCACTCTGCATGTAATAGGCTTATCTTAAAAATGAATTAATATGTCTCTTGGTTGCCGTTTCTAATCAAA <*89B,AA=@4@)(@@CCB=6/C>CB5>B6)7C<>7A;@?.7DA>CCB8BBABA####### MD:Z:5G89 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:1 SM:i:29 MQ:i:29 OQ:Z:8):5@,@;B:AA7@7>A=B?27;ABA7A<+A=>:BAC@78==:7B9:@==8?))<>ACC:6-A?B?6=>4(1B==/A9==.4B?CCBBCCBCCCCBCCCBCCCBCBCCCB UQ:i:0 +20GAVAAXX100126:8:27:3628:73265 611 chr1 10004826 60 35M66S = 10005114 346 GTAATAGGCTTATCTTAAAAATGAATTAATATGTCTCTTGGTTGCCGTTTCTAATACAATCCATCTGCCTCAGACTCCCAAAGTCCTAAGATTATAGGTAT 1405==>9;?<+?>@==<<>>=<=############################################################################# MD:Z:35 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:51155455555,44444554??>?############################################################################# UQ:i:0 +20GAVAAXX100126:8:21:16295:166486 163 chr1 10004833 60 101M = 10005120 387 GCTTATCTTAAAAATGAATTAATATGTCTCTTGGTTGCCGTTTCTAATACAATCCATCTGCCTCGGCCTCCCAAAGTCCTGAGATTATAGGTATGAGCCGC 
@BCBAAACACDDDBCCCDDCCDDCCBBBDBDCBC@BA?A:@BCCBCDDCAACD@CDBBCB@AC@:C=ACA@DCABA?@BA@;B@ACAB@@?@8@>?@?@>@?;?@>?>??>? UQ:i:0 +20GAVAAXX100126:8:63:6148:94882 675 chr1 10004865 37 73M28S = 10005252 421 GGTTGCCGTTTCTAATACAATCCATCTGCCTCGGCCTCCCAAAGTCCTGAGATTATAGGTATGAGCCGCTGCACCCGACCTAGCCTTTTCATATGGAGTCT @?5??AB6>A;A71DC;@CABB9?C;CA60790,%=7/=?>BC=?8;;=665D=77:;483A:24A.0;73B############################# MD:Z:73 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:37 MQ:i:29 OQ:Z:B=4=?BB>7?;@62BB:AB?AB5;B9A@3.2650(?;0@C?AB9A975>175B?32945/5@80/?/5811@############################# UQ:i:0 +20GAVAAXX100126:8:42:16609:191180 163 chr1 10004875 60 101M = 10005207 432 TCTAATACAATCCATCTGCCTCGGCCTCCCAAAGTCCTGAGATTATAGGTATGAGCCGCTGCACCCGGCCTAGCCTTTTCATATGGAGTCTCAGACAGTGA @CCAAAA@BDCCCACCDCCDDC;CCDEBCDDDDC@A>CABBBCCACCCCBACCACBC;BCABB?CB5CBCBBC@ACBBCAAABBACAB:BBA:AD?BD@DD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCBCCCCCCCCCCCBCBCCCBCBBCCCBBCCBBBABCBBBBABBBBBBB?BBBBBABBBABAAAB@@BABAAAB@AAA@B@B@A@@CCBDBC;BCCBB@CC;BBCCCCBCCDDDDBCCCCBBDADCDBDBBBDACBDDCDDBDADCDCCACCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:#HHHHHHHHHAGHHHHHHFHHHHHHIHHHHHHHDHHHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:5:7628:108971 147 chr1 10004891 60 6S95M = 10004623 -362 TCCATCTGCCTCGGCCTCCCAAAGTCCTGAGATTATAGGTATGAGCCGCTGCACCCGGCCTAGCCTTTTCATATGGAGTCTCAGACAGTGAAATTCAGTCA #######@B>25;8@8/:A;>B<>9;;>C@B;?@?????@;BBABAB@BABBBBBBBBBBBBBCBBBBBBBCBCBBBBBCBBBCCCBCB UQ:i:0 +20GAVAAXX100126:8:41:10214:75276 83 chr1 10004899 60 101M = 10004590 -409 CCTCCCAAAGTCCTGAGATTATAGGTATGAGCCGCTGCACCCGGCCTAGCCTTTTCATATGGAGTCTCAGACAGTGAAATTCAGTCAATATATTTATAATG DDCDDCDDDADCCCBDBADCCCDCACCCBDBC;BCCBBBCC;BBCCCDBCCDDDDBCCCCBBDADCDBCBBBDACBDDCDDBDADBDCCCCCDDCCCBBCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHFHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 
+20GAVAAXX100126:8:5:7649:108990 1171 chr1 10004910 60 25S76M = 10004623 -362 TCCATCTGCCTCGGCCTCCCAAAGTCCTGAGATTATAGGTATGAGCCGCTGCACCCGGCCTAGCCTTTTCATATGGAGTCTCAGACAGTGAAATTCAGTCA ##########################B7:B8;C<88@A740B@C=D:?C?ACDCCCCDCDCDACCCBDCBBD@CCCBCCBAB@CB@ MD:Z:76 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:##########################A56<99@<<6ACBBBBBBCBCCBBBCBBBCABBBBCBCBCBBBCBBB UQ:i:0 +20GAVAAXX100126:8:42:11066:118124 83 chr1 10004938 60 101M = 10004633 -405 CCCGGCCTAGCCTTTTCATATGGAGTCTCAGACAGTGAAATTCAGTCAATATATTTATAATGAATACTTTTTTTTTTGAGATGGAGTCTTGCTCTTGTCAC DE;AADCCDCCCDDDCBCCCCCBDADCDADBCBDAC>ADBBCBCACADCBCACCC?CADBC?D@C;=>AAADDDDDCBCBCCBBDADCDCBCDCDCABBCC MD:Z:70A30 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:IHHGGHHHHHHHHHHGHHHHHHHHHHHHIHHHHHHHCGHIGGHGHGGHHIGIHIIFHGHGHGHFHCEEGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:32 +20GAVAAXX100126:8:25:19700:154119 147 chr1 10004969 60 101M = 10004642 -427 ACAGTGAAATTCAGTCAATATATTTATAATGAATACTTTTTTTTTTGAGATGGAGTCTTGCTCTTGTCACCCAGGCTGGAGTGCAATGGCGTGATCTTGGC BD?@C@D@CCAC??DCCAC@<@DBABAADD@CDC>AA>BCACCCEDBCCDCDBDCCBDCBB;ACBC@@ABCC@ MD:Z:39A61 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:?B;BBCBBBBCBBCCBCBBBBCBBCCBCCCCABCBC>BBBCCBCBCCBCCCCCCCCCCCCCBCCCCCCB UQ:i:35 +20GAVAAXX100126:8:48:10519:110723 99 chr1 10004994 60 101M = 10005311 417 ATAATGAATACTTTTTTTTTTGAGATGGAGTCTTGCTCTTGTCACCCAGGCTGGAGTGCAATGGCGTGATCTTGGCTCACTGCAACCTCCGCCTCCCAGGC CCCBCCCDCCADDDDDD;>:?>:C@<<7:==;=;=???A@>BBBABBCCCB@BC8CCB?@BC>B:5<:7;9<9;5BCCDDE@ MD:Z:14A86 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHGHHHHHHHHHHBCBCC=HD4555445444EDFFFEHHGHGHHHHHEHH:HHHCDGHCGB=<@;>54445H=HHHEEHEHHHHHHHHHHDHHHHHE UQ:i:35 +20GAVAAXX100126:8:68:18251:192056 147 chr1 10005081 60 27S74M = 10004792 -362 ATGTCGTGATCTTGGCTCACTGCAACCTCCGCCTCCCAGGCTCAAGCGATTCTCCTGTCTCAGCCTCCTGAGTAGCTGGAATTACAGGCATGGGCCACCAC ############################B99A@9?9BBA;=@@BB@@CACBDCBACC>@AABBABA@BBC@ MD:Z:74 PG:Z:BWA RG:Z:20GAV.8 
AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:############################?;;?<6???:@??<6?????;A?A?>A?B@@@A;@AA?A?:A@@=?B>>BABABB@@ABAA?AABABB>AB@B UQ:i:0 +20GAVAAXX100126:8:1:6636:49601 99 chr1 10005102 29 101M = 10005477 440 ATTCTCCTGTCTCAGCCTCCTGAGTAGCTGGAATTACAGGCATGGGCCACCACGCCCGGCTAATTTTTGTGTATTTAGTAGAGATGGGGGTTTCACTATGT CCC@DBCDBCBDBCCBBDBBDBDCBCCBDBCDDCDCACCCBCCBCCBBCABCA;BBB;CBDCDCDDDDB@ABCCDDCCCCCDCDCBCCCC?DDCDADD@DD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEGHHHHHHHHHHHHHHHHHHHAHHHHHHHDHH UQ:i:0 +20GAVAAXX100126:8:26:5797:83894 147 chr1 10005102 60 27S74M = 10004750 -425 GCAACATCCGCCTCCCAGGCTCAAGCCATTCTCCTGTCTCAGCCTCCTGAGTAGCTGGAATTACAGGCATGGGCCACCACGCCCGGCTAATTTTTGTGTAT ############################B865235A?;>>C;9A:>@)?C7<=@>C899CC28)5B??:CB@CA<8/B>8B;@8@>B:CC?CCCB?@@BC@ MD:Z:74 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:############################?161316B;;<=B99=6<;*;A<9<>:B;6:BB58)8BA>9BA>B@942B9>B;><>>A?BB?BBCBB?BBBB UQ:i:0 +20GAVAAXX100126:8:27:3628:73265 659 chr1 10005114 60 42S59M = 10004826 -346 CATCCAAGCACCCACTCCAAGGCTCAAGCAATTCTCCTGTCTCAGGCTCCTGAGTAGCTGCAATTACAGGCATGGGCCACCACGCCCGGCTAATTTTTGTG ###########################################C@3=3<=>C<&><@C@*'8B9;:'>@8?:BCBBC<,0CA;C9&0/1>9C.CB?@?9A? 
MD:Z:3C14G40 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:###########################################?>172:>@B:(;;BA@,';B;@9';>6>9?BABB9)3CBCC9)204;;B,BC??><=> UQ:i:24 +20GAVAAXX100126:8:21:16295:166486 83 chr1 10005120 60 101M = 10004833 -387 CCTGAGTAGCTGGAATTACAGGCATGGGCCACCACGCCCGGCTAATTTTTGTGTATTTAGTAGAGATGGGGGTTTCACTATGTTGGCCAGGCTGGTCTCGA DDDCDACDCCCCBDCDCCCDBBBCCBBBCBCCBC;BCC;BBCCDBBDDDCACACCDDCCACCBCBCCBBBBADDDBBCCCCADCBBCBDBBCCCADCB:BC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:43:4132:159191 595 chr1 10005128 29 66S34M1S = 10004784 -377 GTCTGGGCTCACTGCAACCTCCGCCTCCCAGGATCAAGCGATTCTCCTGTCTCAGCCTCCTGAGTAGCGGGAATTAAAGGCATGGGCCCCCACGCCCGGCG ###################################################################################################4. MD:Z:2T7C11A11 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:3 SM:i:29 MQ:i:29 OQ:Z:###################################################################################################A7 UQ:i:6 +20GAVAAXX100126:8:48:2299:12648 99 chr1 10005135 60 101M = 10005519 438 TTACAGGCATGGGCCACCACGCCCGGCTAATTTTTGTGTATTTAGTAGAGATGGGGGTTTCACTATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCATG 9DC?CCCCCCBCABBCABCA;BBB;CBDCDCDDDDBA@?CCD@CCBCC@:DC@CCCC9@DBCADCCBC@:==;;=BABA;:>?:69A9A=?=AC:A=?BAD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:IHHHHHHHHHHHIHHGHHHHHHHHHHHHHHHHHHHHGGEHHHFHHHHHDBHHFHHHHBFHHHHHHHHHD@BABBAGGFFB;DA@D:D@DAA?GGBF>DDGH UQ:i:0 +20GAVAAXX100126:8:42:16609:191180 83 chr1 10005207 60 101M = 10004875 -432 CAGGCTGGTCTCGAACTCCTGACCTCATGATCCACCTGCCTCAGCCTCCCAAAGTGCTGGAATTACAGGCGTGAGCCACTGTGCCTGGCCTATAATGAATA DBC?CDCADCD;BDBCACCB@9CCDB?B=7=>-A9;:<<;7:===:7=<<<:<:7<<,99=>>?>=>?5>;1 MD:Z:61 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:#########################################A=A9A:AA/F=@===444444444544454555/5444/55=9:=CF5AA4454444555 UQ:i:0 
+20GAVAAXX100126:8:48:10519:110723 147 chr1 10005311 60 101M = 10004994 -417 TTTAATATTCTCTTTGTTTCTGTCCACAGCAAGAATCTTTATTTCCTGAGACCTTTTGTAGCAGATTATTTTCCCTCAGTTTCTTGCCCTTAAGAGTCTTC DE?BC@ABCA?A=CDAC?A@B@CB:9BACC@CCB>BABCCBDDDCBD:CB:DBDBDABBABACA>A?CDDDCCCDCBDAAECCCDCC8BDCBCBC??BCB@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BAB<@B=AA@>==@BA@=;?BA@A:3B@BA@AB@@?@BBBBBBBB@CC@B-A2A(;A3;9B*ACB8A8;B+A&;>B>81:=/852?97481CB=2=4+*)A)CBC/++@)%>.?''8C3C@######################### MD:Z:16A25T22T11 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:3 SM:i:37 MQ:i:29 OQ:Z:*((?<-?/?(:@4=6@*>C@4@37@*>(8;@;7/9@1<72@;81<1@?>/?1*)(<)@B@/*+@))@.:'(1@2A?######################### UQ:i:35 +20GAVAAXX100126:8:43:10736:78356 611 chr1 10005394 60 35M66S = 10005705 411 TTGCCCTTAAGAGTCTTCGGTTCTCCACAGATAAGAGATTTGCTTTTGATATTTCCTCGACCTGCTCATAAGGATTTCATTATAACTTCTGGGTGCAATCT ?################################################################################################ MD:Z:35 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:AA7>A################################################################################################ UQ:i:0 +20GAVAAXX100126:8:61:2951:38254 163 chr1 10005434 60 96M5S = 10005758 424 TGCTTTTGATATTTCCTCGACCTGCTCATAAGGATTTCATTATAACTTCTGGGTGCAATCTTTAGGAGAGTTTGCAAAACAGTCTTTTTTTTTTTTGTAAT @BBC??@ACCBCB@CDEC;CBDDCCDCCDCDCCCBBAABBBACCBBDCBD@CD>CCCDCB=CC;CD@ACC@DD?ABBBC@@ACACCDCADBCD@D###### MD:Z:96 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCCCCCCBBBCCCCCCCCBCCBCCBCBCBBCBBBBBBBBBCBBBCBBBBBBB?BBBBBBCBB=BBB@AB@BB@BBA@@B?B?AB@AAAAAAABA###### UQ:i:0 +20GAVAAXX100126:8:21:9990:22806 163 chr1 10005457 60 72M29S = 10005782 425 GCTCATAAGGATTTCATTATAACTTCTGGGTGCAATCTTTAGGAGAGTTTGCAAAACAGTCTTTTTTTTTTTGTAATGGAGTTTCGCTCCGTCACCCAGGC @CCAABBCBCCCB@CCDBCCCDADCBECCCBCBCAB@C>BBCCDACCBCC@BC@CC?DCBADCBCC@CDCA############################## MD:Z:72 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:BCBCBBBBBCBBCCCBCCBBBBBBCBCBBBABBBABBBCBBBBBBBB?BABBBA@ACBBBBBBBBBBBBBB############################## UQ:i:0 +20GAVAAXX100126:8:6:21370:119926 99 chr1 10005465 60 97M4S = 10005757 389 GGATTTCATTATAACTTCTGGGTGCAATCTTTAGGAGAGTTTGCAAAACAGTCTTTTTTTTTTTTGTAATGGAGTTTCGCTCAGTCACCCAGGCTGAAGTT >743?@CCBDADD?DBC=?BBCDC?DDDCCCCA>=?DDB>>????>;;2>:=4A>@?D56>B2:>98@/DBD?##### UQ:i:25 +20GAVAAXX100126:8:1:6636:49601 147 chr1 10005477 29 35S66M = 10005102 -440 ATGATTCCTCGGGCTGCGCATAAGGATCTCCTTTGAACTTCTGGGTGGAATCTTTAAGAGAGTCCTCAAAACAGTCTTTTTTTTTTTTGTAATGGAGTTTC ###############################BC*89BA>=(29CACC?:+8'%BC54@21,-6%5'85=CB?<7%0>EEEEEEEEEEAACBCCBAA?CCB@ MD:Z:12C8G6T0T0G35 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:5 SM:i:29 MQ:i:29 OQ:Z:###############################?@);6@A<;(.8BB@AA5+:'%?B54>/1+08,:(:4=BDCBCCCBDDA;BABC6BBCABBCACB=@BBA;A?;?CCCCBCCDCCDD@D@. MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHGHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHGGGGGGGGEAGHGHHGHGHHIHHEHHEGHHHHHHGHH=GFFFCFDBDHHHHIHHHHHHHGHG2 UQ:i:0 +20GAVAAXX100126:8:48:2299:12648 147 chr1 10005519 60 46S55M = 10005135 -438 TTTAAACTTTGGGGTTAAACTTTTAGGAGAGTTTGCAAAAAATTCTTTTTTTTTTTTGTAATGGAGTTTCGCTCCGTCACCCAGGCTGAAGTGCAGTGGTG ###############################################ACCBDCDDDA=3>C9A?<@CDC;ABCC;=BBBBC?EDAABABD=CB?B?A@=C? MD:Z:55 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:###############################################B@BBBBBBBB<5?B;BA@BBBBBAAABB=@A@AB=CCA@ACCC?CC@CAB??B? 
UQ:i:0 +20GAVAAXX100126:8:48:13575:23805 163 chr1 10005529 60 83M18S = 10005856 427 TGTAATGGAGTTTCGCTCCGTCACCCAGGCTGAAGTGCAGTGGTGCAATCTTGGCTCACTGCAACCTCCGCCTCCAGGGTTCAAGGGATTCTTGTGCCCCA @BABBBACCBACC>;BDBC;BABACABACBDCCDB;A<ACA@BCBBA>?B@BC@C?BA@A=?B>@B9>CC?>A@9<####################################################################################### PG:Z:BWA RG:Z:20GAV.8 OQ:Z:*@+ABCBAA=47;?####################################################################################### +20GAVAAXX100126:8:46:11003:55063 99 chr1 10005652 60 101M = 10005971 396 CAGGCGCCCGCCACCACACCTGGCTAATTGTTGTATTTTTAGTAGAGATGGGGTTTTACCATGTTGCCCAGGCTGATCTCAACTCCTTTTTTTTTGAGACA CCBAC;CCB;BBCABCACABDBCBDCDCDBB@@@ACDDD@BCBCADCCCBCBC@ADDCABCCBCDBBBBCCBADBBCBCBCDAD@CDDDDDD?@@?D@DBE MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGFGHHHHGGHHHGHHGHHHGHFGHHHHHHHHHHHHHHHHGGHHFHHIHHHHHFHHHHHHHEGGCHDHHH UQ:i:0 +20GAVAAXX100126:8:43:10736:78356 659 chr1 10005705 60 101M = 10005394 -411 GTTTACCATGTTGCCCAGGGTGATCTCAACTCCTGTTTTTTTGAGACAGAGTCTCGCTCTGTTGCCCAGGCTGGAGTATAGTGGCATGATCTCGGCTCACT =@BA::A=C648=BD96>C'@4:(1?5:CD@CCA'3;*@?=C@=>C2<=>8BCD5>?B8@/9@/2@<+@?>CBAB=ACA79B@?@B=69A899BA?@4,?< MD:Z:0T18C14T66 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:3 SM:i:37 MQ:i:60 OQ:Z:=9@>2:>;B512BAB32=>C@5=?-=9=>>BB8>A>9@2=75@BA@>1';9 UQ:i:40 +20GAVAAXX100126:8:43:10431:27195 99 chr1 10005733 60 101M = 10006098 428 ACTCCTTTTTTTTTGAGACAGAGTCTCGCTCTGTTGCCCAGGCTGGAGTATAGTGGCATGATCTCGGCTCACTGCAACCTCTGCCTCCCGGGTTCAAGCGA CAC@CDDDDDDAAAADCCACCDCBBDB;AABCB@DBBBBACCBCBCAC@CCCCBBCBCCBC@@DB9CBBBB:@@@ADA@DBCCBCDBCC9??;?=>?<;6B MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHGGGGHHGHHHHHHHHHHGFHHHFHHHHHFHHHHHHFHFHHHHGHHHHHHGEFHHIHHGHFBDGFFHHFHHIHHHHHHHGDDA;6A8BAACAEA7BBA9<>9>8:@@A6@@=8;1BB;CCA@CA:;->C4@A7>ABBB7A?@:9?;:39A>>6:0A??BB>ACA>8(:@0A@7=BBAB6BBC@:=C>BCB?@@BC@CACCBCA>1@@1>@*@AA< UQ:i:12 +20GAVAAXX100126:8:61:2951:38254 83 chr1 10005758 60 101M = 10005434 -424 
TCGCTCTGTTGCCCAGGCTGGAGTATAGTGGCATGATCTCGGCTCACTGCAACCTCTGCCTCCCGGGTTCAAGCGATTCTCTTGCCTTGGCCTCCTGAGTA BC>CC;BBA@DBDDB;>CDDCDCDCBBCDCBBCCDCCCBDACDBCCBBBBCCB;BBBBCCCCCABCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GHGHHHHHHHG=HHHHHHHHHHHHHHIHHHHHHHDHCHHHHHHEHHHHHHEHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:67:2948:89942 147 chr1 10005805 60 20S81M = 10005484 -401 GTGGCATGATCTCGGCTCCCTGCAACCTCTGCCTCCCGGGTTCAAGCGATTCTCTTGCCTTGGCCTCCTGAGTAGCTGGGACTACGGGCACATGCCATCAT #####################@A=;@>=BB@AA5<@:@CA>=@ABC:CBCBC@@B?BDACBBB@CCB@>CB;CCABBBCCBBABBBC@ MD:Z:81 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:#####################?=<6?==@>A?=4?>?@BB?:?=?B=??B@BAB?B?AAABB?AAAAA=BBBB?BA=BBB<C@CBCD?BBAACA8=BBADE MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:37 MQ:i:29 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHIHHHHHFHHHGHHHHHHHHHHHHHHHHHHHHEHH UQ:i:0 +20GAVAAXX100126:8:46:11003:55063 147 chr1 10005971 60 23S78M = 10005652 -396 TGATCTCCTGACCTCGTGATCTGTCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCAAGTTAGTATGTGTTTAAAGAT ########################@=@=8><>?C96A88@=?<*;;BAB67C;>;4?:2;B<7=A95C;B@,BCBCCCACDBDCEBCBCACADCA??CBC@ MD:Z:78 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:########################?;@>36>9>;<(:6>B@6:A=672A89>B@9@B<1B9?@)@BBACB@BBCBBCCBBBCCCCCCCCCBCB UQ:i:0 +20GAVAAXX100126:8:26:2983:65747 99 chr1 10005981 29 79M22S = 10006359 410 CCTCCCAAAGGGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCAAGTTAGTATGTGTTAAAAGATCACACTTGTAATCCTAGAACTTTGGGAGGCTGA 64911=?>?@*:=?>?@A@@:9==8'6==<=::<:@9>>=>>63;;<;<;<:<29@?6?:A;;:;:;??=3>>=;####################### MD:Z:10T50T17 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:2 SM:i:29 MQ:i:29 OQ:Z:=9==84454404455FFFFF55445+444444554C:CAC??;7?55555555.5CC3CCG4@GA44455CCC8AA??####################### UQ:i:35 +20GAVAAXX100126:8:61:18274:143796 163 chr1 10006062 60 92M9S = 10006396 429 
CTAGCACTTTGGGAGGCTGATGTAGGAGGATCACTTGAAGCCAGGGGTTTGAGCCCAGCCTGGGCAACATAAAGATTACAGGCGTTAGCCACTAAGCCTGG >A:@?@?A?C?/>9A:AB;??:-7@@A<;B91'8C.>>B;C7=A%?>@@B9(8C<;:?@@A/A8::57::=0,C1%%?>:C@########## MD:Z:53A31G6 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:;>;@CC@@7;>7'5>??8:@79B79@2)?B<>8A4><>;@9==(99>;A8'5B=96@A;@0:4:737;9<.0A5%%<<5@A>@>=<==<@<>7?=>=>=977=;<=:<;:;<<9:=;:::@A=??;<<=<<;99<=7:65>A5:@:617/8>?;><=3?9<>?5>8B MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:23 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:FFDDFCACA>55544=<9<><<=CCCCC444444555445544C98>C>>7A<4@C9AACC;CA8>>@9A8C@@34BBBB@CCC?CD?7A:CA:@;B?BCBBBBABABBB@ MD:Z:64 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:######################################@74@16@AA@A@:;=>4@<@>A:@AA94B@AA@BBA>BB>0@6BA?A9BABBB?BBBBBBBAB UQ:i:0 +20GAVAAXX100126:8:3:10732:109401 163 chr1 10006103 60 101M = 10006447 444 CAGGGGTTTGAGACCAGCCTGGGCAACATAAAGATTACAGGCGTGAGCCACTGAGCCTGGACCCTTCCCTCTCTTAATCTGTTCAGGCTGCTATAACATAA @BBBABBAABCBA@CCCBCABBCACDADCCDDCCBAA;;BCB9@7>CCCC?AC@CCADCAB>BBB@?CBD:E@@@BBAACD;D= MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BABBBBACCBBAABBBBBB>BAB@BBBCBBBBBBBAB@>BBBA;9=ABBBB@BAABCBB@A?AB?=BB?BCB>??BB@@ABBC58CAABAD@C51BCACBBEDA906>;8=AB9@B:CCB@<;@9BC##################### MD:Z:81 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:<0@@@??AA9A;5>B@?B?B?B60@ABBAACB@608C@;@ABA;?BABB>>=A729=8A>ABB>;;?9AA##################### UQ:i:0 +20GAVAAXX100126:8:46:19844:141004 99 chr1 10006137 60 95M6S = 10006468 401 TTACAGGCGTGAGCCACTGAGCCTGGACCCTTCCCTCTCTTAATCTGTTCAGGCTGCTATAACATAATACTGTAGGCTGGCTACAGAGGTTTTAAAATTTG CDC?CCCA;CBDCBBCADBDCBBD@=><=<@;B34;82ABB>DCBEC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:11 NM:i:0 SM:i:11 MQ:i:11 OQ:Z:BBBBBBBBBBBBABABBBBBBBBABBBBBBBBBBBABCBBBBBBABBBBBBBBBBAABAA?A@AB?@A?5<941,8'3<@@1=>>:>32280=@?;@>A@B UQ:i:0 +20GAVAAXX100126:8:45:17313:6715 83 chr1 10006154 60 101M = 10005854 -400 
TGAGCCTGGACCCTTCCCTCTCTTAATCTGTTCAGGCTGCTATAACATAATACTGTAGGCTGGCTACAGAGTTTTTAAAATTAGTATTTTTTTTTTTTTTG <>A@??BA@B?CCD>CCCACDCDCDCDCCADA@DBB;CBAB@CDCAC?DCC@CCAC?B@C@@A@?=@;??>DDDDCDDDCDBA<:;?>>??>>>DDDBDCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BACBDACCABAB><;;7?@@A?C<=;;8>+665DC:0?<@:>35;@@B9@@A?=@A;?#################### UQ:i:0 +20GAVAAXX100126:8:22:5084:77687 163 chr1 10006173 60 81M20S = 10006497 424 CTCTTAATCTGTTCAGGCTGCTATAACATAATACTGTAGGCTGGCTACAGAGTTTTTAAAATTAGTATTTTTTTTTTTTTTGAGACGGAGGCTTGCTCTGT @DBC?ACCBDBBCACCCBDB?BCAACACCCDBCA??>=?<@=;C5;@@983<::DDC;AA>A>3813ADBACC<@BBABC##################### MD:Z:81 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCCCBBBBBBABBBBBBBB>?B@@ABBBBBABB@AA@A?A==B::>@832<4CBCD?AB?;BB6?C:9ACB?@7@=AA?B<>73 MD:Z:15C85 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:1 SM:i:0 MQ:i:37 OQ:Z:BBABBBB>BBBB@CC?C=CBCBBBBBABBABBABAABBABBAA?BBBBAAB>AAAAB>AA>BAAAB@A;BBBB?AB=;@B==A39>A??=9=;>>@>4=26 UQ:i:32 +20GAVAAXX100126:8:43:16553:40365 147 chr1 10006358 29 66S35M = 10005967 -425 GTGGTGTGAGCGCGGTTCACTGCAAGCCCCACCCCCCAGGTTCCCACCATCCCCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCTGCCACCAC #############################################################################BA:8:8::2B420@B@ABA@BBC@ MD:Z:35 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:0 MQ:i:37 OQ:Z:#############################################################################@@727537.@8<.@A>ABB>BB@B UQ:i:0 +20GAVAAXX100126:8:26:2983:65747 147 chr1 10006359 29 68S33M = 10005981 -410 AGGTGCTTGAGCCCGCCCACATGCACGCTCCACATCCCGGTTGCACACCGTACCCCTCCCTCAGCATTCCGAGTAGCTGGGACTACAGGCGCCTGCCACCA ############################################################################################BB@?;7=@? 
MD:Z:33 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:############################################################################################BB@@70>=> UQ:i:0 +20GAVAAXX100126:8:62:16536:90438 147 chr1 10006385 60 66S35M = 10006082 -337 CCCTCCTCCCAGCTTCACTCCTTTCTCCTCCCTCAGCCTCCCGAGTAGATGGGACTACAGGCGCCTGCCACCACGCCGGGCTAATTTTTTTATTTTAGTAG #####################################################################B4,B3?$6;63ACC@9@DBDA;9?8C5?1:D@ MD:Z:35 PG:Z:BWA RG:Z:20GAV.8 AM:i:23 NM:i:0 SM:i:23 MQ:i:60 OQ:Z:#####################################################################?5+@5>(:742@BB>6>BBC?=6<5B6;1;CB UQ:i:0 +20GAVAAXX100126:8:61:18274:143796 83 chr1 10006396 60 5S96M = 10006062 -429 ACGCCGGGCTAATTTTTTTATTTTAGTAGAGACGGGGTTTCACCATGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCTGCCGGCCTCGGCCTCC ######?9AB><<<=@<>@==B@B4;59*:@AB?A8;;;=>?@A>7.=,;2;=88<:4;<6<<=:;95:=:8>A?>>==;/7=/:@<7>BAB>GDG@=@FDFFD5/5.57@A@845554;?2=4555044555 UQ:i:0 +20GAVAAXX100126:8:23:12774:197414 99 chr1 10006420 60 101M = 10006790 437 AGACGGGGTTTCACCATGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCTGCCGGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA CCC?;CCCCDDBCABCCBBDCCBBCCCDCBCBBCB;CCBCBBCBDABDB;BBDCBDBBB;CBBDB;CBBDBBBCDDCABBDB>CDCDCACCCC;CCDDCDA MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:23 NM:i:0 SM:i:23 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHEHHHHHHHHHHHHHHHHHG UQ:i:0 +20GAVAAXX100126:8:67:19765:57485 83 chr1 10006437 11 1S18M3I77M2S = 10006149 -382 CGTTGGCCAGGATGCTCTCGTGGATCTCTTGACCTCGTAATCCGCCCACCTCCGCCTCCCAAAATGCTGGGGTTACAGGCATGAGCCACTGTGCCCGGCTT HHFEHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:456 +20GAVAAXX100126:8:3:10732:109401 83 chr1 10006447 60 101M = 10006103 -444 ATGGTCTCGATCTCCTGACCTCGTGATCTGCCGGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCCTAAAATTAGTATGTT =ECBDC=4@=B;5>>=B@CCB9AC@BDCCBC;BBCCD;BBCCACBBDDDACBCBBBBCACBBDBB;?CBCBCBBCBB;BCC;BBCCCDDDCDCDACCA@DC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 
SM:i:37 MQ:i:60 OQ:Z:@HHHHHCBFBD?9AA?HDHHEGHHGGHHHHHHHHHHHHHHHHEHGHHHHHHHHGHHHHGHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:46:19844:141004 147 chr1 10006468 60 30S71M = 10006137 -401 TGAGCCAGGTTGGTCTCGATCTCATGACTGCGTGATCTGCCGGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCCTAAAAT ###############################?9<'@A<:=9>8@:>;7C632<83.1;A=;000-8*?;&9A>>BA9>>86>?CA;B@A>@ MD:Z:71 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:###############################@4<&>0032;+==?@0=907A;6(9@6=BB@A@8=;=B@=@?@;@ UQ:i:0 +20GAVAAXX100126:8:48:11905:175240 83 chr1 10006478 60 101M = 10006103 -475 CGGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCCTAAAATTAGTATGTTTTTTGTTGTACTTGTTTTTGTTTTTGTTTTT /8,)09:AABA<>B>ADDACBACCB>AD>BBBBB9ACBDB>B@CBA;B@C;BB@BCD@DCDCDAC??<>??DDBADCACB>?@?DCCCBDCC@D>C;ABBCBCBCCBB9BCC;BBCCCDDDCDCDACBB?ABADDCADCAC9@@B?A@@AB@AAAAB?AAAADDDDDCBDBB;BBDADBCBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:5HEHHHHGHHHGHEHHHGHGHHHHHHHGHHHHHHHHHHHHHHHHHHGGGGGGHHHHHHHHBGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:22:5084:77687 83 chr1 10006497 60 101M = 10006173 -424 GGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCCTAAAATTAGTATGTTTTTTGTTGTACTTGTTTTTGTTTTTGTTTTTTTTTTGAGGCGGAGTTTCA *@DBCCCCDCCCDCC;ACBDBCBC@BB;A@@:BACCC?DDCDCDACBB?ABADDCADCACB@@B?A@@AB@AAAAB?AAAADDDDDCBDBB;BBDADBCBC MD:Z:0T100 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:2GHGHHHHHHHHHHHHHHHHHHHHFHHHFFFIHGHHHEHHHHHHHHGGGGGGHHHHHHHHHGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHH UQ:i:9 +20GAVAAXX100126:8:21:11247:101401 147 chr1 10006514 60 12S89M = 10006257 -345 GGATTACAGGCGTGAGCCACCACGCCCGGCCTAAAATTAGTATGTTTTTTGTTGTACTTGTTTTTGTTTTTGTTTTTTTTTTGAGGCGGAGTTTCACTCTT #############@@A6@A@>@;@@A:A@C>B@?@AB=@>A>BAABBA@@AB@ABA=ABBABBA1AABBB?BBB@BBBCBBBBCBBCCCCCCBCCBBCBCBCCB@CBBCB UQ:i:0 +20GAVAAXX100126:8:46:8850:165700 83 chr1 10006624 37 101M = 10006281 -443 
TGGCGTGATCTTGGTTCACCACGACCTCTGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGACTACAAGCATGCACCACCATG EDC;ADCCDADBA@ACB@CBC;BACCDCCBCCDCC;CCADDBDDB;?CDDCDCCCBCCDBDBCCDCCCBCACDBCCBBBBCCBBDDBBCCBBCCCCC@BCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:37 MQ:i:29 OQ:Z:HHHHHHHHHGHGGGGFHDHHHHHFHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:22:6227:26566 163 chr1 10006730 60 83M18S = 10006988 358 CTAATTTTGTATTTTTAGTAGAGACAGAGTTTCTCCATATTGGTCAGGCTGGTCTCGAACTCCCGACATCACGTGATCCACCTGCCTCAACGTCCATAAGG @DBC@?BBBBCCBBCCCCADCCCCBCBCCBCCBDABAB9BBA?@?@@?A??=;7=?39@?81AA886:?>:>3687':A@?A################### MD:Z:83 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCBCCBBCACBCBBBBB;CBBBACBABB?BBBBBBBB>BBA@;A?<=A:A>2:6;83>?7/?A=5<9<<:;5/55(:=?@?################### UQ:i:0 +20GAVAAXX100126:8:2:2951:16973 99 chr1 10006763 60 101M = 10007005 342 TCCATATTGGTCAGGCTGGTCTCGAACTCCCGACATCACGTGATCCACCTGCCTCAACCTCCAAAAGGGCTGGGATTACAGGTGCGAGCCACCTCGCCTGA <<>7ABBCBCDABDBBADDD>:BBDB@CCCD@AACCC=B4BCBCCACDC;CCDCDBC<6 MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHCHFEHHHHHHHEHHHHHHHHHHHDFHFHHHHHHHEHHHHHHHHHHHFHHHECHHHHFHGHHFHEHHHEHBFHHHHHHHHHHHHHHHH@5 UQ:i:0 +20GAVAAXX100126:8:23:12774:197414 147 chr1 10006790 60 33S68M = 10006420 -437 AGTTTCTCCATATGGGTCAGGCTGGTCTCGAACTCCCGACATCACGTGATCCACCTGCCTCAACCTCCAAAAGTGCTGGGATTACAGGTGCGAGCCACCTC ##################################'@@3DB:?CA@@A9BA=BBCCCDACABACCABDABACC<BBAB UQ:i:0 +20GAVAAXX100126:8:21:7822:20748 163 chr1 10006812 60 83M18S = 10007120 408 TGCCTCAACCTCCAAAAGTGCTGGGATTACAGGTGCGAGCCACCTCGCCTGACCAAGTTAGTATGTGTTTAAAGATCACACTTGTAATCCCAGCACTTTGG @=5AA>>A:AA>=7:C?4@A@A>CC?;A:;A@;=7<+77:>C?C@5.A:B(4@.+**@@><;BA<61ADC8:@;A7;AA<@A################### MD:Z:83 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:@86@??:@9?@;BB;8?7;?><88@59;>?BAAA50@9>(3@/)()6>=>7AA9-0BBA:3<;?5CCCC?ABDBCCCCCCACCB@BC;CCB@B=BCA9ACD@CACACA@C@CCCDBAAACBBD@ACA4CA?@@B@BA@AB@@CCA?@AAA@C*ACA:A MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBB>ABBB@BBBBBBABBBBBBAB@=A?A@>>@@@?@??=?@-<=A4< UQ:i:0 +20GAVAAXX100126:8:1:4286:192808 99 chr1 10006849 60 101M = 10007174 425 AGCCACCTCGCCTGACCAAGTTAGTATGTGTTTAAAGATCACACTTGTAATCCCAGCACTTTGGGAGGCTGATGTAGGAGGATCGCTTGAGGCCAGGTGTT <<;:A:??BDBBCDC=BAC?BCA?DDB>>?ABABBACBB>><:=CDCB;@ADCDC?@CDA=;?A5 MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GGEGGEDHHEHHHHHEGCGDGGCGEHHHHHHHHGHBF9FF=CDGGHHGHHHEHGHEHHHDHHHEECGGGGHEHHHAA?=AHHHHHEEHHHHDEHHGBBGG4 UQ:i:0 +20GAVAAXX100126:8:63:5666:148033 163 chr1 10006932 60 101M = 10007256 424 CGCTTGAGGCCAGGTGTTTGAGACCAGCCTGGGCAACATAGGGAGACCCTGTCTCTACAAAAAATACAAAAATTAGTTGGGTGTGATGGTTCATGCCTGTA @;BCAABBCBCCBBABABCBCCABCCBCCCBCCBBC?BBBBBCCA?ACCC@>BACD>ACBBBCBBA>BDCACBB@B:BBB;9A>ADCA@C?AC?B?CD@B@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBBBBBBBBB;BBCBABB>CBBABBAAAABBBBBBBBABAB>AAB?B=?ABA>BA@A@AAA@B@A@@@@A@A7A@A<3>9@AA@A>>?A>@B@>@9? 
UQ:i:0 +20GAVAAXX100126:8:62:2555:107007 99 chr1 10006969 60 101M = 10007301 421 ATAGGGAGACCCTGTCTCTACAAAAAATACAAAAATTAGTTGGGTGTGATGGTTCATGCCTGTAGTCTCAGCTACTCAGGAAGCTGAGGTGGGAGGATTAC CCCACCDCDABBDBBBDBDCACDDDAABA?CDDD@BDCCBDBCC@B@BDCBCBDBC@BBBDBCCCCBDBCCBDCADBCCCCDCBDBDAC=CC?DCD?BAC1@9?A8@C1CB@4?<@%1:<9@@7?>CCACCC*BB9C?;;0BC@@A?799>#7)6ACBBA66CBDC<(-A?B=AB96<;BBAAAA@@C@ MD:Z:0T22C39A37 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:3 SM:i:37 MQ:i:60 OQ:Z:#?219@>1;9>>8>A.@A;2><<%/;=7@97;9A?BAB=BAABB*=?AB@CAA=BBA UQ:i:8 +20GAVAAXX100126:8:46:18774:97831 163 chr1 10007015 60 79M22S = 10007344 429 TGATGGTTCATGCCTGTAGTCTCAGCTACTCAGGAAGCTGAGGTGGGAGGATTACTTGGGCCCAGGAGGTCGGGGCTGCAGTGAGCCAGTGACCATGCCAG @CCB?B@BBCCBBBDBBCCBBDBCCBCCADBBBC?AAA@A?AC=?CCACC=?C==CB@;A>AB=AA6@B8>;?<>@??####################### MD:Z:79 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCBBCBBBBBBBBCBBABBABBBBBBABBBBAAB@ABBAB@AB6ABA=BA??@@;?@<;@?@A=@?7?@4??==>?=?####################### UQ:i:0 +20GAVAAXX100126:8:24:7834:75053 163 chr1 10007028 60 64M37S = 10007352 424 CTGTAGTCTCAGCTACTCAGGAAGCTGAGGTGGGAGGATTACTTGGGCCCAGGAGGTCGGGGCTGCCGTGAGCCAGTGACCATGCCAGTATACTCCAGCCC @DB@AA@ADACCBCC>CAABCBCBADBBCC@?BB7BB:CB9<80:C@###################################### MD:Z:64 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBBABABBBBB@AA?AB@AAABB@BB6@BB@BB;A@AAA@:=B??A@A?3=B?###################################### UQ:i:0 +20GAVAAXX100126:8:68:8790:55871 163 chr1 10007049 60 96M5S = 10007343 394 AAGCTGAGGCGGGAGGATTACTTGGGCCCAGGAGGTCGGGGCTGCAGTGAGCCAGTGACCATGCCAGTATACTCCAGTCCTGATTACAGAACAAACCCCTA >=<1?;B>:&0B1'253=B8>=3:5A<3=*?.>=15255%><;=78;CB<6&?@@B<59B<855(C;D?@1;;5/5@;+@>;9;77@B75(;A(-B>C;A>A6BA###### UQ:i:24 +20GAVAAXX100126:8:68:20656:34221 83 chr1 10007102 60 101M = 10006818 -384 AGTGACCATGCCAGTATACTCCAGCCCTGATTACAGAACAAACCCCTATCTCAAACAAACAAACAAGTACATAAATGAAAGAAAGTTTGTGTTCCTACCAC =BDCCDCCCCCAB?@>@ACDCBDBCCCCBCDCC@D@DCBDDACCCCCCDCD@ADBA@DBBDDBBDCACBBCCDDCC@ADD@DDDADDCACAADCCCCABCC MD:Z:101 PG:Z:BWA 
RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:>HHHHHHHHHHFFFFBFFHHHHHHHHHHHHHHHGHFHHHHHFHHHHHHHHHGGHHGGHHHHHHHHHHHHHHHHHHHGGHHGHHHHHHHHHHGHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:21:7822:20748 83 chr1 10007120 60 101M = 10006812 -408 CTCCAGCCCTGATTACAGAACAAACCCCTATCTCAAACAAACAAACAAGTACATAAATGAAAGAAAGTTTGTGTTCCTACCACAGGTGTGTCCAGTGAGAA >)49;7%9;<;0:?9@B9>A?990555;8:945;8?@:;AA><<:=;7;?9=<;<;;49;>;1@>A>?:<>;=>:??5<<=A?=A>9;7==< MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:C+5554.=>=>36@@EB6FDFF?FFFC?B@=ACAB;ADDADBACADDADDB@BBACCBC@CBACBC@B=?@9>3?### UQ:i:0 +20GAVAAXX100126:8:3:4050:181815 163 chr1 10007126 60 101M = 10007427 401 CCCTGATTACAGAACAAACCCCTATCTCAAACAAACAAACAAGTACATAAATGAAAGAAAGTTTGTGTTCCTACCACAGGTGTGTCCAGTGAGAAGAGTGT @CCCABBABACCCBADDDBDCCDCDCDCDDEACDC@ABB@BCBBAACDCD>CCADDCCDBBBCBCC?BDBAEC?BB@@BBB@CABCDB>C77B@A>1C?D? MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCCBBBCBBBBBCBCBBCCBBBBCCBCCBCBBBBBBBBBBBB@BBBBBB@BBBBBB@B@B@BBBAB@BBBBBBBAB?CB@@AB@BBAB@C4@>;;-?9>5BA<@:@;=CC@61>>CC=9BCCADC?<:428>=;?;?4'8=7=6?@9<<<:@BB<11?>@@=3;A@<=6>A?@B<:3*?>;=?3BAEEAA=<<26:35//C029A0A&4::A9CC@B;>?59=672-B444A-=+-49=/:,))@?9:'(?3?BC@?)9?CBC**?)A@:CB<3@########## UQ:i:21 +20GAVAAXX100126:8:48:10108:181079 163 chr1 10007155 60 101M = 10007444 389 AACAAACAAACAAGTACATAAATGAAAGAAAGTTTGTGTTCCTACCACAGGTGTGTCCAGTGAGAAGAGTGTGATGTTGCAGATGAGAGGGGATAAATGCT @D@BBA@BCDACCAACACDCDDCCCDDCCDDCBC@A>A@BBCDC?CCACC@AB>CBBCCC@BBBCD?AC?@CC@BA?BBA@ABABCBB?C@AB?BBCB@DA MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCBBBCBCBCBCBCBBBBCBBBBCBBBBABBB@BCBABBBBBBBBBBBBBA@A?BABBBB@B@B@AA?B8B>A?AA>@@B?B>@A???@@?@>>@@?>@A> UQ:i:0 +20GAVAAXX100126:8:48:10118:181094 1187 chr1 10007155 60 101M = 10007444 389 AACAAACAAACAAGTACATAAATGAAAGAAAGTTTGTGTTCCTACCACAGGTGTGTCCAGTGAGAAGAGTGTGATGTTGCAGATGAGAGGGGATAAATGCT @D@BBB@BCCACCABCACCCDDCBCDDCCCDCACBA>A@BBCDC=BCABC@CBBCCC?BABB?@@C@@CCABABBCA@ABABBA<@B@A>A@<=C@CB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 
SM:i:37 MQ:i:60 OQ:Z:BCBBBBBBBBBBBB?BBBBBBBBBBBBBAABBBBBBABBBBBBBAABBABA8B@BABBAB?B?B?;B>A;B>B@BB@@BA?A>@A>>8A?@?9@=87??@? UQ:i:0 +20GAVAAXX100126:8:1:4286:192808 147 chr1 10007174 60 101M = 10006849 -425 AAATGAAAGAAAGTTTGTGTTCCTACAACAGGTGTGTCCAGTGAGAAGAGTGTGATGTTGCAGATGAGAGGGGATAAATGCTGGACCAAAGTCCTTGAGAA =DADBADAB+@D8CDC:.;>6A?3:>'?@DA@;;@ACBBD9BBE@DE?A@AAACDC:DCBB@A@>?BCDB5=ADCBCBB@@CCC?CC?BDACA?CBACB<@ MD:Z:26C74 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z::?AAA?BBB+?@;BB?:+8=5@;37>(><@AA6A?B>ABBBB@ACDCC?CC@CCADCBB@CBBC=ABCBCC?@ABBA;ABCB?8CCACAAA>BB@CAD@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBABBBBBBBCCBCCCBBBCB@B?BABBBB@BAB@ABAB>B@BBBBBBBB@B?BB@B>BB@A?@A@AAB@>BABA;@B?A;3?BAA?@;<>A6@>@= UQ:i:0 +20GAVAAXX100126:8:7:7502:151389 1187 chr1 10007181 60 101M = 10007491 410 AGAAAGTTTGTGTTCCTACCACAGGTGTGTCCAGTGAGAAGAGTGTGATGTTGCAGATGAGAGGGGATAAATGCTGGACCAAAGTCCTTGAGAAGGTGAGA @CACB?8AABABA@CDECBDCACCC@B?CBCCCC;A7=:+;;9??;C@CC?CB=DCCDCC?5ABCA6>BC?@B;8=<9@BBBDB?C@;@B9B>1ABCC?>@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCCB@.@CCAABCCCCBCCBBBCBA@?:@<4<=C@BAB@A:A;6@?7A9.@;6< UQ:i:0 +20GAVAAXX100126:8:24:17458:9144 163 chr1 10007190 60 101M = 10007539 449 GTGTTCCTACCACAGGTGTGTCCAGTGAGAAGAGTGTGATGTTGCAGATGAGAGGGGATAAATGCTGGACCAAAGTCCTTGAGAAGGTGAGAGAGAAAGGA @AB@A@BCBACCABCCBBBBBCDBCABCCCDCB?9A;@@@B@@A@ACC@A>BCACBBCACBBA?AC=@DA;CCBB>A@D92=BA@BA8?C>@A?CBCD@CA MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBCBBBBBCBBBB@B@BACCAB?BABBBB@=7B?AAAB@@@B@BA@@@A@BA@A@@BB@@@?@@?BB<@@AA:A?@9/;@?=@>5@?====@?>@>>> UQ:i:0 +20GAVAAXX100126:8:45:16821:90529 1187 chr1 10007194 60 101M = 10007535 436 TCCTACCACAGGTGTGTCCAGTGAGAAGAGTGTGATGTTGCAGATGAGAGGGGATAAATGCTGGACCAAAGTCCTTGAGAAGGTGAGAGAGAAAGGAGAGA @BCCA?BBACCC?ABC?BDCCBBCCCDC>C9CBC?B>=7ABBB=ABACBC9=A@@CD??>AC@8CA;CBD?C@&=B@BABBA>;:D MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:BBCBBCBCCBBC@C?CCBCBBABBBBBC9C5B@B@BA>;BBAB9BB=BAB=>>AB>B?>=':B@A=:@@=/683=?:@<(<=A<663B UQ:i:0 +20GAVAAXX100126:8:45:16833:90513 163 chr1 10007194 60 101M = 10007530 436 TCCTACCACAGGTGTGTCCAGTGAGAAGAGTGTGATGTTGCAGATGAGAGGGGATAAATGCTGGACCAAAGTCCTTGAGAAGGTGAGAGAGAAAGGAGAGA @CCC@?BBACCC=AABABDCC@BCCCDCCCBCACBBA@@?BBB@?BCC@BABB@?@=B?AA>CDDAC@+>BABC?>AC8=>@2BBBBBCBB+;BAAB>=A@5;7;/=?B@AAC>A=C>ABC=CABA##### MD:Z:97 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCBBBCBBB@BBB>BA;AABBBBBBAB8B?BABBAABBAB@AB?A?BBAA?@AA@@A@?@@B?@B?=?9B=A<;@9A@8@>=:A?=@##### UQ:i:0 +20GAVAAXX100126:8:27:21458:11536 629 chr1 10007225 0 * = 10007225 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTNAAANTGTTAANNNNNNNNNNNNNNNNNNNNNAGTN ##################################################################################################### PG:Z:BWA RG:Z:20GAV.8 OQ:Z:##################################################################################################### +20GAVAAXX100126:8:27:21458:11536 697 chr1 10007225 37 16S85M = 10007225 0 GTCCAGTGAGAAGAGTGTGATGTTGCAGATGAGAGGGGATAAATGCTGGACCAAAGTCCTTGAGAAGGTGAGAGAGAAAGGAGAGAAAGGAATCCAGAGTC #################BAA=@D@2@;974/A@B:3'2>7@CBC=>D>::73?BB87=28?A=AAC>7:8@BC@D=CCCCCD?BACBBB>CCCBAAACAC@ MD:Z:85 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:37 OQ:Z:#################A=@>@=1'0=8@AA@<9B@56<.??=;8<19@B?BBB@884>BA>B;BCABCB>?AACCB?;=@C>;B4=>2@A?C=9C>:>?>=?A8-8ACBDAD@CCCA?D@E=BBCBBCCBCABAC@C@ MD:Z:69 PG:Z:BWA RG:Z:20GAV.8 AM:i:0 NM:i:0 SM:i:37 OQ:Z:#################################@7/03=?<;?@=6>59=7=@;??;B?;?A@@AA:-6?AAB@B@BB@?=B@C@CA?==?@C?83A<5@?7669-@BBCC5AABDBABBBBA?AABBCCCB;CADCABDD@C?ACEEAACDBCCA@?CBB@ MD:Z:20G69 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:############??=5686>@A@?<<>??:9/=:8>;524:+:=<=ABBB@@@ABAB@BABBABBBBBABABBBBBBABCAB9789;<8::;985;:8:298648:==<@>;:;;;:6C@@C<:;>=<=:6=?A??@?>5<<9<;;:>?=???>AAB<=?<@<<==<>:@?=<@?=>B5?:< MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:A44445544455555555554:9<>=DDDD>@==@<;GFFG54444?<>8>55554ADDFAGAGFGDDADF??A>>>@C?>DFAADGAGDG UQ:i:0 +20GAVAAXX100126:8:46:18774:97831 83 chr1 10007344 60 101M = 10007015 -429 ATAACAGCACATCCTTACGGTTGTAGGTGGCTTTGTAGATCTAAGAAGATCAGGAAGATTGCTGGCTCTAGTTTTGTAGATTATCAGGAAGATTATCAGAA ?DDCCBCCCCCDCCDCC;CADCACDBACBBCDDCACDBCDCCDDBDDBCDBCBBDCBCDCBCCBBCDCCCADDDCACDBCDCCDBDBBDDBCDCCDCBBDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:24:7834:75053 83 chr1 10007352 60 101M = 10007028 -424 ACATCCTTACGGTTGTAGGTGGCTTTGTAGATCTAAGAAGATCAGGAAGATTGCTGGCTCTAGTTTTGTAGATTATCAGGAAGATTATCAGAAAAACATAT C?CDDCDCC;CADCACDCACBBCDDCACDBCDCCDDBDDBCDBDBBDDBCDCBCCBBCDCCCADDDCACCBCDCCDBDBBDDBCDCCDAC@ADDDCCACCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:FGHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGGHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:3:4038:181802 1107 chr1 10007427 60 101M = 10007126 -401 TCAGGAAGATTATCAGAAAAACATATCTTAACCTGGTGAGCTCTCTATGCTCTGGCACTTGAATAAAAGCTATCCTAAAATATTACAAATTCTAGAATTTA EDDCCDDCCDC@DACA@DDDCBCCCDCDCDCCCCBACBDBBDCDCCCCBCDCCBBBBCDC@@CCDDDCBCCCDCCCDDDCCCDCBBDDCDDCCDBDCBDCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHEHGGGGHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:3:4050:181815 83 chr1 10007427 60 101M = 10007126 -401 TCAGGAAGATTATCAGAAAAACATATCTTAACCTGGTGAGCTCTCTATGCTCTGGCACTTGAATAAAAGCTATCCTAAAATATTACAAATTCTAGAATTTA BDDCCDDCCDBCDACA@DDDCBCCCDCDCDCCCCBACBDBCDCDCCCCBCDCCBBBBCDC@@CCDDDCBCCCDCCCDDDCCCDCBBDDCDDCCDBDCBDCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GHHHHHHHIHIHHGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:48:10108:181079 83 chr1 10007444 60 101M = 10007155 -389 
AAAACATATCTTAACCTGGTGAGCTCTCTATGCTCTGGCACTTGAATAAAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTT EEDCCCCCDCDCDCCCCCACBDBCDCBCCCCBCDCCCBBCCDC@ACCDDDCBCCCDCCCDDDCCCDCBBDDCDDCCDBDCDDCBCCDDABBDDDDCCBDDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHIHHHHHIHHHHHHHHHGHHHIHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:48:10118:181094 1107 chr1 10007444 60 101M = 10007155 -389 AAAACATATCTTAACCTGGTGAGCTCTCTATGCTCTGGCACTTGAATAAAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTT EEDC@CCCDCBCABCCBCABBDBCDCDCCCCBC?CCDDDC@CCCDCCCDDDCACDABBDDCDDCCDBDCDDCBCCDD@;BAAAA@@7>=< MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHGHHHHHFHGGHHGHHGHHHHHHHHHHHHHBHHHHHGHGHEEHHHHHHFHHHHHHHHHHHGHHGHHHHHHHHHHHHHHHHHHHHHCCHGGGGFGDGGG UQ:i:0 +20GAVAAXX100126:8:43:20013:129172 83 chr1 10007463 60 101M = 10007196 -367 TGAGCTCTCTATGCTCTGGCACTTGAATAAAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTTTCATTAACTAGGAGGCTTT DDDCCCCDCCCCCCDCCCBBCBDC@ACCDDDDA>CCDCCCDDDBCCD>BBDDCDDCCCBDCDDCBCCDD@@BDDDDACDDDDDBCDCDCCCDBBDC@A>DC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:IHHHHGIHHHHHHHHHHHHHHIHHGGHHHHHHFEHHHHHHHHHGHHHEHHHHHHHHHHHHHHHHHHHHHGEHHHHHIHHHHHHHHHHHHHHHHHHHGHGHH UQ:i:0 +20GAVAAXX100126:8:42:6558:108923 163 chr1 10007468 60 101M = 10007800 432 TCTCTATGCTCTGGCACTTGAATAAAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTT @BCABABBBDBDBBBCBDCBCDDCEDDCCDCCBCCBBBBBBCCC:CDCCC@DDADCCDCC@CDBBC>BDCBD@BBCBBCBBBCB@ECB=D@ABBDBCDAAC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCCBBBBBBBBBBBBCBCBCBCBCBBBBBCBBBBBBBBBBBBB@BBABBBBCBBABBBBBBBBABBAAABA@B@A@ABABAB@BBAB>A@@A?@@?@?;B UQ:i:0 +20GAVAAXX100126:8:45:3840:54807 163 chr1 10007468 60 101M = 10007795 427 TCTCTATGCTCTGGCACTTGAATAAAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTT @CCAA@BABDBBB?DABDBB7B?9A=BAC:@D??@??AB=;ADCBDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 
AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCBCCBCBBC?B@>ABBBCCBCBBCCB?ABCB:B@BCBC<=BCCC>@CBB>BBBBCBBB=BCBBBA6B>C@CBBBCBA<>ACBBA95>A?@BB UQ:i:0 +20GAVAAXX100126:8:61:17876:93642 99 chr1 10007468 29 101M = 10007872 438 TCTCTATGCTCTGGCACTTGAATAAAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTT CBC@DCCCBDBDBCBCADDBDDCCDDAABDCCBBCCDDDCCCDCACDDCDBDCCDDCDDCABDDBCACDDDA?DDDDDBCCDCDADCCCDCCCDDADDD@@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHGHHHGG UQ:i:0 +20GAVAAXX100126:8:67:10895:75253 147 chr1 10007475 60 101M = 10007283 -292 GCTCTGGCACTTGAATAAAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTTTTGGATA ;DBCCAB@CAAC?BCBCCDB=ABA>BB?DDACAC@BBACCBCCCCECADBBCADCC@C@BBBBB:BACDCCCCDCCACCDDBDCBBDBCDCDDCB@@ACC@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:9@?@@>@@?@@@A@A@@A@@>@A@=AB@BBBB@BAA?ABBBAABBBBBBABB@BBAAB@BBBBB>BAABAABBBBBBBBBCBBBBABBBBBCCBCCCBBCB UQ:i:0 +20GAVAAXX100126:8:68:20495:7123 99 chr1 10007491 60 101M = 10007858 412 AAAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTTTTGGATACGGAGTCTTGCTCTGT CDDBCCDCCBBDCDDDCCCDCACDDCDBDACDDCDDCABCDBCACDDDA?CDDDDBCCDCDADCCCDCCBDDDDDDA@@@ACDCCA;CDCCCDDCCDCDDD MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:25 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHGGGGGHHHHHHHHHIHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:3:11681:100672 83 chr1 10007491 60 101M = 10007181 -410 AAAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTTTTGGATACGGAGTCTTGCTCTGT DDDDBADDDDDB>DCD@==6@<=BB6AAAADDDDDCBBCCB;BBDADCDCCCDACAC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:@@DDCDDC>@BDCDDC=BAAB@:BDDDDBC?DDDDBC>ADBCCCBBBBBAAAAADDDDDCBBC@B;BBDADCDCCCDACAC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:AHHHHHHEEHHHHHHHBCBDDGHHHHHHEEHHHHHH?GGFFGBHHHHHHHDHHHHHHEGHHHHHHHIHHGGGGGHHHHHHHHHIHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:3:9830:12196 163 chr1 10007492 60 83M18S = 10007808 416 AAAGCTATCCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTTTTGGATACGGAGTCTTGCTCTGTC @DCBAAABBCDCCBDDCDCCACDDCCBDCCCDDCBB?BCB@CACBBDDADACCACDCDBD;D@BCC=CCCBDCABC@8.67A################### MD:Z:83 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBCCCBCCBBBBCBCCBBBBBBCBBCBBBCBBBBBBBABBBB@BBBBBBBBBBBBABCB@BA@@AB@AB@@@A?8-49?################### UQ:i:0 +20GAVAAXX100126:8:4:4481:73051 163 chr1 10007500 60 101M = 10007790 390 CCTAAAATATTACAAATTCTAGAATTTACCTTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTTTTGGATACGGAGTCTTGCTCTGTCCTCCAGGC @DCAAABBBCBCA>DDCBCEDCCEDCCCBDEBBD?@?B@B@B@CACCCAC;BB;DC@DCCADCB@C>?DCABDAB>;6>98-CC@>@C=D@BB@D@?D=B< MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCCCCCCBCCCCABBBCCCCCCCCCCCCCCCBCCCCBCBBCCCBBCBCCCCCCCC?BBABBBBCB@CBBB=B@B>=><76(AA>90))?(*CD################################################ MD:Z:32T21 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:A7445*?)))@*+4@?)?A*.->:BA8?@2))?(+BA################################################ UQ:i:7 +20GAVAAXX100126:8:45:16833:90513 83 chr1 10007530 60 101M = 10007194 -436 TTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTTTTGGATACGGAGTCTTGCTCTGTCCTCCAGGCTGGAGTGCAGTGGCACCATCTAGGCTAACT AE>;CDDDDBC?DADB@CDCDCC>DB@@=B>?CBCDCCDCCCCBBCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:EHCBHHHHHGHEHGHEGHHHHHHDHHGEEHEBEEEHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHGHHHHGGGEDHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:45:16821:90529 1107 chr1 10007535 60 5S96M = 10007194 -436 TTCACAAAAACTTTTTCATTAACTAGGAGGCTTTTTTTTTTGGATACGGAGTCTTGCTCTGTCCTCCAGGCTGGAGTGCAGTGGCACCATCTAGGCTAACT ######<<=965DDDDBADCDC>=?BBB=B4=A?ADAADDCBBCCB;BBDADCDCBCDCCADCCDCBCBBCCBBDACBBDACBBB@CBCDCCDCCCCBBCC MD:Z:96 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 
OQ:Z:######@@@:89HHHHHFHHHHDAAHHFEH6>GEGHGFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:24:17458:9144 83 chr1 10007539 60 101M = 10007190 -449 ACTTTTTCATTAACTAGGAGGCTTTTTTTTTTGGATACGGAGTCTTGCTCTGTCCTCCAGGCTGGAGTGCAGTGGCACCATCTAGGCTAACTGCAACCTCT D@E@EEDCCDCDBCCDCBDCB=A?AADDAADCBBCCC;ABDADADCBCDBCADCCDCBCBBCABBCACBBDACBBB@CBCDCCDBBCCDBCCBCDCCACCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HGHEHHHHHHHHGHHHHHHHHEGEGGHHGGHHHHHHHHIHHHHGHHHHHIHHHHHHHHHHHHFHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:63:6242:80411 163 chr1 10007707 60 101M = 10007988 381 CCACACCCAGCTAATTTTTGTATTTTTAGTGGAGACGGGGTTTCACCATACTGGCCAGGCTGGTCTGGAACTCCTGACCTCAGGTGATCTGCCCGGTGAGG @CB@A?BBCBBDBBCCCBCCBCDCCCCCCBCCCCA@9@BCB@ACCBC>CC@BCBBC<4A@9;-2B<@7@<-<::7-88AA7<9=75A MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBCBBBBBBBBBBCBC@BCCCCBBB@BBBBABBA?B?ABAABBAABAABA?AA@A;1;A1=;?@7;<6?@59,3@:?4;<(5586*67=@8;/44/? UQ:i:0 +20GAVAAXX100126:8:7:17495:193398 99 chr1 10007724 60 101M = 10007961 337 TTGTATTTTTAGTGGAGACGGGGTTTCACCATACTGGCCAGGCTGGTCTGGAACTCCTGACCTCAGGTGATCTGCCCGGTGAGGCATAACTTTTATTTCAG <<=:AAAAA;;A=AB?@C@:=?=9@A@B?AB?A?A@BAABA?A@;73;718;;9=8><<>?@@>?AB:@AA>=5A<=7;:?CB@A?@A?@A>==:>@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GGGGGGGGGBBGAGGDFGGGDCDCGGGGGGGCGGFFGGGGGCGC=5/582555559D=?CCCCCCGGCGEFC?;G@?D?4BD@DDGGFGGAGEGGBBB>CG UQ:i:0 +20GAVAAXX100126:8:68:12222:58992 99 chr1 10007740 60 101M = 10008056 416 GACGGGGTTTCACCATACTGGCCAGGCTGGTCTGGAACTCCTGACCTCAGGTGATCTGCCCGGTGAGGCATAACTTTTATTTCAGTCTCACATTTCAGTTT CD@9CCCCDDBCABCCCADBCBBCCCADBC;BCBCDD?DBBCBAABCBCC?BBACBDBBBB;BCBDCCBCCCDADDDD>C?A@CCCBDBCACCDDCADBEE MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHIHHHCHHHHHHGHHHHHFHHHHHHFGHFHHHHHHHHGHHHHHHHHHHHHHHHDHEGGHHHHHHHHIHHHHEHFHH UQ:i:0 +20GAVAAXX100126:8:44:5440:118213 163 chr1 10007743 60 101M = 10008043 400 
GGGGTTTCACCATACTGGCCAGGCTGGTCTGGAACTCCTGACCTCAGGTGATCTGCCCGGTGAGGCATAACTTTTATTTCAGTCTCACATTTCAGTTTGTT @CCBA?BBCACCCABDCCCDCBCCA@A9B;?C;C7;>@3:;8CA@=@B5A:9@;A@CC:@6=:?A=:@:AD@AA MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBACBBBCCBBBCBCBCCBABC>?@4B8>B8A<=<:=<>=BB?>3<7?96:3<@@7> UQ:i:0 +20GAVAAXX100126:8:4:4481:73051 83 chr1 10007790 60 101M = 10007500 -390 GTGATCTGCCCGGTGAGGCATAACTTTTATTTCAGTCTCACATTTCAGTTTGTTCTGTGGTCTCCTCCTTTATTTGTTGCTAGGTAAAAAACATTTTTTTT BECCDCDCCC;CACBDCBCCCDCAAADACDDDBDADCDBCBCDDDBDADACADDCCACBADCDCCDC@ADCCDDCADCBCCDBABAADDDCBBAADDBDDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHGGGHGHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHGGGHHHHHGGGHHHHHH UQ:i:0 +20GAVAAXX100126:8:45:3840:54807 83 chr1 10007795 60 101M = 10007468 -427 CTGCCCGGTGAGGCATAACTTTTATTTCAGTCTCACATTTCAGTTTGTTCTGTGGTCTCCTCCTTTATTTGTTGCTAGGTAAAAAACATTTTTTTTTTTTT @A@??:A?BACAA>BB@A<=>AABAAB@B?CAB>A?BAACAC@AAC?@C>C<@A<<<<;<;@@:>=< MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GGGECGGGGGGGGCGGGGBBBGGGGGGGGGGFFEFEGGGGGGGGGGGCGEGEEGEE69*<5<@@GGADGFEGEGCD?EE4@BGGGGG?BBBBBBBGGGGGG UQ:i:0 +20GAVAAXX100126:8:42:6558:108923 83 chr1 10007800 60 101M = 10007468 -432 CGGTGAGGCATAACTTTTATTTCAGTCTCACATTTCAGTTTGTTCTGTGGTCTCCTCCTTTATTTGTTGCTAGGTAAAAAACATTTTTTTTTTTTTTTTTT 8D>DCD?A?=?;C>@=D>9DD9:<896::67?DADB=28<>/>@7<37@;701+.4=3<@=+;;<1>=;7:@?>1<;AAAA=:????>>??>>>>DDBDDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:GHGHHHDFDBDBHCC@HE=HH55544;=<6;EHGHH?6;@A3AD9>8:C?6022/3A7;:??=BBBAB89:1:::,9:D>D=DBB?@B==:8=:<=88CAC>>AB3AAC8?CB:C?BA:=@DD>@=1>AAAAAAAAAAAADDDDDCCCC@@DC MD:Z:0C100 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:#CBFFB<@CD@BDCBDCABCBACBBA################### MD:Z:83 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCBBBBCBBBBBCCBBBCBCBBBBBBBABB@BBBBBBBBBABBBBBAABBBABB@@@@AA@?@BAB@AAAB@B@A@B@BAB################### UQ:i:0 
+20GAVAAXX100126:8:3:8873:24347 163 chr1 10007819 60 83M18S = 10008145 426 TTTCAGTCTCACATTTCAGTTTGTTCTGTGGTCTCCTCCTTTATTTGTTGCTAGGTAAAAAACATTTTTTTTTTTTTTTTTTTTTTGGTTTCCAGGGAATT @BAAAA@BDBCACABBBCCACCBACBDCBCCBBDABBAACBCBCACCBCC@DAADC@DBCBB??BD@CDCACDBBBBBC?BA################### MD:Z:83 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCBBBBCBBBBBCCCBBCBCCBBBBBBABBABBBBBBBBBBBBBABABBBB@BB?@A@AA@@?AAAABAB@AA@BAAB>AB################### UQ:i:0 +20GAVAAXX100126:8:3:12227:40820 659 chr1 10007841 60 51S50M = 10007513 -377 GTGAGCTGCGACGTGAGGCATAACTTTTATTTCAGTCTCTCATTTCAGTTTGTTCTGTGGTCTCCTCATTTATTTGTTGCTAGGTAAAAAACCTTTTTTTT ####################################################@9B@-3A='&77,''&9=@6%343A>4A.43<2B456B;A'4C76:>D@ MD:Z:16C24A8 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:####################################################?5@?//A@''9<-'''78@4%.12?:3@-02;1@277@7@'1B627=BB UQ:i:11 +20GAVAAXX100126:8:63:18841:113901 595 chr1 10007842 60 66S35M = 10007517 -359 ACTCCTGACCTCAGGTGATCTGCCCGCTGAGGCATAACTTTTATTTCAGTATCACATTTCAGTTTGTTCTGTGGTCTGCTCCTTTATTTGTTGCTAGGTAA ##################################################################################################### MD:Z:11C23 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:##################################################################################################### UQ:i:2 +20GAVAAXX100126:8:68:20495:7123 147 chr1 10007858 60 55S46M = 10007491 -412 TGAGGCATAACTTTTATTTCAGTCTCACATTTCAGTTTGTTCTGTGGTCTCCTCCTTTTTTTGTTGCTAGGTAAAAAACATTTTTTTTTTTTTTTTTTTTG ########################################################@B);C9,?33277?85ACCCC92(ADDDDDDBCDCCCCCBBCCC@ MD:Z:3A40G0C0 PG:Z:BWA RG:Z:20GAV.8 AM:i:25 NM:i:3 SM:i:25 MQ:i:60 OQ:Z:########################################################A?)BCCB5.(>BBBBBBBBBBBBBBBBBBBB UQ:i:73 +20GAVAAXX100126:8:64:7784:120262 163 chr1 10007860 60 42M59S = 10008147 387 
TATTTGTTGCTAGGTAAAAAACATTTTTTTTTTTTTTTTTTTTCTGGTTTCCGGGAATTTTAAAAAATAACTAAAGGCCCGTTTCTTTGGAGTTTCTCAGG @CBB?A@BBBDCBABCDDDDD@BCCCCCCCCCCCBBABBB?############################################################ MD:Z:42 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCBCBBBBCBBCC@BBBBBBAABCCBBBBBBBBBBBBBB@############################################################ UQ:i:0 +20GAVAAXX100126:8:61:17876:93642 147 chr1 10007872 29 66S35M = 10007468 -438 GGATTAACTTTTTTTTCATCTTCACTTTCAGGTTTGTCTTGTGGTCCCCCCTTTTTTTTTTTCCAGGGAAAAAAAATTTTTTTTTTTTTTTTTTTTGCTGG #####################################################################ACCCA-')CEEEEEDEDDBCDCCCCCBABBC@ MD:Z:2T6C0A24 PG:Z:BWA RG:Z:20GAV.8 AM:i:29 NM:i:3 SM:i:29 MQ:i:29 OQ:Z:#####################################################################?BBB?-''ACCCCCBCBBBBCBBBBBBBBBBB UQ:i:16 +20GAVAAXX100126:8:62:20479:199378 99 chr1 10007875 60 8M1D78M15S = 10008195 420 AAAAAACATTTTTTTTTTTTTTTTTTGCTGGTTTCCAGGTAATTCTAAGAACTAACTAAGGTCCTGTTGCTTGGTGGTTACTAAGGGGGGCAGACAGAGTA CDDBDDABBDDDDDAAAAAAAAAA@3+36;;@@@?A?@@;:=:<.6;;09;BFFFFGCGF=><=?/34504FDDGF34555HBHCF54445555531-425FBDBF################ UQ:i:16 +20GAVAAXX100126:8:7:17495:193398 147 chr1 10007961 60 101M = 10007724 -337 GGAGGCAGACAGAGTATTTTGCACCATCCAAAGGAGGTAATGTAATGCTTAAATGGAAGAAACTGAACCAAACAACGAAAATATGGGCTATGTATTACAAA CD<4A<8)A9:C9%5?-ADA6@AA=@48=4A<>A<:;4?1;89@>AC:AA?;;B?C>>@<;@BBBBBB>5<>>A>>=:>BB8=>A?A?ABBCCCC?CACCCCBCCBB@B;ABBB UQ:i:0 +20GAVAAXX100126:8:63:6242:80411 83 chr1 10007988 60 101M = 10007707 -381 CCAAAGGAGGTAATGTAATGCTTAAATGGAAGAAACTGAACCAAACAACGAAAATATGGGCTATGTATTACAAACAACTCTGACACACTGTTATTTATGAT DDDDDCCDCACDCCACDCCBCDCDDCCBBDDBDDBCCBDCCBDDCBDB;BDDDCCCCBBBCCCCACCDCBBDDBBDBCDCCBBBBBBCCADCCDDCCABCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:44:5440:118213 83 chr1 10008043 60 101M = 10007743 -400 
ATGGGCTATGTATTACAAACAACTCTGACACACTGTTATTTATGATATTATAGGAAAGCTTCTCCCAAGGAAAGTGGTGGAAACTCCATCACTTAGACAAG DECCCCCCCACCDCCBDDCBDCCDCCBBBBBCCCADCCDDCCCBCCCDCCCCBBDDCBCDDCDCCBDCBBDDDACBACBBDDBCDCBCDBCCDCDBC@CDC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:4:14239:56438 611 chr1 10008048 60 35M66S = 10008306 358 CTATGTATTACAAACAACTCTGACACACTGTTATTTATGATATTATAGGAAAGCTTCTCCCAAGGAAAGTGGTGGAAGATCCATCACTTAGACAAGTTAGA ##################################################################################################### MD:Z:35 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:##################################################################################################### UQ:i:0 +20GAVAAXX100126:8:68:12222:58992 147 chr1 10008056 60 101M = 10007740 -416 TACAAACAACTCTGACACACTGTTATTTATGATATTATAGGAAAGCTTCTCCCAAGGAAAGTGGTGGAAACTCCATCACTTAGACAAGTTAGAACACAACT BA:CD@CBCAAC@ACCBACAB@DAACCADD:CCC?CCBDC@CAD@CD@CCADBCDD@CCD@BA>>@@CCBCCCCCCCACEDECACBD@DCDBCAA@?@AC@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:A77=AAB>AABCC?DCCCD@CABDCCDACBCBBCC?D@B?CD?CAA?BBBA@@@ECC?D@BAACACC@DC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBCBBBCBBBBBBBBBBBBBBBBCBBBBBBBBB@BB@BAABABBBBBBBBABBBBBBAAB@ABB@B@@AAABAB???@A@7@ABAA?A?A@A?@@@ UQ:i:0 +20GAVAAXX100126:8:45:11956:153764 1699 chr1 10008108 29 56M1D3M1I41M = 10008497 423 CAAGGAAAGTGGTGGACACTCCATCACTTAGACAAGTTAGCACACCCGTGTAAAGGACTCATTTAGTGTCTCAGTGACATTTCTACTGTAATAGCCACTGG >:5=A7=C?A?8+5<:-+76;A86)B12C9>C45);641+&B2/?+4%,775+)A4,B+1))8A/)@1A28')<''(0>AA))A2)7*BB,BBB7(-)B2*(@<(.1)4@B@ACCDD@DBCCCACACCBCCAC?B;AA@?BAC=BCACB=BC@CB?AACB@C?CC:AABACBABAB@DA@@DAACDABAC>4CBCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBABBB@BB?BBBBBABBBBBABBABABABA@B?ABA@ABBAAABAA?BAAA@;@BAB>A?@A<@:AA@@A@@@?A@?@A@@@??<@@;>??>2@@>A UQ:i:0 
+20GAVAAXX100126:8:3:8864:24332 1107 chr1 10008145 60 101M = 10007819 -426 TAGAACACAACTGTAAAGGTACTAAAAAGTGTATTAGAGAGATTCATCCTGTAAAAGACCCTGGGGGCTGAGTGTGGTGGCTCATGCCTGTAACCCTAACA DECDBCCCDCCCACDDDCACCCCDDDDDACACCDCDBDBDBCDDBCDCCCACDDDCBBCCCCBBBBBCCBDACACBACBBCDBCCBCCCACDCCCCCBBBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:3:8873:24347 83 chr1 10008145 60 101M = 10007819 -426 TAGAACACAACTGTAAAGGTACTAAAAAGTGTATTAGAGAGATTCATCCTGTAAAAGACCCTGGGGGCTGAGTGTGGTGGCTCATGCCTGTAACCCTAACA DECDCCCCDCCCACDDDCACCCCDDDDDACACCDCDBDBDBCDDBCDCCCACDDDCBACCCCBBBBBCCBDACACBACBBCDBCCBCCCACDCCCCCBBBC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:64:7784:120262 83 chr1 10008147 60 101M = 10007860 -387 GAACACAACTGTAAAGGTACTAAAAAGTGTATTAGAGAGATTCATCCTGTAAAAGACCCTGGGGGCTGAGTGTGGTGGCTCATGCCTGTAACCCTAACACT DECCCCDCCCACDDDCACCCCDDDDDACACCDCDBDBDBCDDBCDCCCACDDDDBACCCCBBBBBCCBCACACBACBBCDBCCBCCCACDCCCCCDC@BCC MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:4:10255:86573 99 chr1 10008172 60 101M = 10008499 427 AGTGTATTAGAGAGATTCATCCTGTAAAAGACCCTGGGGGCTGAGTGTGGTGGCTCATGCCTGTAACCCTAACACTTTGGGAGGGTGAAGCAGAAGGATCA CCB@CCCDCCDCDCDCDBCCBBCBBCDDDCDABBCBCCCCBCBDCBBBBCBBCBDBCCBBBDBCCD=BBDCDACADDDBCCDCCB7)(=DB@@B MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:@AB?BCCBCBABCCCCCCCCCCCCCCCBBCCCBCA?BBABBCBBBBBBBBBBCA:@B>B34B7.8.3::%=859B=5:3(':@>>8C UQ:i:0 +20GAVAAXX100126:8:5:20728:120252 163 chr1 10008188 60 93M8S = 10008458 370 TCATCCTGTAAAAGACCCTGGGGGCTGAGTGTGGTGGCTCATGCCTGTAACCCTAACACTTTGGGAGGGTGAAGCAGAAGGATCACTTGAGTCCAGGAGTT 
@BBBAABBABCDCABACCDBCCCCABBBC@CBBC::7<=<>>;?==B?>?7@C9AC???CBC@?A5?@B=<>B<:A==27=6;=@@>B77:B######### MD:Z:93 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBCBBBBBBB?BBBBBBBBBA@B?B=B?BA9==?????==@6?<:?=<.6=45==@6@519?######### UQ:i:0 +20GAVAAXX100126:8:62:20479:199378 147 chr1 10008195 60 101M = 10007875 -420 GTAAAAGACCCTGGGGGCTGAGTGTGGTGGCTCATGCCTGTAACCCTAACACTTTGGGAGGGTGAAGCAGAAGGATCACTTGAGTCCAGGAGTTTGAGACC ADB;@BB?C:AC@BDC7AD@C@D?BA@ABBACBB@?CBC@BC@D9CCAB?>CCDCCACCBB@?AAC@CDCBECBCCCBCEDCEBCCCBCCDADCA@AAAC@ MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BA@4;??.radio:first-child,.controls>.checkbox:first-child{padding-top:5px;} +.radio.inline,.checkbox.inline{display:inline-block;padding-top:5px;margin-bottom:0;vertical-align:middle;} +.radio.inline+.radio.inline,.checkbox.inline+.checkbox.inline{margin-left:10px;} +.input-mini{width:60px;} +.input-small{width:90px;} +.input-medium{width:150px;} +.input-large{width:210px;} +.input-xlarge{width:270px;} +.input-xxlarge{width:530px;} +input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"]{float:none;margin-left:0;} +.input-append input[class*="span"],.input-append .uneditable-input[class*="span"],.input-prepend input[class*="span"],.input-prepend .uneditable-input[class*="span"],.row-fluid .input-prepend [class*="span"],.row-fluid .input-append [class*="span"]{display:inline-block;} +input,textarea,.uneditable-input{margin-left:0;} +input.span12, textarea.span12, .uneditable-input.span12{width:930px;} +input.span11, textarea.span11, .uneditable-input.span11{width:850px;} +input.span10, textarea.span10, .uneditable-input.span10{width:770px;} +input.span9, textarea.span9, .uneditable-input.span9{width:690px;} +input.span8, textarea.span8, .uneditable-input.span8{width:610px;} +input.span7, 
textarea.span7, .uneditable-input.span7{width:530px;} +input.span6, textarea.span6, .uneditable-input.span6{width:450px;} +input.span5, textarea.span5, .uneditable-input.span5{width:370px;} +input.span4, textarea.span4, .uneditable-input.span4{width:290px;} +input.span3, textarea.span3, .uneditable-input.span3{width:210px;} +input.span2, textarea.span2, .uneditable-input.span2{width:130px;} +input.span1, textarea.span1, .uneditable-input.span1{width:50px;} +input[disabled],select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#eeeeee;border-color:#ddd;} +input[type="radio"][disabled],input[type="checkbox"][disabled],input[type="radio"][readonly],input[type="checkbox"][readonly]{background-color:transparent;} +.control-group.warning>label,.control-group.warning .help-block,.control-group.warning .help-inline{color:#c09853;} +.control-group.warning .checkbox,.control-group.warning .radio,.control-group.warning input,.control-group.warning select,.control-group.warning textarea{color:#c09853;border-color:#c09853;}.control-group.warning .checkbox:focus,.control-group.warning .radio:focus,.control-group.warning input:focus,.control-group.warning select:focus,.control-group.warning textarea:focus{border-color:#a47e3c;-webkit-box-shadow:0 0 6px #dbc59e;-moz-box-shadow:0 0 6px #dbc59e;box-shadow:0 0 6px #dbc59e;} +.control-group.warning .input-prepend .add-on,.control-group.warning .input-append .add-on{color:#c09853;background-color:#fcf8e3;border-color:#c09853;} +.control-group.error>label,.control-group.error .help-block,.control-group.error .help-inline{color:#b94a48;} +.control-group.error .checkbox,.control-group.error .radio,.control-group.error input,.control-group.error select,.control-group.error textarea{color:#b94a48;border-color:#b94a48;}.control-group.error .checkbox:focus,.control-group.error .radio:focus,.control-group.error input:focus,.control-group.error 
select:focus,.control-group.error textarea:focus{border-color:#953b39;-webkit-box-shadow:0 0 6px #d59392;-moz-box-shadow:0 0 6px #d59392;box-shadow:0 0 6px #d59392;} +.control-group.error .input-prepend .add-on,.control-group.error .input-append .add-on{color:#b94a48;background-color:#f2dede;border-color:#b94a48;} +.control-group.success>label,.control-group.success .help-block,.control-group.success .help-inline{color:#468847;} +.control-group.success .checkbox,.control-group.success .radio,.control-group.success input,.control-group.success select,.control-group.success textarea{color:#468847;border-color:#468847;}.control-group.success .checkbox:focus,.control-group.success .radio:focus,.control-group.success input:focus,.control-group.success select:focus,.control-group.success textarea:focus{border-color:#356635;-webkit-box-shadow:0 0 6px #7aba7b;-moz-box-shadow:0 0 6px #7aba7b;box-shadow:0 0 6px #7aba7b;} +.control-group.success .input-prepend .add-on,.control-group.success .input-append .add-on{color:#468847;background-color:#dff0d8;border-color:#468847;} +input:focus:required:invalid,textarea:focus:required:invalid,select:focus:required:invalid{color:#b94a48;border-color:#ee5f5b;}input:focus:required:invalid:focus,textarea:focus:required:invalid:focus,select:focus:required:invalid:focus{border-color:#e9322d;-webkit-box-shadow:0 0 6px #f8b9b7;-moz-box-shadow:0 0 6px #f8b9b7;box-shadow:0 0 6px #f8b9b7;} +.form-actions{padding:17px 20px 18px;margin-top:18px;margin-bottom:18px;background-color:#f5f5f5;border-top:1px solid #e5e5e5;*zoom:1;}.form-actions:before,.form-actions:after{display:table;content:"";} +.form-actions:after{clear:both;} +.uneditable-input{overflow:hidden;white-space:nowrap;cursor:not-allowed;background-color:#ffffff;border-color:#eee;-webkit-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.025);-moz-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.025);box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.025);} +:-moz-placeholder{color:#999999;} 
+:-ms-input-placeholder{color:#999999;} +::-webkit-input-placeholder{color:#999999;} +.help-block,.help-inline{color:#555555;} +.help-block{display:block;margin-bottom:9px;} +.help-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle;padding-left:5px;} +.input-prepend,.input-append{margin-bottom:5px;}.input-prepend input,.input-append input,.input-prepend select,.input-append select,.input-prepend .uneditable-input,.input-append .uneditable-input{position:relative;margin-bottom:0;*margin-left:0;vertical-align:middle;-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0;}.input-prepend input:focus,.input-append input:focus,.input-prepend select:focus,.input-append select:focus,.input-prepend .uneditable-input:focus,.input-append .uneditable-input:focus{z-index:2;} +.input-prepend .uneditable-input,.input-append .uneditable-input{border-left-color:#ccc;} +.input-prepend .add-on,.input-append .add-on{display:inline-block;width:auto;height:18px;min-width:16px;padding:4px 5px;font-weight:normal;line-height:18px;text-align:center;text-shadow:0 1px 0 #ffffff;vertical-align:middle;background-color:#eeeeee;border:1px solid #ccc;} +.input-prepend .add-on,.input-append .add-on,.input-prepend .btn,.input-append .btn{margin-left:-1px;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;} +.input-prepend .active,.input-append .active{background-color:#a9dba9;border-color:#46a546;} +.input-prepend .add-on,.input-prepend .btn{margin-right:-1px;} +.input-prepend .add-on:first-child,.input-prepend .btn:first-child{-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px;} +.input-append input,.input-append select,.input-append .uneditable-input{-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px;} +.input-append .uneditable-input{border-right-color:#ccc;border-left-color:#eee;} +.input-append .add-on:last-child,.input-append 
.btn:last-child{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0;} +.input-prepend.input-append input,.input-prepend.input-append select,.input-prepend.input-append .uneditable-input{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;} +.input-prepend.input-append .add-on:first-child,.input-prepend.input-append .btn:first-child{margin-right:-1px;-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px;} +.input-prepend.input-append .add-on:last-child,.input-prepend.input-append .btn:last-child{margin-left:-1px;-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0;} +.search-query{padding-right:14px;padding-right:4px \9;padding-left:14px;padding-left:4px \9;margin-bottom:0;-webkit-border-radius:14px;-moz-border-radius:14px;border-radius:14px;} +.form-search input,.form-inline input,.form-horizontal input,.form-search textarea,.form-inline textarea,.form-horizontal textarea,.form-search select,.form-inline select,.form-horizontal select,.form-search .help-inline,.form-inline .help-inline,.form-horizontal .help-inline,.form-search .uneditable-input,.form-inline .uneditable-input,.form-horizontal .uneditable-input,.form-search .input-prepend,.form-inline .input-prepend,.form-horizontal .input-prepend,.form-search .input-append,.form-inline .input-append,.form-horizontal .input-append{display:inline-block;*display:inline;*zoom:1;margin-bottom:0;} +.form-search .hide,.form-inline .hide,.form-horizontal .hide{display:none;} +.form-search label,.form-inline label{display:inline-block;} +.form-search .input-append,.form-inline .input-append,.form-search .input-prepend,.form-inline .input-prepend{margin-bottom:0;} +.form-search .radio,.form-search .checkbox,.form-inline .radio,.form-inline .checkbox{padding-left:0;margin-bottom:0;vertical-align:middle;} +.form-search .radio input[type="radio"],.form-search .checkbox input[type="checkbox"],.form-inline 
.radio input[type="radio"],.form-inline .checkbox input[type="checkbox"]{float:left;margin-right:3px;margin-left:0;} +.control-group{margin-bottom:9px;} +legend+.control-group{margin-top:18px;-webkit-margin-top-collapse:separate;} +.form-horizontal .control-group{margin-bottom:18px;*zoom:1;}.form-horizontal .control-group:before,.form-horizontal .control-group:after{display:table;content:"";} +.form-horizontal .control-group:after{clear:both;} +.form-horizontal .control-label{float:left;width:140px;padding-top:5px;text-align:right;} +.form-horizontal .controls{*display:inline-block;*padding-left:20px;margin-left:160px;*margin-left:0;}.form-horizontal .controls:first-child{*padding-left:160px;} +.form-horizontal .help-block{margin-top:9px;margin-bottom:0;} +.form-horizontal .form-actions{padding-left:160px;} +.btn{display:inline-block;*display:inline;*zoom:1;padding:4px 10px 4px;margin-bottom:0;font-size:13px;line-height:18px;*line-height:20px;color:#333333;text-align:center;text-shadow:0 1px 1px rgba(255, 255, 255, 0.75);vertical-align:middle;cursor:pointer;background-color:#f5f5f5;background-image:-moz-linear-gradient(top, #ffffff, #e6e6e6);background-image:-ms-linear-gradient(top, #ffffff, #e6e6e6);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ffffff), to(#e6e6e6));background-image:-webkit-linear-gradient(top, #ffffff, #e6e6e6);background-image:-o-linear-gradient(top, #ffffff, #e6e6e6);background-image:linear-gradient(top, #ffffff, #e6e6e6);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffff', endColorstr='#e6e6e6', GradientType=0);border-color:#e6e6e6 #e6e6e6 #bfbfbf;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#e6e6e6;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);border:1px solid #cccccc;*border:0;border-bottom-color:#b3b3b3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;*margin-left:.3em;-webkit-box-shadow:inset 0 
1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);}.btn:hover,.btn:active,.btn.active,.btn.disabled,.btn[disabled]{background-color:#e6e6e6;*background-color:#d9d9d9;} +.btn:active,.btn.active{background-color:#cccccc \9;} +.btn:first-child{*margin-left:0;} +.btn:hover{color:#333333;text-decoration:none;background-color:#e6e6e6;*background-color:#d9d9d9;background-position:0 -15px;-webkit-transition:background-position 0.1s linear;-moz-transition:background-position 0.1s linear;-ms-transition:background-position 0.1s linear;-o-transition:background-position 0.1s linear;transition:background-position 0.1s linear;} +.btn:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px;} +.btn.active,.btn:active{background-color:#e6e6e6;background-color:#d9d9d9 \9;background-image:none;outline:0;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);} +.btn.disabled,.btn[disabled]{cursor:default;background-color:#e6e6e6;background-image:none;opacity:0.65;filter:alpha(opacity=65);-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;} +.btn-large{padding:9px 14px;font-size:15px;line-height:normal;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px;} +.btn-large [class^="icon-"]{margin-top:1px;} +.btn-small{padding:5px 9px;font-size:11px;line-height:16px;} +.btn-small [class^="icon-"]{margin-top:-1px;} +.btn-mini{padding:2px 6px;font-size:11px;line-height:14px;} +.btn-primary,.btn-primary:hover,.btn-warning,.btn-warning:hover,.btn-danger,.btn-danger:hover,.btn-success,.btn-success:hover,.btn-info,.btn-info:hover,.btn-inverse,.btn-inverse:hover{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);} 
+.btn-primary.active,.btn-warning.active,.btn-danger.active,.btn-success.active,.btn-info.active,.btn-inverse.active{color:rgba(255, 255, 255, 0.75);} +.btn{border-color:#ccc;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);} +.btn-primary{background-color:#0074cc;background-image:-moz-linear-gradient(top, #0088cc, #0055cc);background-image:-ms-linear-gradient(top, #0088cc, #0055cc);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#0088cc), to(#0055cc));background-image:-webkit-linear-gradient(top, #0088cc, #0055cc);background-image:-o-linear-gradient(top, #0088cc, #0055cc);background-image:linear-gradient(top, #0088cc, #0055cc);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#0088cc', endColorstr='#0055cc', GradientType=0);border-color:#0055cc #0055cc #003580;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#0055cc;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-primary:hover,.btn-primary:active,.btn-primary.active,.btn-primary.disabled,.btn-primary[disabled]{background-color:#0055cc;*background-color:#004ab3;} +.btn-primary:active,.btn-primary.active{background-color:#004099 \9;} +.btn-warning{background-color:#faa732;background-image:-moz-linear-gradient(top, #fbb450, #f89406);background-image:-ms-linear-gradient(top, #fbb450, #f89406);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#fbb450), to(#f89406));background-image:-webkit-linear-gradient(top, #fbb450, #f89406);background-image:-o-linear-gradient(top, #fbb450, #f89406);background-image:linear-gradient(top, #fbb450, #f89406);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fbb450', endColorstr='#f89406', GradientType=0);border-color:#f89406 #f89406 #ad6704;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#f89406;filter:progid:DXImageTransform.Microsoft.gradient(enabled = 
false);}.btn-warning:hover,.btn-warning:active,.btn-warning.active,.btn-warning.disabled,.btn-warning[disabled]{background-color:#f89406;*background-color:#df8505;} +.btn-warning:active,.btn-warning.active{background-color:#c67605 \9;} +.btn-danger{background-color:#da4f49;background-image:-moz-linear-gradient(top, #ee5f5b, #bd362f);background-image:-ms-linear-gradient(top, #ee5f5b, #bd362f);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ee5f5b), to(#bd362f));background-image:-webkit-linear-gradient(top, #ee5f5b, #bd362f);background-image:-o-linear-gradient(top, #ee5f5b, #bd362f);background-image:linear-gradient(top, #ee5f5b, #bd362f);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ee5f5b', endColorstr='#bd362f', GradientType=0);border-color:#bd362f #bd362f #802420;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#bd362f;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-danger:hover,.btn-danger:active,.btn-danger.active,.btn-danger.disabled,.btn-danger[disabled]{background-color:#bd362f;*background-color:#a9302a;} +.btn-danger:active,.btn-danger.active{background-color:#942a25 \9;} +.btn-success{background-color:#5bb75b;background-image:-moz-linear-gradient(top, #62c462, #51a351);background-image:-ms-linear-gradient(top, #62c462, #51a351);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#62c462), to(#51a351));background-image:-webkit-linear-gradient(top, #62c462, #51a351);background-image:-o-linear-gradient(top, #62c462, #51a351);background-image:linear-gradient(top, #62c462, #51a351);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#62c462', endColorstr='#51a351', GradientType=0);border-color:#51a351 #51a351 #387038;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#51a351;filter:progid:DXImageTransform.Microsoft.gradient(enabled = 
false);}.btn-success:hover,.btn-success:active,.btn-success.active,.btn-success.disabled,.btn-success[disabled]{background-color:#51a351;*background-color:#499249;} +.btn-success:active,.btn-success.active{background-color:#408140 \9;} +.btn-info{background-color:#49afcd;background-image:-moz-linear-gradient(top, #5bc0de, #2f96b4);background-image:-ms-linear-gradient(top, #5bc0de, #2f96b4);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#5bc0de), to(#2f96b4));background-image:-webkit-linear-gradient(top, #5bc0de, #2f96b4);background-image:-o-linear-gradient(top, #5bc0de, #2f96b4);background-image:linear-gradient(top, #5bc0de, #2f96b4);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#5bc0de', endColorstr='#2f96b4', GradientType=0);border-color:#2f96b4 #2f96b4 #1f6377;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#2f96b4;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-info:hover,.btn-info:active,.btn-info.active,.btn-info.disabled,.btn-info[disabled]{background-color:#2f96b4;*background-color:#2a85a0;} +.btn-info:active,.btn-info.active{background-color:#24748c \9;} +.btn-inverse{background-color:#414141;background-image:-moz-linear-gradient(top, #555555, #222222);background-image:-ms-linear-gradient(top, #555555, #222222);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#555555), to(#222222));background-image:-webkit-linear-gradient(top, #555555, #222222);background-image:-o-linear-gradient(top, #555555, #222222);background-image:linear-gradient(top, #555555, #222222);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#555555', endColorstr='#222222', GradientType=0);border-color:#222222 #222222 #000000;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#222222;filter:progid:DXImageTransform.Microsoft.gradient(enabled = 
false);}.btn-inverse:hover,.btn-inverse:active,.btn-inverse.active,.btn-inverse.disabled,.btn-inverse[disabled]{background-color:#222222;*background-color:#151515;} +.btn-inverse:active,.btn-inverse.active{background-color:#080808 \9;} +button.btn,input[type="submit"].btn{*padding-top:2px;*padding-bottom:2px;}button.btn::-moz-focus-inner,input[type="submit"].btn::-moz-focus-inner{padding:0;border:0;} +button.btn.btn-large,input[type="submit"].btn.btn-large{*padding-top:7px;*padding-bottom:7px;} +button.btn.btn-small,input[type="submit"].btn.btn-small{*padding-top:3px;*padding-bottom:3px;} +button.btn.btn-mini,input[type="submit"].btn.btn-mini{*padding-top:1px;*padding-bottom:1px;} +[class^="icon-"],[class*=" icon-"]{display:inline-block;width:14px;height:14px;*margin-right:.3em;line-height:14px;vertical-align:text-top;background-image:url("../../../Downloads/bootstrap (1)/img/glyphicons-halflings.png");background-position:14px 14px;background-repeat:no-repeat;}[class^="icon-"]:last-child,[class*=" icon-"]:last-child{*margin-left:0;} +.icon-white{background-image:url("../../../Downloads/bootstrap (1)/img/glyphicons-halflings-white.png");} +.icon-glass{background-position:0 0;} +.icon-music{background-position:-24px 0;} +.icon-search{background-position:-48px 0;} +.icon-envelope{background-position:-72px 0;} +.icon-heart{background-position:-96px 0;} +.icon-star{background-position:-120px 0;} +.icon-star-empty{background-position:-144px 0;} +.icon-user{background-position:-168px 0;} +.icon-film{background-position:-192px 0;} +.icon-th-large{background-position:-216px 0;} +.icon-th{background-position:-240px 0;} +.icon-th-list{background-position:-264px 0;} +.icon-ok{background-position:-288px 0;} +.icon-remove{background-position:-312px 0;} +.icon-zoom-in{background-position:-336px 0;} +.icon-zoom-out{background-position:-360px 0;} +.icon-off{background-position:-384px 0;} +.icon-signal{background-position:-408px 0;} +.icon-cog{background-position:-432px 0;} 
+.icon-trash{background-position:-456px 0;} +.icon-home{background-position:0 -24px;} +.icon-file{background-position:-24px -24px;} +.icon-time{background-position:-48px -24px;} +.icon-road{background-position:-72px -24px;} +.icon-download-alt{background-position:-96px -24px;} +.icon-download{background-position:-120px -24px;} +.icon-upload{background-position:-144px -24px;} +.icon-inbox{background-position:-168px -24px;} +.icon-play-circle{background-position:-192px -24px;} +.icon-repeat{background-position:-216px -24px;} +.icon-refresh{background-position:-240px -24px;} +.icon-list-alt{background-position:-264px -24px;} +.icon-lock{background-position:-287px -24px;} +.icon-flag{background-position:-312px -24px;} +.icon-headphones{background-position:-336px -24px;} +.icon-volume-off{background-position:-360px -24px;} +.icon-volume-down{background-position:-384px -24px;} +.icon-volume-up{background-position:-408px -24px;} +.icon-qrcode{background-position:-432px -24px;} +.icon-barcode{background-position:-456px -24px;} +.icon-tag{background-position:0 -48px;} +.icon-tags{background-position:-25px -48px;} +.icon-book{background-position:-48px -48px;} +.icon-bookmark{background-position:-72px -48px;} +.icon-print{background-position:-96px -48px;} +.icon-camera{background-position:-120px -48px;} +.icon-font{background-position:-144px -48px;} +.icon-bold{background-position:-167px -48px;} +.icon-italic{background-position:-192px -48px;} +.icon-text-height{background-position:-216px -48px;} +.icon-text-width{background-position:-240px -48px;} +.icon-align-left{background-position:-264px -48px;} +.icon-align-center{background-position:-288px -48px;} +.icon-align-right{background-position:-312px -48px;} +.icon-align-justify{background-position:-336px -48px;} +.icon-list{background-position:-360px -48px;} +.icon-indent-left{background-position:-384px -48px;} +.icon-indent-right{background-position:-408px -48px;} +.icon-facetime-video{background-position:-432px -48px;} 
+.icon-picture{background-position:-456px -48px;} +.icon-pencil{background-position:0 -72px;} +.icon-map-marker{background-position:-24px -72px;} +.icon-adjust{background-position:-48px -72px;} +.icon-tint{background-position:-72px -72px;} +.icon-edit{background-position:-96px -72px;} +.icon-share{background-position:-120px -72px;} +.icon-check{background-position:-144px -72px;} +.icon-move{background-position:-168px -72px;} +.icon-step-backward{background-position:-192px -72px;} +.icon-fast-backward{background-position:-216px -72px;} +.icon-backward{background-position:-240px -72px;} +.icon-play{background-position:-264px -72px;} +.icon-pause{background-position:-288px -72px;} +.icon-stop{background-position:-312px -72px;} +.icon-forward{background-position:-336px -72px;} +.icon-fast-forward{background-position:-360px -72px;} +.icon-step-forward{background-position:-384px -72px;} +.icon-eject{background-position:-408px -72px;} +.icon-chevron-left{background-position:-432px -72px;} +.icon-chevron-right{background-position:-456px -72px;} +.icon-plus-sign{background-position:0 -96px;} +.icon-minus-sign{background-position:-24px -96px;} +.icon-remove-sign{background-position:-48px -96px;} +.icon-ok-sign{background-position:-72px -96px;} +.icon-question-sign{background-position:-96px -96px;} +.icon-info-sign{background-position:-120px -96px;} +.icon-screenshot{background-position:-144px -96px;} +.icon-remove-circle{background-position:-168px -96px;} +.icon-ok-circle{background-position:-192px -96px;} +.icon-ban-circle{background-position:-216px -96px;} +.icon-arrow-left{background-position:-240px -96px;} +.icon-arrow-right{background-position:-264px -96px;} +.icon-arrow-up{background-position:-289px -96px;} +.icon-arrow-down{background-position:-312px -96px;} +.icon-share-alt{background-position:-336px -96px;} +.icon-resize-full{background-position:-360px -96px;} +.icon-resize-small{background-position:-384px -96px;} +.icon-plus{background-position:-408px -96px;} 
+.icon-minus{background-position:-433px -96px;} +.icon-asterisk{background-position:-456px -96px;} +.icon-exclamation-sign{background-position:0 -120px;} +.icon-gift{background-position:-24px -120px;} +.icon-leaf{background-position:-48px -120px;} +.icon-fire{background-position:-72px -120px;} +.icon-eye-open{background-position:-96px -120px;} +.icon-eye-close{background-position:-120px -120px;} +.icon-warning-sign{background-position:-144px -120px;} +.icon-plane{background-position:-168px -120px;} +.icon-calendar{background-position:-192px -120px;} +.icon-random{background-position:-216px -120px;} +.icon-comment{background-position:-240px -120px;} +.icon-magnet{background-position:-264px -120px;} +.icon-chevron-up{background-position:-288px -120px;} +.icon-chevron-down{background-position:-313px -119px;} +.icon-retweet{background-position:-336px -120px;} +.icon-shopping-cart{background-position:-360px -120px;} +.icon-folder-close{background-position:-384px -120px;} +.icon-folder-open{background-position:-408px -120px;} +.icon-resize-vertical{background-position:-432px -119px;} +.icon-resize-horizontal{background-position:-456px -118px;} +.icon-hdd{background-position:0 -144px;} +.icon-bullhorn{background-position:-24px -144px;} +.icon-bell{background-position:-48px -144px;} +.icon-certificate{background-position:-72px -144px;} +.icon-thumbs-up{background-position:-96px -144px;} +.icon-thumbs-down{background-position:-120px -144px;} +.icon-hand-right{background-position:-144px -144px;} +.icon-hand-left{background-position:-168px -144px;} +.icon-hand-up{background-position:-192px -144px;} +.icon-hand-down{background-position:-216px -144px;} +.icon-circle-arrow-right{background-position:-240px -144px;} +.icon-circle-arrow-left{background-position:-264px -144px;} +.icon-circle-arrow-up{background-position:-288px -144px;} +.icon-circle-arrow-down{background-position:-312px -144px;} +.icon-globe{background-position:-336px -144px;} 
+.icon-wrench{background-position:-360px -144px;} +.icon-tasks{background-position:-384px -144px;} +.icon-filter{background-position:-408px -144px;} +.icon-briefcase{background-position:-432px -144px;} +.icon-fullscreen{background-position:-456px -144px;} +.btn-group{position:relative;*zoom:1;*margin-left:.3em;}.btn-group:before,.btn-group:after{display:table;content:"";} +.btn-group:after{clear:both;} +.btn-group:first-child{*margin-left:0;} +.btn-group+.btn-group{margin-left:5px;} +.btn-toolbar{margin-top:9px;margin-bottom:9px;}.btn-toolbar .btn-group{display:inline-block;*display:inline;*zoom:1;} +.btn-group>.btn{position:relative;float:left;margin-left:-1px;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;} +.btn-group>.btn:first-child{margin-left:0;-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;-webkit-border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px;border-bottom-left-radius:4px;} +.btn-group>.btn:last-child,.btn-group>.dropdown-toggle{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px;border-bottom-right-radius:4px;} +.btn-group>.btn.large:first-child{margin-left:0;-webkit-border-top-left-radius:6px;-moz-border-radius-topleft:6px;border-top-left-radius:6px;-webkit-border-bottom-left-radius:6px;-moz-border-radius-bottomleft:6px;border-bottom-left-radius:6px;} +.btn-group>.btn.large:last-child,.btn-group>.large.dropdown-toggle{-webkit-border-top-right-radius:6px;-moz-border-radius-topright:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;-moz-border-radius-bottomright:6px;border-bottom-right-radius:6px;} +.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active{z-index:2;} +.btn-group .dropdown-toggle:active,.btn-group.open .dropdown-toggle{outline:0;} 
+.btn-group>.dropdown-toggle{padding-left:8px;padding-right:8px;-webkit-box-shadow:inset 1px 0 0 rgba(255,255,255,.125), inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 1px 0 0 rgba(255,255,255,.125), inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 1px 0 0 rgba(255,255,255,.125), inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);*padding-top:4px;*padding-bottom:4px;} +.btn-group>.btn-mini.dropdown-toggle{padding-left:5px;padding-right:5px;} +.btn-group>.btn-small.dropdown-toggle{*padding-top:4px;*padding-bottom:4px;} +.btn-group>.btn-large.dropdown-toggle{padding-left:12px;padding-right:12px;} +.btn-group.open .dropdown-toggle{background-image:none;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);} +.btn-group.open .btn.dropdown-toggle{background-color:#e6e6e6;} +.btn-group.open .btn-primary.dropdown-toggle{background-color:#0055cc;} +.btn-group.open .btn-warning.dropdown-toggle{background-color:#f89406;} +.btn-group.open .btn-danger.dropdown-toggle{background-color:#bd362f;} +.btn-group.open .btn-success.dropdown-toggle{background-color:#51a351;} +.btn-group.open .btn-info.dropdown-toggle{background-color:#2f96b4;} +.btn-group.open .btn-inverse.dropdown-toggle{background-color:#222222;} +.btn .caret{margin-top:7px;margin-left:0;} +.btn:hover .caret,.open.btn-group .caret{opacity:1;filter:alpha(opacity=100);} +.btn-mini .caret{margin-top:5px;} +.btn-small .caret{margin-top:6px;} +.btn-large .caret{margin-top:6px;border-left-width:5px;border-right-width:5px;border-top-width:5px;} +.dropup .btn-large .caret{border-bottom:5px solid #000000;border-top:0;} +.btn-primary .caret,.btn-warning .caret,.btn-danger .caret,.btn-info .caret,.btn-success .caret,.btn-inverse 
.caret{border-top-color:#ffffff;border-bottom-color:#ffffff;opacity:0.75;filter:alpha(opacity=75);} +.nav{margin-left:0;margin-bottom:18px;list-style:none;} +.nav>li>a{display:block;} +.nav>li>a:hover{text-decoration:none;background-color:#eeeeee;} +.nav>.pull-right{float:right;} +.nav .nav-header{display:block;padding:3px 15px;font-size:11px;font-weight:bold;line-height:18px;color:#999999;text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);text-transform:uppercase;} +.nav li+.nav-header{margin-top:9px;} +.nav-list{padding-left:15px;padding-right:15px;margin-bottom:0;} +.nav-list>li>a,.nav-list .nav-header{margin-left:-15px;margin-right:-15px;text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);} +.nav-list>li>a{padding:3px 15px;} +.nav-list>.active>a,.nav-list>.active>a:hover{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.2);background-color:#0088cc;} +.nav-list [class^="icon-"]{margin-right:2px;} +.nav-list .divider{*width:100%;height:1px;margin:8px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #ffffff;} +.nav-tabs,.nav-pills{*zoom:1;}.nav-tabs:before,.nav-pills:before,.nav-tabs:after,.nav-pills:after{display:table;content:"";} +.nav-tabs:after,.nav-pills:after{clear:both;} +.nav-tabs>li,.nav-pills>li{float:left;} +.nav-tabs>li>a,.nav-pills>li>a{padding-right:12px;padding-left:12px;margin-right:2px;line-height:14px;} +.nav-tabs{border-bottom:1px solid #ddd;} +.nav-tabs>li{margin-bottom:-1px;} +.nav-tabs>li>a{padding-top:8px;padding-bottom:8px;line-height:18px;border:1px solid transparent;-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0;}.nav-tabs>li>a:hover{border-color:#eeeeee #eeeeee #dddddd;} +.nav-tabs>.active>a,.nav-tabs>.active>a:hover{color:#555555;background-color:#ffffff;border:1px solid #ddd;border-bottom-color:transparent;cursor:default;} 
+.nav-pills>li>a{padding-top:8px;padding-bottom:8px;margin-top:2px;margin-bottom:2px;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px;} +.nav-pills>.active>a,.nav-pills>.active>a:hover{color:#ffffff;background-color:#0088cc;} +.nav-stacked>li{float:none;} +.nav-stacked>li>a{margin-right:0;} +.nav-tabs.nav-stacked{border-bottom:0;} +.nav-tabs.nav-stacked>li>a{border:1px solid #ddd;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;} +.nav-tabs.nav-stacked>li:first-child>a{-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0;} +.nav-tabs.nav-stacked>li:last-child>a{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px;} +.nav-tabs.nav-stacked>li>a:hover{border-color:#ddd;z-index:2;} +.nav-pills.nav-stacked>li>a{margin-bottom:3px;} +.nav-pills.nav-stacked>li:last-child>a{margin-bottom:1px;} +.nav-tabs .dropdown-menu{-webkit-border-radius:0 0 5px 5px;-moz-border-radius:0 0 5px 5px;border-radius:0 0 5px 5px;} +.nav-pills .dropdown-menu{-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} +.nav-tabs .dropdown-toggle .caret,.nav-pills .dropdown-toggle .caret{border-top-color:#0088cc;border-bottom-color:#0088cc;margin-top:6px;} +.nav-tabs .dropdown-toggle:hover .caret,.nav-pills .dropdown-toggle:hover .caret{border-top-color:#005580;border-bottom-color:#005580;} +.nav-tabs .active .dropdown-toggle .caret,.nav-pills .active .dropdown-toggle .caret{border-top-color:#333333;border-bottom-color:#333333;} +.nav>.dropdown.active>a:hover{color:#000000;cursor:pointer;} +.nav-tabs .open .dropdown-toggle,.nav-pills .open .dropdown-toggle,.nav>li.dropdown.open.active>a:hover{color:#ffffff;background-color:#999999;border-color:#999999;} +.nav li.dropdown.open .caret,.nav li.dropdown.open.active .caret,.nav li.dropdown.open a:hover .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;opacity:1;filter:alpha(opacity=100);} +.tabs-stacked 
.open>a:hover{border-color:#999999;} +.tabbable{*zoom:1;}.tabbable:before,.tabbable:after{display:table;content:"";} +.tabbable:after{clear:both;} +.tab-content{overflow:auto;} +.tabs-below>.nav-tabs,.tabs-right>.nav-tabs,.tabs-left>.nav-tabs{border-bottom:0;} +.tab-content>.tab-pane,.pill-content>.pill-pane{display:none;} +.tab-content>.active,.pill-content>.active{display:block;} +.tabs-below>.nav-tabs{border-top:1px solid #ddd;} +.tabs-below>.nav-tabs>li{margin-top:-1px;margin-bottom:0;} +.tabs-below>.nav-tabs>li>a{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px;}.tabs-below>.nav-tabs>li>a:hover{border-bottom-color:transparent;border-top-color:#ddd;} +.tabs-below>.nav-tabs>.active>a,.tabs-below>.nav-tabs>.active>a:hover{border-color:transparent #ddd #ddd #ddd;} +.tabs-left>.nav-tabs>li,.tabs-right>.nav-tabs>li{float:none;} +.tabs-left>.nav-tabs>li>a,.tabs-right>.nav-tabs>li>a{min-width:74px;margin-right:0;margin-bottom:3px;} +.tabs-left>.nav-tabs{float:left;margin-right:19px;border-right:1px solid #ddd;} +.tabs-left>.nav-tabs>li>a{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px;} +.tabs-left>.nav-tabs>li>a:hover{border-color:#eeeeee #dddddd #eeeeee #eeeeee;} +.tabs-left>.nav-tabs .active>a,.tabs-left>.nav-tabs .active>a:hover{border-color:#ddd transparent #ddd #ddd;*border-right-color:#ffffff;} +.tabs-right>.nav-tabs{float:right;margin-left:19px;border-left:1px solid #ddd;} +.tabs-right>.nav-tabs>li>a{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;} +.tabs-right>.nav-tabs>li>a:hover{border-color:#eeeeee #eeeeee #eeeeee #dddddd;} +.tabs-right>.nav-tabs .active>a,.tabs-right>.nav-tabs .active>a:hover{border-color:#ddd #ddd #ddd transparent;*border-left-color:#ffffff;} +.navbar{*position:relative;*z-index:2;overflow:visible;margin-bottom:18px;} 
+.navbar-inner{min-height:40px;padding-left:20px;padding-right:20px;background-color:#2c2c2c;background-image:-moz-linear-gradient(top, #333333, #222222);background-image:-ms-linear-gradient(top, #333333, #222222);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#333333), to(#222222));background-image:-webkit-linear-gradient(top, #333333, #222222);background-image:-o-linear-gradient(top, #333333, #222222);background-image:linear-gradient(top, #333333, #222222);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#333333', endColorstr='#222222', GradientType=0);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 3px rgba(0,0,0,.25), inset 0 -1px 0 rgba(0,0,0,.1);-moz-box-shadow:0 1px 3px rgba(0,0,0,.25), inset 0 -1px 0 rgba(0,0,0,.1);box-shadow:0 1px 3px rgba(0,0,0,.25), inset 0 -1px 0 rgba(0,0,0,.1);} +.navbar .container{width:auto;} +.nav-collapse.collapse{height:auto;} +.navbar{color:#999999;}.navbar .brand:hover{text-decoration:none;} +.navbar .brand{float:left;display:block;padding:8px 20px 12px;margin-left:-20px;font-size:20px;font-weight:200;line-height:1;color:#999999;} +.navbar .navbar-text{margin-bottom:0;line-height:40px;} +.navbar .navbar-link{color:#999999;}.navbar .navbar-link:hover{color:#ffffff;} +.navbar .btn,.navbar .btn-group{margin-top:5px;} +.navbar .btn-group .btn{margin:0;} +.navbar-form{margin-bottom:0;*zoom:1;}.navbar-form:before,.navbar-form:after{display:table;content:"";} +.navbar-form:after{clear:both;} +.navbar-form input,.navbar-form select,.navbar-form .radio,.navbar-form .checkbox{margin-top:5px;} +.navbar-form input,.navbar-form select{display:inline-block;margin-bottom:0;} +.navbar-form input[type="image"],.navbar-form input[type="checkbox"],.navbar-form input[type="radio"]{margin-top:3px;} +.navbar-form .input-append,.navbar-form .input-prepend{margin-top:6px;white-space:nowrap;}.navbar-form .input-append input,.navbar-form .input-prepend 
input{margin-top:0;} +.navbar-search{position:relative;float:left;margin-top:6px;margin-bottom:0;}.navbar-search .search-query{padding:4px 9px;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:13px;font-weight:normal;line-height:1;color:#ffffff;background-color:#626262;border:1px solid #151515;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,.1), 0 1px 0 rgba(255,255,255,.15);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,.1), 0 1px 0 rgba(255,255,255,.15);box-shadow:inset 0 1px 2px rgba(0,0,0,.1), 0 1px 0 rgba(255,255,255,.15);-webkit-transition:none;-moz-transition:none;-ms-transition:none;-o-transition:none;transition:none;}.navbar-search .search-query:-moz-placeholder{color:#cccccc;} +.navbar-search .search-query:-ms-input-placeholder{color:#cccccc;} +.navbar-search .search-query::-webkit-input-placeholder{color:#cccccc;} +.navbar-search .search-query:focus,.navbar-search .search-query.focused{padding:5px 10px;color:#333333;text-shadow:0 1px 0 #ffffff;background-color:#ffffff;border:0;-webkit-box-shadow:0 0 3px rgba(0, 0, 0, 0.15);-moz-box-shadow:0 0 3px rgba(0, 0, 0, 0.15);box-shadow:0 0 3px rgba(0, 0, 0, 0.15);outline:0;} +.navbar-fixed-top,.navbar-fixed-bottom{position:fixed;right:0;left:0;z-index:1030;margin-bottom:0;} +.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding-left:0;padding-right:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;} +.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px;} +.navbar-fixed-top{top:0;} +.navbar-fixed-bottom{bottom:0;} +.navbar .nav{position:relative;left:0;display:block;float:left;margin:0 10px 0 0;} +.navbar .nav.pull-right{float:right;} +.navbar .nav>li{display:block;float:left;} +.navbar .nav>li>a{float:none;padding:9px 10px 11px;line-height:19px;color:#999999;text-decoration:none;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);} +.navbar .btn{display:inline-block;padding:4px 10px 4px;margin:5px 5px 6px;line-height:18px;} +.navbar 
.btn-group{margin:0;padding:5px 5px 6px;} +.navbar .nav>li>a:hover{background-color:transparent;color:#ffffff;text-decoration:none;} +.navbar .nav .active>a,.navbar .nav .active>a:hover{color:#ffffff;text-decoration:none;background-color:#222222;} +.navbar .divider-vertical{height:40px;width:1px;margin:0 9px;overflow:hidden;background-color:#222222;border-right:1px solid #333333;} +.navbar .nav.pull-right{margin-left:10px;margin-right:0;} +.navbar .btn-navbar{display:none;float:right;padding:7px 10px;margin-left:5px;margin-right:5px;background-color:#2c2c2c;background-image:-moz-linear-gradient(top, #333333, #222222);background-image:-ms-linear-gradient(top, #333333, #222222);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#333333), to(#222222));background-image:-webkit-linear-gradient(top, #333333, #222222);background-image:-o-linear-gradient(top, #333333, #222222);background-image:linear-gradient(top, #333333, #222222);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#333333', endColorstr='#222222', GradientType=0);border-color:#222222 #222222 #000000;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#222222;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.075);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.075);box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.075);}.navbar .btn-navbar:hover,.navbar .btn-navbar:active,.navbar .btn-navbar.active,.navbar .btn-navbar.disabled,.navbar .btn-navbar[disabled]{background-color:#222222;*background-color:#151515;} +.navbar .btn-navbar:active,.navbar .btn-navbar.active{background-color:#080808 \9;} +.navbar .btn-navbar .icon-bar{display:block;width:18px;height:2px;background-color:#f5f5f5;-webkit-border-radius:1px;-moz-border-radius:1px;border-radius:1px;-webkit-box-shadow:0 1px 0 
rgba(0, 0, 0, 0.25);-moz-box-shadow:0 1px 0 rgba(0, 0, 0, 0.25);box-shadow:0 1px 0 rgba(0, 0, 0, 0.25);} +.btn-navbar .icon-bar+.icon-bar{margin-top:3px;} +.navbar .dropdown-menu:before{content:'';display:inline-block;border-left:7px solid transparent;border-right:7px solid transparent;border-bottom:7px solid #ccc;border-bottom-color:rgba(0, 0, 0, 0.2);position:absolute;top:-7px;left:9px;} +.navbar .dropdown-menu:after{content:'';display:inline-block;border-left:6px solid transparent;border-right:6px solid transparent;border-bottom:6px solid #ffffff;position:absolute;top:-6px;left:10px;} +.navbar-fixed-bottom .dropdown-menu:before{border-top:7px solid #ccc;border-top-color:rgba(0, 0, 0, 0.2);border-bottom:0;bottom:-7px;top:auto;} +.navbar-fixed-bottom .dropdown-menu:after{border-top:6px solid #ffffff;border-bottom:0;bottom:-6px;top:auto;} +.navbar .nav li.dropdown .dropdown-toggle .caret,.navbar .nav li.dropdown.open .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;} +.navbar .nav li.dropdown.active .caret{opacity:1;filter:alpha(opacity=100);} +.navbar .nav li.dropdown.open>.dropdown-toggle,.navbar .nav li.dropdown.active>.dropdown-toggle,.navbar .nav li.dropdown.open.active>.dropdown-toggle{background-color:transparent;} +.navbar .nav li.dropdown.active>.dropdown-toggle:hover{color:#ffffff;} +.navbar .pull-right .dropdown-menu,.navbar .dropdown-menu.pull-right{left:auto;right:0;}.navbar .pull-right .dropdown-menu:before,.navbar .dropdown-menu.pull-right:before{left:auto;right:12px;} +.navbar .pull-right .dropdown-menu:after,.navbar .dropdown-menu.pull-right:after{left:auto;right:13px;} +.breadcrumb{padding:7px 14px;margin:0 0 18px;list-style:none;background-color:#fbfbfb;background-image:-moz-linear-gradient(top, #ffffff, #f5f5f5);background-image:-ms-linear-gradient(top, #ffffff, #f5f5f5);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ffffff), to(#f5f5f5));background-image:-webkit-linear-gradient(top, #ffffff, 
#f5f5f5);background-image:-o-linear-gradient(top, #ffffff, #f5f5f5);background-image:linear-gradient(top, #ffffff, #f5f5f5);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffff', endColorstr='#f5f5f5', GradientType=0);border:1px solid #ddd;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;-webkit-box-shadow:inset 0 1px 0 #ffffff;-moz-box-shadow:inset 0 1px 0 #ffffff;box-shadow:inset 0 1px 0 #ffffff;}.breadcrumb li{display:inline-block;*display:inline;*zoom:1;text-shadow:0 1px 0 #ffffff;} +.breadcrumb .divider{padding:0 5px;color:#999999;} +.breadcrumb .active a{color:#333333;} +.pagination{height:36px;margin:18px 0;} +.pagination ul{display:inline-block;*display:inline;*zoom:1;margin-left:0;margin-bottom:0;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;-webkit-box-shadow:0 1px 2px rgba(0, 0, 0, 0.05);-moz-box-shadow:0 1px 2px rgba(0, 0, 0, 0.05);box-shadow:0 1px 2px rgba(0, 0, 0, 0.05);} +.pagination li{display:inline;} +.pagination a{float:left;padding:0 14px;line-height:34px;text-decoration:none;border:1px solid #ddd;border-left-width:0;} +.pagination a:hover,.pagination .active a{background-color:#f5f5f5;} +.pagination .active a{color:#999999;cursor:default;} +.pagination .disabled span,.pagination .disabled a,.pagination .disabled a:hover{color:#999999;background-color:transparent;cursor:default;} +.pagination li:first-child a{border-left-width:1px;-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px;} +.pagination li:last-child a{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0;} +.pagination-centered{text-align:center;} +.pagination-right{text-align:right;} +.pager{margin-left:0;margin-bottom:18px;list-style:none;text-align:center;*zoom:1;}.pager:before,.pager:after{display:table;content:"";} +.pager:after{clear:both;} +.pager li{display:inline;} +.pager a{display:inline-block;padding:5px 
14px;background-color:#fff;border:1px solid #ddd;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px;} +.pager a:hover{text-decoration:none;background-color:#f5f5f5;} +.pager .next a{float:right;} +.pager .previous a{float:left;} +.pager .disabled a,.pager .disabled a:hover{color:#999999;background-color:#fff;cursor:default;} +.thumbnails{margin-left:-20px;list-style:none;*zoom:1;}.thumbnails:before,.thumbnails:after{display:table;content:"";} +.thumbnails:after{clear:both;} +.row-fluid .thumbnails{margin-left:0;} +.thumbnails>li{float:left;margin-bottom:18px;margin-left:20px;} +.thumbnail{display:block;padding:4px;line-height:1;border:1px solid #ddd;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:0 1px 1px rgba(0, 0, 0, 0.075);} +a.thumbnail:hover{border-color:#0088cc;-webkit-box-shadow:0 1px 4px rgba(0, 105, 214, 0.25);-moz-box-shadow:0 1px 4px rgba(0, 105, 214, 0.25);box-shadow:0 1px 4px rgba(0, 105, 214, 0.25);} +.thumbnail>img{display:block;max-width:100%;margin-left:auto;margin-right:auto;} +.thumbnail .caption{padding:9px;} +.alert{padding:8px 35px 8px 14px;margin-bottom:18px;text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);background-color:#fcf8e3;border:1px solid #fbeed5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;color:#c09853;} +.alert-heading{color:inherit;} +.alert .close{position:relative;top:-2px;right:-21px;line-height:18px;} +.alert-success{background-color:#dff0d8;border-color:#d6e9c6;color:#468847;} +.alert-danger,.alert-error{background-color:#f2dede;border-color:#eed3d7;color:#b94a48;} +.alert-info{background-color:#d9edf7;border-color:#bce8f1;color:#3a87ad;} +.alert-block{padding-top:14px;padding-bottom:14px;} +.alert-block>p,.alert-block>ul{margin-bottom:0;} +.alert-block p+p{margin-top:5px;} +@-webkit-keyframes progress-bar-stripes{from{background-position:40px 0;} 
to{background-position:0 0;}}@-moz-keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}@-ms-keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}@-o-keyframes progress-bar-stripes{from{background-position:0 0;} to{background-position:40px 0;}}@keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}.progress{overflow:hidden;height:18px;margin-bottom:18px;background-color:#f7f7f7;background-image:-moz-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:-ms-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#f5f5f5), to(#f9f9f9));background-image:-webkit-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:-o-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:linear-gradient(top, #f5f5f5, #f9f9f9);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#f5f5f5', endColorstr='#f9f9f9', GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.1);-moz-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.1);box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.1);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} +.progress .bar{width:0%;height:18px;color:#ffffff;font-size:12px;text-align:center;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#0e90d2;background-image:-moz-linear-gradient(top, #149bdf, #0480be);background-image:-ms-linear-gradient(top, #149bdf, #0480be);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#149bdf), to(#0480be));background-image:-webkit-linear-gradient(top, #149bdf, #0480be);background-image:-o-linear-gradient(top, #149bdf, #0480be);background-image:linear-gradient(top, #149bdf, #0480be);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#149bdf', endColorstr='#0480be', GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0, 0, 0, 0.15);-moz-box-shadow:inset 0 
-1px 0 rgba(0, 0, 0, 0.15);box-shadow:inset 0 -1px 0 rgba(0, 0, 0, 0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;-ms-box-sizing:border-box;box-sizing:border-box;-webkit-transition:width 0.6s ease;-moz-transition:width 0.6s ease;-ms-transition:width 0.6s ease;-o-transition:width 0.6s ease;transition:width 0.6s ease;} +.progress-striped .bar{background-color:#149bdf;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-ms-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);-webkit-background-size:40px 40px;-moz-background-size:40px 40px;-o-background-size:40px 40px;background-size:40px 40px;} +.progress.active .bar{-webkit-animation:progress-bar-stripes 2s linear infinite;-moz-animation:progress-bar-stripes 2s linear infinite;-ms-animation:progress-bar-stripes 2s linear 
infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite;} +.progress-danger .bar{background-color:#dd514c;background-image:-moz-linear-gradient(top, #ee5f5b, #c43c35);background-image:-ms-linear-gradient(top, #ee5f5b, #c43c35);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ee5f5b), to(#c43c35));background-image:-webkit-linear-gradient(top, #ee5f5b, #c43c35);background-image:-o-linear-gradient(top, #ee5f5b, #c43c35);background-image:linear-gradient(top, #ee5f5b, #c43c35);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ee5f5b', endColorstr='#c43c35', GradientType=0);} +.progress-danger.progress-striped .bar{background-color:#ee5f5b;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-ms-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 
50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);} +.progress-success .bar{background-color:#5eb95e;background-image:-moz-linear-gradient(top, #62c462, #57a957);background-image:-ms-linear-gradient(top, #62c462, #57a957);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#62c462), to(#57a957));background-image:-webkit-linear-gradient(top, #62c462, #57a957);background-image:-o-linear-gradient(top, #62c462, #57a957);background-image:linear-gradient(top, #62c462, #57a957);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#62c462', endColorstr='#57a957', GradientType=0);} +.progress-success.progress-striped .bar{background-color:#62c462;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-ms-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, 
transparent 75%, transparent);} +.progress-info .bar{background-color:#4bb1cf;background-image:-moz-linear-gradient(top, #5bc0de, #339bb9);background-image:-ms-linear-gradient(top, #5bc0de, #339bb9);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#5bc0de), to(#339bb9));background-image:-webkit-linear-gradient(top, #5bc0de, #339bb9);background-image:-o-linear-gradient(top, #5bc0de, #339bb9);background-image:linear-gradient(top, #5bc0de, #339bb9);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#5bc0de', endColorstr='#339bb9', GradientType=0);} +.progress-info.progress-striped .bar{background-color:#5bc0de;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-ms-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);} +.progress-warning 
.bar{background-color:#faa732;background-image:-moz-linear-gradient(top, #fbb450, #f89406);background-image:-ms-linear-gradient(top, #fbb450, #f89406);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#fbb450), to(#f89406));background-image:-webkit-linear-gradient(top, #fbb450, #f89406);background-image:-o-linear-gradient(top, #fbb450, #f89406);background-image:linear-gradient(top, #fbb450, #f89406);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fbb450', endColorstr='#f89406', GradientType=0);} +.progress-warning.progress-striped .bar{background-color:#fbb450;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-ms-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(-45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);} 
+.hero-unit{padding:60px;margin-bottom:30px;background-color:#eeeeee;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;}.hero-unit h1{margin-bottom:0;font-size:60px;line-height:1;color:inherit;letter-spacing:-1px;} +.hero-unit p{font-size:18px;font-weight:200;line-height:27px;color:inherit;} +.tooltip{position:absolute;z-index:1020;display:block;visibility:visible;padding:5px;font-size:11px;opacity:0;filter:alpha(opacity=0);}.tooltip.in{opacity:0.8;filter:alpha(opacity=80);} +.tooltip.top{margin-top:-2px;} +.tooltip.right{margin-left:2px;} +.tooltip.bottom{margin-top:2px;} +.tooltip.left{margin-left:-2px;} +.tooltip.top .tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-left:5px solid transparent;border-right:5px solid transparent;border-top:5px solid #000000;} +.tooltip.left .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-top:5px solid transparent;border-bottom:5px solid transparent;border-left:5px solid #000000;} +.tooltip.bottom .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-left:5px solid transparent;border-right:5px solid transparent;border-bottom:5px solid #000000;} +.tooltip.right .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-top:5px solid transparent;border-bottom:5px solid transparent;border-right:5px solid #000000;} +.tooltip-inner{max-width:200px;padding:3px 8px;color:#ffffff;text-align:center;text-decoration:none;background-color:#000000;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} +.tooltip-arrow{position:absolute;width:0;height:0;} +.popover{position:absolute;top:0;left:0;z-index:1010;display:none;padding:5px;}.popover.top{margin-top:-5px;} +.popover.right{margin-left:5px;} +.popover.bottom{margin-top:5px;} +.popover.left{margin-left:-5px;} +.popover.top .arrow{bottom:0;left:50%;margin-left:-5px;border-left:5px solid transparent;border-right:5px solid transparent;border-top:5px solid #000000;} +.popover.right .arrow{top:50%;left:0;margin-top:-5px;border-top:5px solid 
transparent;border-bottom:5px solid transparent;border-right:5px solid #000000;} +.popover.bottom .arrow{top:0;left:50%;margin-left:-5px;border-left:5px solid transparent;border-right:5px solid transparent;border-bottom:5px solid #000000;} +.popover.left .arrow{top:50%;right:0;margin-top:-5px;border-top:5px solid transparent;border-bottom:5px solid transparent;border-left:5px solid #000000;} +.popover .arrow{position:absolute;width:0;height:0;} +.popover-inner{padding:3px;width:280px;overflow:hidden;background:#000000;background:rgba(0, 0, 0, 0.8);-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);-moz-box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);} +.popover-title{padding:9px 15px;line-height:1;background-color:#f5f5f5;border-bottom:1px solid #eee;-webkit-border-radius:3px 3px 0 0;-moz-border-radius:3px 3px 0 0;border-radius:3px 3px 0 0;} +.popover-content{padding:14px;background-color:#ffffff;-webkit-border-radius:0 0 3px 3px;-moz-border-radius:0 0 3px 3px;border-radius:0 0 3px 3px;-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box;}.popover-content p,.popover-content ul,.popover-content ol{margin-bottom:0;} +.modal-open .dropdown-menu{z-index:2050;} +.modal-open .dropdown.open{*z-index:2050;} +.modal-open .popover{z-index:2060;} +.modal-open .tooltip{z-index:2070;} +.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000000;}.modal-backdrop.fade{opacity:0;} +.modal-backdrop,.modal-backdrop.fade.in{opacity:0.8;filter:alpha(opacity=80);} +.modal{position:fixed;top:50%;left:50%;z-index:1050;overflow:auto;width:560px;margin:-250px 0 0 -280px;background-color:#ffffff;border:1px solid #999;border:1px solid rgba(0, 0, 0, 0.3);*border:1px solid #999;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);-moz-box-shadow:0 3px 7px 
rgba(0, 0, 0, 0.3);box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box;}.modal.fade{-webkit-transition:opacity .3s linear, top .3s ease-out;-moz-transition:opacity .3s linear, top .3s ease-out;-ms-transition:opacity .3s linear, top .3s ease-out;-o-transition:opacity .3s linear, top .3s ease-out;transition:opacity .3s linear, top .3s ease-out;top:-25%;} +.modal.fade.in{top:50%;} +.modal-header{padding:9px 15px;border-bottom:1px solid #eee;}.modal-header .close{margin-top:2px;} +.modal-body{overflow-y:auto;max-height:400px;padding:15px;} +.modal-form{margin-bottom:0;} +.modal-footer{padding:14px 15px 15px;margin-bottom:0;text-align:right;background-color:#f5f5f5;border-top:1px solid #ddd;-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;-webkit-box-shadow:inset 0 1px 0 #ffffff;-moz-box-shadow:inset 0 1px 0 #ffffff;box-shadow:inset 0 1px 0 #ffffff;*zoom:1;}.modal-footer:before,.modal-footer:after{display:table;content:"";} +.modal-footer:after{clear:both;} +.modal-footer .btn+.btn{margin-left:5px;margin-bottom:0;} +.modal-footer .btn-group .btn+.btn{margin-left:-1px;} +.dropup,.dropdown{position:relative;} +.dropdown-toggle{*margin-bottom:-3px;} +.dropdown-toggle:active,.open .dropdown-toggle{outline:0;} +.caret{display:inline-block;width:0;height:0;vertical-align:top;border-top:4px solid #000000;border-right:4px solid transparent;border-left:4px solid transparent;content:"";opacity:0.3;filter:alpha(opacity=30);} +.dropdown .caret{margin-top:8px;margin-left:2px;} +.dropdown:hover .caret,.open .caret{opacity:1;filter:alpha(opacity=100);} +.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:160px;padding:4px 0;margin:1px 0 0;list-style:none;background-color:#ffffff;border:1px solid #ccc;border:1px solid rgba(0, 0, 0, 
0.2);*border-right-width:2px;*border-bottom-width:2px;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px;-webkit-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);-moz-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box;}.dropdown-menu.pull-right{right:0;left:auto;} +.dropdown-menu .divider{*width:100%;height:1px;margin:8px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #ffffff;} +.dropdown-menu a{display:block;padding:3px 15px;clear:both;font-weight:normal;line-height:18px;color:#333333;white-space:nowrap;} +.dropdown-menu li>a:hover,.dropdown-menu .active>a,.dropdown-menu .active>a:hover{color:#ffffff;text-decoration:none;background-color:#0088cc;} +.open{*z-index:1000;}.open >.dropdown-menu{display:block;} +.pull-right>.dropdown-menu{right:0;left:auto;} +.dropup .caret,.navbar-fixed-bottom .dropdown .caret{border-top:0;border-bottom:4px solid #000000;content:"\2191";} +.dropup .dropdown-menu,.navbar-fixed-bottom .dropdown .dropdown-menu{top:auto;bottom:100%;margin-bottom:1px;} +.typeahead{margin-top:2px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} +.accordion{margin-bottom:18px;} +.accordion-group{margin-bottom:2px;border:1px solid #e5e5e5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} +.accordion-heading{border-bottom:0;} +.accordion-heading .accordion-toggle{display:block;padding:8px 15px;} +.accordion-toggle{cursor:pointer;} +.accordion-inner{padding:9px 15px;border-top:1px solid #e5e5e5;} +.carousel{position:relative;margin-bottom:18px;line-height:1;} +.carousel-inner{overflow:hidden;width:100%;position:relative;} +.carousel .item{display:none;position:relative;-webkit-transition:0.6s ease-in-out left;-moz-transition:0.6s ease-in-out left;-ms-transition:0.6s ease-in-out left;-o-transition:0.6s ease-in-out left;transition:0.6s ease-in-out left;} 
+.carousel .item>img{display:block;line-height:1;} +.carousel .active,.carousel .next,.carousel .prev{display:block;} +.carousel .active{left:0;} +.carousel .next,.carousel .prev{position:absolute;top:0;width:100%;} +.carousel .next{left:100%;} +.carousel .prev{left:-100%;} +.carousel .next.left,.carousel .prev.right{left:0;} +.carousel .active.left{left:-100%;} +.carousel .active.right{left:100%;} +.carousel-control{position:absolute;top:40%;left:15px;width:40px;height:40px;margin-top:-20px;font-size:60px;font-weight:100;line-height:30px;color:#ffffff;text-align:center;background:#222222;border:3px solid #ffffff;-webkit-border-radius:23px;-moz-border-radius:23px;border-radius:23px;opacity:0.5;filter:alpha(opacity=50);}.carousel-control.right{left:auto;right:15px;} +.carousel-control:hover{color:#ffffff;text-decoration:none;opacity:0.9;filter:alpha(opacity=90);} +.carousel-caption{position:absolute;left:0;right:0;bottom:0;padding:10px 15px 5px;background:#333333;background:rgba(0, 0, 0, 0.75);} +.carousel-caption h4,.carousel-caption p{color:#ffffff;} +.well{min-height:20px;padding:19px;margin-bottom:20px;background-color:#f5f5f5;border:1px solid #eee;border:1px solid rgba(0, 0, 0, 0.05);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.05);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.05);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.05);}.well blockquote{border-color:#ddd;border-color:rgba(0, 0, 0, 0.15);} +.well-large{padding:24px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;} +.well-small{padding:9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;} +.close{float:right;font-size:20px;font-weight:bold;line-height:18px;color:#000000;text-shadow:0 1px 0 #ffffff;opacity:0.2;filter:alpha(opacity=20);}.close:hover{color:#000000;text-decoration:none;cursor:pointer;opacity:0.4;filter:alpha(opacity=40);} 
+button.close{padding:0;cursor:pointer;background:transparent;border:0;-webkit-appearance:none;} +.pull-right{float:right;} +.pull-left{float:left;} +.hide{display:none;} +.show{display:block;} +.invisible{visibility:hidden;} +.fade{opacity:0;-webkit-transition:opacity 0.15s linear;-moz-transition:opacity 0.15s linear;-ms-transition:opacity 0.15s linear;-o-transition:opacity 0.15s linear;transition:opacity 0.15s linear;}.fade.in{opacity:1;} +.collapse{position:relative;height:0;overflow:hidden;-webkit-transition:height 0.35s ease;-moz-transition:height 0.35s ease;-ms-transition:height 0.35s ease;-o-transition:height 0.35s ease;transition:height 0.35s ease;}.collapse.in{height:auto;} +.hidden{display:none;visibility:hidden;} +.visible-phone{display:none !important;} +.visible-tablet{display:none !important;} +.hidden-desktop{display:none !important;} +@media (max-width:767px){.visible-phone{display:inherit !important;} .hidden-phone{display:none !important;} .hidden-desktop{display:inherit !important;} .visible-desktop{display:none !important;}}@media (min-width:768px) and (max-width:979px){.visible-tablet{display:inherit !important;} .hidden-tablet{display:none !important;} .hidden-desktop{display:inherit !important;} .visible-desktop{display:none !important ;}}@media (max-width:480px){.nav-collapse{-webkit-transform:translate3d(0, 0, 0);} .page-header h1 small{display:block;line-height:18px;} input[type="checkbox"],input[type="radio"]{border:1px solid #ccc;} .form-horizontal .control-group>label{float:none;width:auto;padding-top:0;text-align:left;} .form-horizontal .controls{margin-left:0;} .form-horizontal .control-list{padding-top:0;} .form-horizontal .form-actions{padding-left:10px;padding-right:10px;} .modal{position:absolute;top:10px;left:10px;right:10px;width:auto;margin:0;}.modal.fade.in{top:auto;} .modal-header .close{padding:10px;margin:-10px;} .carousel-caption{position:static;}}@media (max-width:767px){body{padding-left:20px;padding-right:20px;} 
.navbar-fixed-top,.navbar-fixed-bottom{margin-left:-20px;margin-right:-20px;} .container-fluid{padding:0;} .dl-horizontal dt{float:none;clear:none;width:auto;text-align:left;} .dl-horizontal dd{margin-left:0;} .container{width:auto;} .row-fluid{width:100%;} .row,.thumbnails{margin-left:0;} [class*="span"],.row-fluid [class*="span"]{float:none;display:block;width:auto;margin-left:0;} .input-large,.input-xlarge,.input-xxlarge,input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input{display:block;width:100%;min-height:28px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;-ms-box-sizing:border-box;box-sizing:border-box;} .input-prepend input,.input-append input,.input-prepend input[class*="span"],.input-append input[class*="span"]{display:inline-block;width:auto;}}@media (min-width:768px) and (max-width:979px){.row{margin-left:-20px;*zoom:1;}.row:before,.row:after{display:table;content:"";} .row:after{clear:both;} [class*="span"]{float:left;margin-left:20px;} .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:724px;} .span12{width:724px;} .span11{width:662px;} .span10{width:600px;} .span9{width:538px;} .span8{width:476px;} .span7{width:414px;} .span6{width:352px;} .span5{width:290px;} .span4{width:228px;} .span3{width:166px;} .span2{width:104px;} .span1{width:42px;} .offset12{margin-left:764px;} .offset11{margin-left:702px;} .offset10{margin-left:640px;} .offset9{margin-left:578px;} .offset8{margin-left:516px;} .offset7{margin-left:454px;} .offset6{margin-left:392px;} .offset5{margin-left:330px;} .offset4{margin-left:268px;} .offset3{margin-left:206px;} .offset2{margin-left:144px;} .offset1{margin-left:82px;} .row-fluid{width:100%;*zoom:1;}.row-fluid:before,.row-fluid:after{display:table;content:"";} .row-fluid:after{clear:both;} .row-fluid 
[class*="span"]{display:block;width:100%;min-height:28px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;-ms-box-sizing:border-box;box-sizing:border-box;float:left;margin-left:2.762430939%;*margin-left:2.709239449638298%;} .row-fluid [class*="span"]:first-child{margin-left:0;} .row-fluid .span12{width:99.999999993%;*width:99.9468085036383%;} .row-fluid .span11{width:91.436464082%;*width:91.38327259263829%;} .row-fluid .span10{width:82.87292817100001%;*width:82.8197366816383%;} .row-fluid .span9{width:74.30939226%;*width:74.25620077063829%;} .row-fluid .span8{width:65.74585634900001%;*width:65.6926648596383%;} .row-fluid .span7{width:57.182320438000005%;*width:57.129128948638304%;} .row-fluid .span6{width:48.618784527%;*width:48.5655930376383%;} .row-fluid .span5{width:40.055248616%;*width:40.0020571266383%;} .row-fluid .span4{width:31.491712705%;*width:31.4385212156383%;} .row-fluid .span3{width:22.928176794%;*width:22.874985304638297%;} .row-fluid .span2{width:14.364640883%;*width:14.311449393638298%;} .row-fluid .span1{width:5.801104972%;*width:5.747913482638298%;} input,textarea,.uneditable-input{margin-left:0;} input.span12, textarea.span12, .uneditable-input.span12{width:714px;} input.span11, textarea.span11, .uneditable-input.span11{width:652px;} input.span10, textarea.span10, .uneditable-input.span10{width:590px;} input.span9, textarea.span9, .uneditable-input.span9{width:528px;} input.span8, textarea.span8, .uneditable-input.span8{width:466px;} input.span7, textarea.span7, .uneditable-input.span7{width:404px;} input.span6, textarea.span6, .uneditable-input.span6{width:342px;} input.span5, textarea.span5, .uneditable-input.span5{width:280px;} input.span4, textarea.span4, .uneditable-input.span4{width:218px;} input.span3, textarea.span3, .uneditable-input.span3{width:156px;} input.span2, textarea.span2, .uneditable-input.span2{width:94px;} input.span1, textarea.span1, .uneditable-input.span1{width:32px;}}@media 
(min-width:1200px){.row{margin-left:-30px;*zoom:1;}.row:before,.row:after{display:table;content:"";} .row:after{clear:both;} [class*="span"]{float:left;margin-left:30px;} .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:1170px;} .span12{width:1170px;} .span11{width:1070px;} .span10{width:970px;} .span9{width:870px;} .span8{width:770px;} .span7{width:670px;} .span6{width:570px;} .span5{width:470px;} .span4{width:370px;} .span3{width:270px;} .span2{width:170px;} .span1{width:70px;} .offset12{margin-left:1230px;} .offset11{margin-left:1130px;} .offset10{margin-left:1030px;} .offset9{margin-left:930px;} .offset8{margin-left:830px;} .offset7{margin-left:730px;} .offset6{margin-left:630px;} .offset5{margin-left:530px;} .offset4{margin-left:430px;} .offset3{margin-left:330px;} .offset2{margin-left:230px;} .offset1{margin-left:130px;} .row-fluid{width:100%;*zoom:1;}.row-fluid:before,.row-fluid:after{display:table;content:"";} .row-fluid:after{clear:both;} .row-fluid [class*="span"]{display:block;width:100%;min-height:28px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;-ms-box-sizing:border-box;box-sizing:border-box;float:left;margin-left:2.564102564%;*margin-left:2.510911074638298%;} .row-fluid [class*="span"]:first-child{margin-left:0;} .row-fluid .span12{width:100%;*width:99.94680851063829%;} .row-fluid .span11{width:91.45299145300001%;*width:91.3997999636383%;} .row-fluid .span10{width:82.905982906%;*width:82.8527914166383%;} .row-fluid .span9{width:74.358974359%;*width:74.30578286963829%;} .row-fluid .span8{width:65.81196581200001%;*width:65.7587743226383%;} .row-fluid .span7{width:57.264957265%;*width:57.2117657756383%;} .row-fluid .span6{width:48.717948718%;*width:48.6647572286383%;} .row-fluid .span5{width:40.170940171000005%;*width:40.117748681638304%;} .row-fluid .span4{width:31.623931624%;*width:31.5707401346383%;} .row-fluid .span3{width:23.076923077%;*width:23.0237315876383%;} .row-fluid 
.span2{width:14.529914530000001%;*width:14.4767230406383%;} .row-fluid .span1{width:5.982905983%;*width:5.929714493638298%;} input,textarea,.uneditable-input{margin-left:0;} input.span12, textarea.span12, .uneditable-input.span12{width:1160px;} input.span11, textarea.span11, .uneditable-input.span11{width:1060px;} input.span10, textarea.span10, .uneditable-input.span10{width:960px;} input.span9, textarea.span9, .uneditable-input.span9{width:860px;} input.span8, textarea.span8, .uneditable-input.span8{width:760px;} input.span7, textarea.span7, .uneditable-input.span7{width:660px;} input.span6, textarea.span6, .uneditable-input.span6{width:560px;} input.span5, textarea.span5, .uneditable-input.span5{width:460px;} input.span4, textarea.span4, .uneditable-input.span4{width:360px;} input.span3, textarea.span3, .uneditable-input.span3{width:260px;} input.span2, textarea.span2, .uneditable-input.span2{width:160px;} input.span1, textarea.span1, .uneditable-input.span1{width:60px;} .thumbnails{margin-left:-30px;} .thumbnails>li{margin-left:30px;} .row-fluid .thumbnails{margin-left:0;}}@media (max-width:979px){body{padding-top:0;} .navbar-fixed-top,.navbar-fixed-bottom{position:static;} .navbar-fixed-top{margin-bottom:18px;} .navbar-fixed-bottom{margin-top:18px;} .navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding:5px;} .navbar .container{width:auto;padding:0;} .navbar .brand{padding-left:10px;padding-right:10px;margin:0 0 0 -5px;} .nav-collapse{clear:both;} .nav-collapse .nav{float:none;margin:0 0 9px;} .nav-collapse .nav>li{float:none;} .nav-collapse .nav>li>a{margin-bottom:2px;} .nav-collapse .nav>.divider-vertical{display:none;} .nav-collapse .nav .nav-header{color:#999999;text-shadow:none;} .nav-collapse .nav>li>a,.nav-collapse .dropdown-menu a{padding:6px 15px;font-weight:bold;color:#999999;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;} .nav-collapse .btn{padding:4px 10px 
4px;font-weight:normal;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} .nav-collapse .dropdown-menu li+li a{margin-bottom:2px;} .nav-collapse .nav>li>a:hover,.nav-collapse .dropdown-menu a:hover{background-color:#222222;} .nav-collapse.in .btn-group{margin-top:5px;padding:0;} .nav-collapse .dropdown-menu{position:static;top:auto;left:auto;float:none;display:block;max-width:none;margin:0 15px;padding:0;background-color:transparent;border:none;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;} .nav-collapse .dropdown-menu:before,.nav-collapse .dropdown-menu:after{display:none;} .nav-collapse .dropdown-menu .divider{display:none;} .nav-collapse .navbar-form,.nav-collapse .navbar-search{float:none;padding:9px 15px;margin:9px 0;border-top:1px solid #222222;border-bottom:1px solid #222222;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.1);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.1);box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.1);} .navbar .nav-collapse .nav.pull-right{float:none;margin-left:0;} .nav-collapse,.nav-collapse.collapse{overflow:hidden;height:0;} .navbar .btn-navbar{display:block;} .navbar-static .navbar-inner{padding-left:10px;padding-right:10px;}}@media (min-width:980px){.nav-collapse.collapse{height:auto !important;overflow:visible !important;}} \ No newline at end of file diff --git a/settings/helpTemplates/bootstrap.min.js b/settings/helpTemplates/bootstrap.min.js new file mode 100644 index 000000000..2c00b6f1d --- /dev/null +++ b/settings/helpTemplates/bootstrap.min.js @@ -0,0 +1,7 @@ +/** +* Bootstrap.js by @fat & @mdo +* plugins: bootstrap-transition.js, bootstrap-modal.js, bootstrap-dropdown.js, bootstrap-scrollspy.js, bootstrap-tab.js, bootstrap-tooltip.js, bootstrap-popover.js, bootstrap-alert.js, bootstrap-button.js, bootstrap-collapse.js, 
bootstrap-carousel.js, bootstrap-typeahead.js +* Copyright 2012 Twitter, Inc. +* http://www.apache.org/licenses/LICENSE-2.0.txt +*/ +!function(a){a(function(){a.support.transition=function(){var a=function(){var a=document.createElement("bootstrap"),b={WebkitTransition:"webkitTransitionEnd",MozTransition:"transitionend",OTransition:"oTransitionEnd",msTransition:"MSTransitionEnd",transition:"transitionend"},c;for(c in b)if(a.style[c]!==undefined)return b[c]}();return a&&{end:a}}()})}(window.jQuery),!function(a){function c(){var b=this,c=setTimeout(function(){b.$element.off(a.support.transition.end),d.call(b)},500);this.$element.one(a.support.transition.end,function(){clearTimeout(c),d.call(b)})}function d(a){this.$element.hide().trigger("hidden"),e.call(this)}function e(b){var c=this,d=this.$element.hasClass("fade")?"fade":"";if(this.isShown&&this.options.backdrop){var e=a.support.transition&&d;this.$backdrop=a(' + + + +<@makeHeader title="GATK documentation index" isIndex=true /> +

GATK documentation index + ${version} +

+
+
+ <#list groups?sort_by("name") as group> + <@emitGroup group=group/> - - - - -<@makeHeader title="GATK documentation index"/> - -

GATK documentation index

- <@headerInfo /> - <#list groups?sort_by("name") as group> - <@emitGroup group=group/> - - - <@footerInfo /> - - +
+<@footerInfo /> +<@pageFooter /> diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html index 7fc8cd7bd..ce77acb5e 100644 --- a/settings/helpTemplates/generic.template.html +++ b/settings/helpTemplates/generic.template.html @@ -1,8 +1,34 @@ + + <#include "common.html"/> <#macro argumentlist name myargs> - <#if myargs?size != 0> - ${name} + <#if myargs?size != 0> + + ${name} + <#list myargs as arg> ${arg.name} @@ -11,116 +37,142 @@ ${arg.summary} <#-- - ${arg.required} + < + td>${arg.required} --> - - + + <#macro argumentDetails arg> -

${arg.name}<#if arg.synonyms??> / ${arg.synonyms} - (<#if arg.attributes??>${arg.attributes} ${arg.type}<#if arg.defaultValue??> with default value ${arg.defaultValue})

+

${arg.name} + <#if arg.synonyms??> / ${arg.synonyms} + + ( + <#if arg.attributes??>${arg.attributes} + ${arg.type} + <#if arg.defaultValue??> with default value ${arg.defaultValue} + ) +

- ${arg.summary}. ${arg.fulltext} - <#if arg.rodTypes??>${arg.name} binds reference ordered data. This argument supports ROD files of the - following types: ${arg.rodTypes} - <#if arg.options??> -
- The ${arg.name} argument is an enumerated type (${arg.type}), which can have one of the following values: -

- <#list arg.options as option> -
${option.name}
-
${option.summary}
- -
- + ${arg.summary}. ${arg.fulltext} + <#if arg.rodTypes??>${arg.name} binds reference ordered data. This argument supports ROD files of the + following types: ${arg.rodTypes} + + <#if arg.options??> +
+ The ${arg.name} argument is an enumerated type (${arg.type}), which can have one of the following values: +
+ <#list arg.options as option> +
${option.name}
+
${option.summary}
+ +
+

- - + + <#macro relatedByType name type> <#list relatedDocs as relatedDoc> <#if relatedDoc.relation == type>

${name}

    - <#list relatedDocs as relatedDoc> - <#if relatedDoc.relation == type> -
  • ${relatedDoc.name} is a ${relatedDoc.relation}
  • - - + <#list relatedDocs as relatedDoc> + <#if relatedDoc.relation == type> +
  • ${relatedDoc.name} is a ${relatedDoc.relation}
  • + +
- <#break> + <#break> - -<@makeHeader title="${name} documentation"/> - -

${name}

- <@headerInfo /> -

${summary}

- <#if author??> -

Author

- ${author} - -

Introduction

- ${description} - - <#-- Create the argument summary --> - <#if arguments.all?size != 0> -
-

${name} specific arguments

- - - - - - - - - - - <@argumentlist name="Required" myargs=arguments.required/> - <@argumentlist name="Optional" myargs=arguments.optional/> - <@argumentlist name="Advanced" myargs=arguments.advanced/> - <@argumentlist name="Hidden" myargs=arguments.hidden/> - <@argumentlist name="Depreciated" myargs=arguments.depreciated/> - -
NameTypeDefault valueSummary
- + +<@makeHeader title="${name} documentation" isIndex=false /> +
+ + - <#-- Create references to additional capabilities if appropriate --> - <#if extradocs?size != 0> -
-

Additional capabilities

- The arguments described in the entries below can be supplied to this tool to modify - its behavior. For example, the -L argument directs the GATK engine restricts processing - to specific genomic intervals. This capability is available to all GATK walkers. - - - - <#-- This class is related to other documented classes via sub/super relationships --> - <#if relatedDocs?? && relatedDocs?size != 0> -
-

Related capabilities

- <@relatedByType name="Superclasses" type="superclass"/> - <@relatedByType name="Subclasses" type="subclass"/> - - - <#-- List all of the --> - <#if arguments.all?size != 0> -
- <#-- Create the argument details --> -

Argument details

- <#list arguments.all as arg> - <@argumentDetails arg=arg/> - - - - <@footerInfo /> - - +
+ +
+ +

${name}

+ +

${summary}

+ <#if author??> +

Author + ${author} +

+ + <#if group?? > +

Category + ${group} +

+ +
+ +

Introduction

+ ${description} + + <#-- Create the argument summary --> + <#if arguments.all?size != 0> +
+

${name} specific arguments

+ + + + + + + + + + + <@argumentlist name="Required" myargs=arguments.required/> + <@argumentlist name="Optional" myargs=arguments.optional/> + <@argumentlist name="Advanced" myargs=arguments.advanced/> + <@argumentlist name="Hidden" myargs=arguments.hidden/> + <@argumentlist name="Depreciated" myargs=arguments.depreciated/> + +
NameTypeDefault valueSummary
+ + + <#-- Create references to additional capabilities if appropriate --> + <#if extradocs?size != 0> +
+

Additional capabilities

+ The arguments described in the entries below can be supplied to this tool to modify + its behavior. For example, the -L argument directs the GATK engine restricts processing + to specific genomic intervals. This capability is available to all GATK walkers. + + + + <#-- This class is related to other documented classes via sub/super relationships --> + <#if relatedDocs?? && relatedDocs?size != 0> +
+

Related capabilities

+ <@relatedByType name="Superclasses" type="superclass"/> + <@relatedByType name="Subclasses" type="subclass"/> + + + <#-- List all of the --> + <#if arguments.all?size != 0> +
+ <#-- Create the argument details --> +

Argument details

+ <#list arguments.all as arg> + <@argumentDetails arg=arg/> + + + + <@footerInfo /> + <@pageFooter /> \ No newline at end of file diff --git a/settings/helpTemplates/jquery.min.js b/settings/helpTemplates/jquery.min.js new file mode 100644 index 000000000..16ad06c5a --- /dev/null +++ b/settings/helpTemplates/jquery.min.js @@ -0,0 +1,4 @@ +/*! jQuery v1.7.2 jquery.com | jquery.org/license */ +(function(a,b){function cy(a){return f.isWindow(a)?a:a.nodeType===9?a.defaultView||a.parentWindow:!1}function cu(a){if(!cj[a]){var b=c.body,d=f("<"+a+">").appendTo(b),e=d.css("display");d.remove();if(e==="none"||e===""){ck||(ck=c.createElement("iframe"),ck.frameBorder=ck.width=ck.height=0),b.appendChild(ck);if(!cl||!ck.createElement)cl=(ck.contentWindow||ck.contentDocument).document,cl.write((f.support.boxModel?"":"")+""),cl.close();d=cl.createElement(a),cl.body.appendChild(d),e=f.css(d,"display"),b.removeChild(ck)}cj[a]=e}return cj[a]}function ct(a,b){var c={};f.each(cp.concat.apply([],cp.slice(0,b)),function(){c[this]=a});return c}function cs(){cq=b}function cr(){setTimeout(cs,0);return cq=f.now()}function ci(){try{return new a.ActiveXObject("Microsoft.XMLHTTP")}catch(b){}}function ch(){try{return new a.XMLHttpRequest}catch(b){}}function cb(a,c){a.dataFilter&&(c=a.dataFilter(c,a.dataType));var d=a.dataTypes,e={},g,h,i=d.length,j,k=d[0],l,m,n,o,p;for(g=1;g0){if(c!=="border")for(;e=0===c})}function S(a){return!a||!a.parentNode||a.parentNode.nodeType===11}function K(){return!0}function J(){return!1}function n(a,b,c){var d=b+"defer",e=b+"queue",g=b+"mark",h=f._data(a,d);h&&(c==="queue"||!f._data(a,e))&&(c==="mark"||!f._data(a,g))&&setTimeout(function(){!f._data(a,e)&&!f._data(a,g)&&(f.removeData(a,d,!0),h.fire())},0)}function m(a){for(var b in a){if(b==="data"&&f.isEmptyObject(a[b]))continue;if(b!=="toJSON")return!1}return!0}function l(a,c,d){if(d===b&&a.nodeType===1){var e="data-"+c.replace(k,"-$1").toLowerCase();d=a.getAttribute(e);if(typeof 
d=="string"){try{d=d==="true"?!0:d==="false"?!1:d==="null"?null:f.isNumeric(d)?+d:j.test(d)?f.parseJSON(d):d}catch(g){}f.data(a,c,d)}else d=b}return d}function h(a){var b=g[a]={},c,d;a=a.split(/\s+/);for(c=0,d=a.length;c)[^>]*$|#([\w\-]*)$)/,j=/\S/,k=/^\s+/,l=/\s+$/,m=/^<(\w+)\s*\/?>(?:<\/\1>)?$/,n=/^[\],:{}\s]*$/,o=/\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g,p=/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g,q=/(?:^|:|,)(?:\s*\[)+/g,r=/(webkit)[ \/]([\w.]+)/,s=/(opera)(?:.*version)?[ \/]([\w.]+)/,t=/(msie) ([\w.]+)/,u=/(mozilla)(?:.*? rv:([\w.]+))?/,v=/-([a-z]|[0-9])/ig,w=/^-ms-/,x=function(a,b){return(b+"").toUpperCase()},y=d.userAgent,z,A,B,C=Object.prototype.toString,D=Object.prototype.hasOwnProperty,E=Array.prototype.push,F=Array.prototype.slice,G=String.prototype.trim,H=Array.prototype.indexOf,I={};e.fn=e.prototype={constructor:e,init:function(a,d,f){var g,h,j,k;if(!a)return this;if(a.nodeType){this.context=this[0]=a,this.length=1;return this}if(a==="body"&&!d&&c.body){this.context=c,this[0]=c.body,this.selector=a,this.length=1;return this}if(typeof a=="string"){a.charAt(0)!=="<"||a.charAt(a.length-1)!==">"||a.length<3?g=i.exec(a):g=[null,a,null];if(g&&(g[1]||!d)){if(g[1]){d=d instanceof e?d[0]:d,k=d?d.ownerDocument||d:c,j=m.exec(a),j?e.isPlainObject(d)?(a=[c.createElement(j[1])],e.fn.attr.call(a,d,!0)):a=[k.createElement(j[1])]:(j=e.buildFragment([g[1]],[k]),a=(j.cacheable?e.clone(j.fragment):j.fragment).childNodes);return e.merge(this,a)}h=c.getElementById(g[2]);if(h&&h.parentNode){if(h.id!==g[2])return f.find(a);this.length=1,this[0]=h}this.context=c,this.selector=a;return this}return!d||d.jquery?(d||f).find(a):this.constructor(d).find(a)}if(e.isFunction(a))return f.ready(a);a.selector!==b&&(this.selector=a.selector,this.context=a.context);return e.makeArray(a,this)},selector:"",jquery:"1.7.2",length:0,size:function(){return this.length},toArray:function(){return F.call(this,0)},get:function(a){return 
a==null?this.toArray():a<0?this[this.length+a]:this[a]},pushStack:function(a,b,c){var d=this.constructor();e.isArray(a)?E.apply(d,a):e.merge(d,a),d.prevObject=this,d.context=this.context,b==="find"?d.selector=this.selector+(this.selector?" ":"")+c:b&&(d.selector=this.selector+"."+b+"("+c+")");return d},each:function(a,b){return e.each(this,a,b)},ready:function(a){e.bindReady(),A.add(a);return this},eq:function(a){a=+a;return a===-1?this.slice(a):this.slice(a,a+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(F.apply(this,arguments),"slice",F.call(arguments).join(","))},map:function(a){return this.pushStack(e.map(this,function(b,c){return a.call(b,c,b)}))},end:function(){return this.prevObject||this.constructor(null)},push:E,sort:[].sort,splice:[].splice},e.fn.init.prototype=e.fn,e.extend=e.fn.extend=function(){var a,c,d,f,g,h,i=arguments[0]||{},j=1,k=arguments.length,l=!1;typeof i=="boolean"&&(l=i,i=arguments[1]||{},j=2),typeof i!="object"&&!e.isFunction(i)&&(i={}),k===j&&(i=this,--j);for(;j0)return;A.fireWith(c,[e]),e.fn.trigger&&e(c).trigger("ready").off("ready")}},bindReady:function(){if(!A){A=e.Callbacks("once memory");if(c.readyState==="complete")return setTimeout(e.ready,1);if(c.addEventListener)c.addEventListener("DOMContentLoaded",B,!1),a.addEventListener("load",e.ready,!1);else if(c.attachEvent){c.attachEvent("onreadystatechange",B),a.attachEvent("onload",e.ready);var b=!1;try{b=a.frameElement==null}catch(d){}c.documentElement.doScroll&&b&&J()}}},isFunction:function(a){return e.type(a)==="function"},isArray:Array.isArray||function(a){return e.type(a)==="array"},isWindow:function(a){return a!=null&&a==a.window},isNumeric:function(a){return!isNaN(parseFloat(a))&&isFinite(a)},type:function(a){return 
a==null?String(a):I[C.call(a)]||"object"},isPlainObject:function(a){if(!a||e.type(a)!=="object"||a.nodeType||e.isWindow(a))return!1;try{if(a.constructor&&!D.call(a,"constructor")&&!D.call(a.constructor.prototype,"isPrototypeOf"))return!1}catch(c){return!1}var d;for(d in a);return d===b||D.call(a,d)},isEmptyObject:function(a){for(var b in a)return!1;return!0},error:function(a){throw new Error(a)},parseJSON:function(b){if(typeof b!="string"||!b)return null;b=e.trim(b);if(a.JSON&&a.JSON.parse)return a.JSON.parse(b);if(n.test(b.replace(o,"@").replace(p,"]").replace(q,"")))return(new Function("return "+b))();e.error("Invalid JSON: "+b)},parseXML:function(c){if(typeof c!="string"||!c)return null;var d,f;try{a.DOMParser?(f=new DOMParser,d=f.parseFromString(c,"text/xml")):(d=new ActiveXObject("Microsoft.XMLDOM"),d.async="false",d.loadXML(c))}catch(g){d=b}(!d||!d.documentElement||d.getElementsByTagName("parsererror").length)&&e.error("Invalid XML: "+c);return d},noop:function(){},globalEval:function(b){b&&j.test(b)&&(a.execScript||function(b){a.eval.call(a,b)})(b)},camelCase:function(a){return a.replace(w,"ms-").replace(v,x)},nodeName:function(a,b){return a.nodeName&&a.nodeName.toUpperCase()===b.toUpperCase()},each:function(a,c,d){var f,g=0,h=a.length,i=h===b||e.isFunction(a);if(d){if(i){for(f in a)if(c.apply(a[f],d)===!1)break}else for(;g0&&a[0]&&a[j-1]||j===0||e.isArray(a));if(k)for(;i1?i.call(arguments,0):b,j.notifyWith(k,e)}}function l(a){return function(c){b[a]=arguments.length>1?i.call(arguments,0):c,--g||j.resolveWith(j,b)}}var b=i.call(arguments,0),c=0,d=b.length,e=Array(d),g=d,h=d,j=d<=1&&a&&f.isFunction(a.promise)?a:f.Deferred(),k=j.promise();if(d>1){for(;c
a",d=p.getElementsByTagName("*"),e=p.getElementsByTagName("a")[0];if(!d||!d.length||!e)return{};g=c.createElement("select"),h=g.appendChild(c.createElement("option")),i=p.getElementsByTagName("input")[0],b={leadingWhitespace:p.firstChild.nodeType===3,tbody:!p.getElementsByTagName("tbody").length,htmlSerialize:!!p.getElementsByTagName("link").length,style:/top/.test(e.getAttribute("style")),hrefNormalized:e.getAttribute("href")==="/a",opacity:/^0.55/.test(e.style.opacity),cssFloat:!!e.style.cssFloat,checkOn:i.value==="on",optSelected:h.selected,getSetAttribute:p.className!=="t",enctype:!!c.createElement("form").enctype,html5Clone:c.createElement("nav").cloneNode(!0).outerHTML!=="<:nav>",submitBubbles:!0,changeBubbles:!0,focusinBubbles:!1,deleteExpando:!0,noCloneEvent:!0,inlineBlockNeedsLayout:!1,shrinkWrapBlocks:!1,reliableMarginRight:!0,pixelMargin:!0},f.boxModel=b.boxModel=c.compatMode==="CSS1Compat",i.checked=!0,b.noCloneChecked=i.cloneNode(!0).checked,g.disabled=!0,b.optDisabled=!h.disabled;try{delete p.test}catch(r){b.deleteExpando=!1}!p.addEventListener&&p.attachEvent&&p.fireEvent&&(p.attachEvent("onclick",function(){b.noCloneEvent=!1}),p.cloneNode(!0).fireEvent("onclick")),i=c.createElement("input"),i.value="t",i.setAttribute("type","radio"),b.radioValue=i.value==="t",i.setAttribute("checked","checked"),i.setAttribute("name","t"),p.appendChild(i),j=c.createDocumentFragment(),j.appendChild(p.lastChild),b.checkClone=j.cloneNode(!0).cloneNode(!0).lastChild.checked,b.appendChecked=i.checked,j.removeChild(i),j.appendChild(p);if(p.attachEvent)for(n in{submit:1,change:1,focusin:1})m="on"+n,o=m in p,o||(p.setAttribute(m,"return;"),o=typeof p[m]=="function"),b[n+"Bubbles"]=o;j.removeChild(p),j=g=h=p=i=null,f(function(){var d,e,g,h,i,j,l,m,n,q,r,s,t,u=c.getElementsByTagName("body")[0];!u||(m=1,t="padding:0;margin:0;border:",r="position:absolute;top:0;left:0;width:1px;height:1px;",s=t+"0;visibility:hidden;",n="style='"+r+t+"5px solid #000;",q="
"+""+"
",d=c.createElement("div"),d.style.cssText=s+"width:0;height:0;position:static;top:0;margin-top:"+m+"px",u.insertBefore(d,u.firstChild),p=c.createElement("div"),d.appendChild(p),p.innerHTML="
t
",k=p.getElementsByTagName("td"),o=k[0].offsetHeight===0,k[0].style.display="",k[1].style.display="none",b.reliableHiddenOffsets=o&&k[0].offsetHeight===0,a.getComputedStyle&&(p.innerHTML="",l=c.createElement("div"),l.style.width="0",l.style.marginRight="0",p.style.width="2px",p.appendChild(l),b.reliableMarginRight=(parseInt((a.getComputedStyle(l,null)||{marginRight:0}).marginRight,10)||0)===0),typeof p.style.zoom!="undefined"&&(p.innerHTML="",p.style.width=p.style.padding="1px",p.style.border=0,p.style.overflow="hidden",p.style.display="inline",p.style.zoom=1,b.inlineBlockNeedsLayout=p.offsetWidth===3,p.style.display="block",p.style.overflow="visible",p.innerHTML="
",b.shrinkWrapBlocks=p.offsetWidth!==3),p.style.cssText=r+s,p.innerHTML=q,e=p.firstChild,g=e.firstChild,i=e.nextSibling.firstChild.firstChild,j={doesNotAddBorder:g.offsetTop!==5,doesAddBorderForTableAndCells:i.offsetTop===5},g.style.position="fixed",g.style.top="20px",j.fixedPosition=g.offsetTop===20||g.offsetTop===15,g.style.position=g.style.top="",e.style.overflow="hidden",e.style.position="relative",j.subtractsBorderForOverflowNotVisible=g.offsetTop===-5,j.doesNotIncludeMarginInBodyOffset=u.offsetTop!==m,a.getComputedStyle&&(p.style.marginTop="1%",b.pixelMargin=(a.getComputedStyle(p,null)||{marginTop:0}).marginTop!=="1%"),typeof d.style.zoom!="undefined"&&(d.style.zoom=1),u.removeChild(d),l=p=d=null,f.extend(b,j))});return b}();var j=/^(?:\{.*\}|\[.*\])$/,k=/([A-Z])/g;f.extend({cache:{},uuid:0,expando:"jQuery"+(f.fn.jquery+Math.random()).replace(/\D/g,""),noData:{embed:!0,object:"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",applet:!0},hasData:function(a){a=a.nodeType?f.cache[a[f.expando]]:a[f.expando];return!!a&&!m(a)},data:function(a,c,d,e){if(!!f.acceptData(a)){var g,h,i,j=f.expando,k=typeof c=="string",l=a.nodeType,m=l?f.cache:a,n=l?a[j]:a[j]&&j,o=c==="events";if((!n||!m[n]||!o&&!e&&!m[n].data)&&k&&d===b)return;n||(l?a[j]=n=++f.uuid:n=j),m[n]||(m[n]={},l||(m[n].toJSON=f.noop));if(typeof c=="object"||typeof c=="function")e?m[n]=f.extend(m[n],c):m[n].data=f.extend(m[n].data,c);g=h=m[n],e||(h.data||(h.data={}),h=h.data),d!==b&&(h[f.camelCase(c)]=d);if(o&&!h[c])return g.events;k?(i=h[c],i==null&&(i=h[f.camelCase(c)])):i=h;return i}},removeData:function(a,b,c){if(!!f.acceptData(a)){var d,e,g,h=f.expando,i=a.nodeType,j=i?f.cache:a,k=i?a[h]:h;if(!j[k])return;if(b){d=c?j[k]:j[k].data;if(d){f.isArray(b)||(b in d?b=[b]:(b=f.camelCase(b),b in d?b=[b]:b=b.split(" ")));for(e=0,g=b.length;e1,null,!1)},removeData:function(a){return 
this.each(function(){f.removeData(this,a)})}}),f.extend({_mark:function(a,b){a&&(b=(b||"fx")+"mark",f._data(a,b,(f._data(a,b)||0)+1))},_unmark:function(a,b,c){a!==!0&&(c=b,b=a,a=!1);if(b){c=c||"fx";var d=c+"mark",e=a?0:(f._data(b,d)||1)-1;e?f._data(b,d,e):(f.removeData(b,d,!0),n(b,c,"mark"))}},queue:function(a,b,c){var d;if(a){b=(b||"fx")+"queue",d=f._data(a,b),c&&(!d||f.isArray(c)?d=f._data(a,b,f.makeArray(c)):d.push(c));return d||[]}},dequeue:function(a,b){b=b||"fx";var c=f.queue(a,b),d=c.shift(),e={};d==="inprogress"&&(d=c.shift()),d&&(b==="fx"&&c.unshift("inprogress"),f._data(a,b+".run",e),d.call(a,function(){f.dequeue(a,b)},e)),c.length||(f.removeData(a,b+"queue "+b+".run",!0),n(a,b,"queue"))}}),f.fn.extend({queue:function(a,c){var d=2;typeof a!="string"&&(c=a,a="fx",d--);if(arguments.length1)},removeAttr:function(a){return this.each(function(){f.removeAttr(this,a)})},prop:function(a,b){return f.access(this,f.prop,a,b,arguments.length>1)},removeProp:function(a){a=f.propFix[a]||a;return this.each(function(){try{this[a]=b,delete this[a]}catch(c){}})},addClass:function(a){var b,c,d,e,g,h,i;if(f.isFunction(a))return this.each(function(b){f(this).addClass(a.call(this,b,this.className))});if(a&&typeof a=="string"){b=a.split(p);for(c=0,d=this.length;c-1)return!0;return!1},val:function(a){var c,d,e,g=this[0];{if(!!arguments.length){e=f.isFunction(a);return this.each(function(d){var g=f(this),h;if(this.nodeType===1){e?h=a.call(this,d,g.val()):h=a,h==null?h="":typeof h=="number"?h+="":f.isArray(h)&&(h=f.map(h,function(a){return a==null?"":a+""})),c=f.valHooks[this.type]||f.valHooks[this.nodeName.toLowerCase()];if(!c||!("set"in c)||c.set(this,h,"value")===b)this.value=h}})}if(g){c=f.valHooks[g.type]||f.valHooks[g.nodeName.toLowerCase()];if(c&&"get"in c&&(d=c.get(g,"value"))!==b)return d;d=g.value;return typeof d=="string"?d.replace(q,""):d==null?"":d}}}}),f.extend({valHooks:{option:{get:function(a){var 
b=a.attributes.value;return!b||b.specified?a.value:a.text}},select:{get:function(a){var b,c,d,e,g=a.selectedIndex,h=[],i=a.options,j=a.type==="select-one";if(g<0)return null;c=j?g:0,d=j?g+1:i.length;for(;c=0}),c.length||(a.selectedIndex=-1);return c}}},attrFn:{val:!0,css:!0,html:!0,text:!0,data:!0,width:!0,height:!0,offset:!0},attr:function(a,c,d,e){var g,h,i,j=a.nodeType;if(!!a&&j!==3&&j!==8&&j!==2){if(e&&c in f.attrFn)return f(a)[c](d);if(typeof a.getAttribute=="undefined")return f.prop(a,c,d);i=j!==1||!f.isXMLDoc(a),i&&(c=c.toLowerCase(),h=f.attrHooks[c]||(u.test(c)?x:w));if(d!==b){if(d===null){f.removeAttr(a,c);return}if(h&&"set"in h&&i&&(g=h.set(a,d,c))!==b)return g;a.setAttribute(c,""+d);return d}if(h&&"get"in h&&i&&(g=h.get(a,c))!==null)return g;g=a.getAttribute(c);return g===null?b:g}},removeAttr:function(a,b){var c,d,e,g,h,i=0;if(b&&a.nodeType===1){d=b.toLowerCase().split(p),g=d.length;for(;i=0}})});var z=/^(?:textarea|input|select)$/i,A=/^([^\.]*)?(?:\.(.+))?$/,B=/(?:^|\s)hover(\.\S+)?\b/,C=/^key/,D=/^(?:mouse|contextmenu)|click/,E=/^(?:focusinfocus|focusoutblur)$/,F=/^(\w*)(?:#([\w\-]+))?(?:\.([\w\-]+))?$/,G=function( +a){var b=F.exec(a);b&&(b[1]=(b[1]||"").toLowerCase(),b[3]=b[3]&&new RegExp("(?:^|\\s)"+b[3]+"(?:\\s|$)"));return b},H=function(a,b){var c=a.attributes||{};return(!b[1]||a.nodeName.toLowerCase()===b[1])&&(!b[2]||(c.id||{}).value===b[2])&&(!b[3]||b[3].test((c["class"]||{}).value))},I=function(a){return f.event.special.hover?a:a.replace(B,"mouseenter$1 mouseleave$1")};f.event={add:function(a,c,d,e,g){var h,i,j,k,l,m,n,o,p,q,r,s;if(!(a.nodeType===3||a.nodeType===8||!c||!d||!(h=f._data(a)))){d.handler&&(p=d,d=p.handler,g=p.selector),d.guid||(d.guid=f.guid++),j=h.events,j||(h.events=j={}),i=h.handle,i||(h.handle=i=function(a){return typeof f!="undefined"&&(!a||f.event.triggered!==a.type)?f.event.dispatch.apply(i.elem,arguments):b},i.elem=a),c=f.trim(I(c)).split(" 
");for(k=0;k=0&&(h=h.slice(0,-1),k=!0),h.indexOf(".")>=0&&(i=h.split("."),h=i.shift(),i.sort());if((!e||f.event.customEvent[h])&&!f.event.global[h])return;c=typeof c=="object"?c[f.expando]?c:new f.Event(h,c):new f.Event(h),c.type=h,c.isTrigger=!0,c.exclusive=k,c.namespace=i.join("."),c.namespace_re=c.namespace?new RegExp("(^|\\.)"+i.join("\\.(?:.*\\.)?")+"(\\.|$)"):null,o=h.indexOf(":")<0?"on"+h:"";if(!e){j=f.cache;for(l in j)j[l].events&&j[l].events[h]&&f.event.trigger(c,d,j[l].handle.elem,!0);return}c.result=b,c.target||(c.target=e),d=d!=null?f.makeArray(d):[],d.unshift(c),p=f.event.special[h]||{};if(p.trigger&&p.trigger.apply(e,d)===!1)return;r=[[e,p.bindType||h]];if(!g&&!p.noBubble&&!f.isWindow(e)){s=p.delegateType||h,m=E.test(s+h)?e:e.parentNode,n=null;for(;m;m=m.parentNode)r.push([m,s]),n=m;n&&n===e.ownerDocument&&r.push([n.defaultView||n.parentWindow||a,s])}for(l=0;le&&j.push({elem:this,matches:d.slice(e)});for(k=0;k0?this.on(b,null,a,c):this.trigger(b)},f.attrFn&&(f.attrFn[b]=!0),C.test(b)&&(f.event.fixHooks[b]=f.event.keyHooks),D.test(b)&&(f.event.fixHooks[b]=f.event.mouseHooks)}),function(){function x(a,b,c,e,f,g){for(var h=0,i=e.length;h0){k=j;break}}j=j[a]}e[h]=k}}}function w(a,b,c,e,f,g){for(var h=0,i=e.length;h+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g,d="sizcache"+(Math.random()+"").replace(".",""),e=0,g=Object.prototype.toString,h=!1,i=!0,j=/\\/g,k=/\r\n/g,l=/\W/;[0,0].sort(function(){i=!1;return 0});var m=function(b,d,e,f){e=e||[],d=d||c;var h=d;if(d.nodeType!==1&&d.nodeType!==9)return[];if(!b||typeof b!="string")return e;var 
i,j,k,l,n,q,r,t,u=!0,v=m.isXML(d),w=[],x=b;do{a.exec(""),i=a.exec(x);if(i){x=i[3],w.push(i[1]);if(i[2]){l=i[3];break}}}while(i);if(w.length>1&&p.exec(b))if(w.length===2&&o.relative[w[0]])j=y(w[0]+w[1],d,f);else{j=o.relative[w[0]]?[d]:m(w.shift(),d);while(w.length)b=w.shift(),o.relative[b]&&(b+=w.shift()),j=y(b,j,f)}else{!f&&w.length>1&&d.nodeType===9&&!v&&o.match.ID.test(w[0])&&!o.match.ID.test(w[w.length-1])&&(n=m.find(w.shift(),d,v),d=n.expr?m.filter(n.expr,n.set)[0]:n.set[0]);if(d){n=f?{expr:w.pop(),set:s(f)}:m.find(w.pop(),w.length===1&&(w[0]==="~"||w[0]==="+")&&d.parentNode?d.parentNode:d,v),j=n.expr?m.filter(n.expr,n.set):n.set,w.length>0?k=s(j):u=!1;while(w.length)q=w.pop(),r=q,o.relative[q]?r=w.pop():q="",r==null&&(r=d),o.relative[q](k,r,v)}else k=w=[]}k||(k=j),k||m.error(q||b);if(g.call(k)==="[object Array]")if(!u)e.push.apply(e,k);else if(d&&d.nodeType===1)for(t=0;k[t]!=null;t++)k[t]&&(k[t]===!0||k[t].nodeType===1&&m.contains(d,k[t]))&&e.push(j[t]);else for(t=0;k[t]!=null;t++)k[t]&&k[t].nodeType===1&&e.push(j[t]);else s(k,e);l&&(m(l,h,e,f),m.uniqueSort(e));return e};m.uniqueSort=function(a){if(u){h=i,a.sort(u);if(h)for(var b=1;b0},m.find=function(a,b,c){var d,e,f,g,h,i;if(!a)return[];for(e=0,f=o.order.length;e":function(a,b){var c,d=typeof b=="string",e=0,f=a.length;if(d&&!l.test(b)){b=b.toLowerCase();for(;e=0)?c||d.push(h):c&&(b[g]=!1));return!1},ID:function(a){return a[1].replace(j,"")},TAG:function(a,b){return a[1].replace(j,"").toLowerCase()},CHILD:function(a){if(a[1]==="nth"){a[2]||m.error(a[0]),a[2]=a[2].replace(/^\+|\s*/g,"");var b=/(-?)(\d*)(?:n([+\-]?\d*))?/.exec(a[2]==="even"&&"2n"||a[2]==="odd"&&"2n+1"||!/\D/.test(a[2])&&"0n+"+a[2]||a[2]);a[2]=b[1]+(b[2]||1)-0,a[3]=b[3]-0}else a[2]&&m.error(a[0]);a[0]=e++;return a},ATTR:function(a,b,c,d,e,f){var g=a[1]=a[1].replace(j,"");!f&&o.attrMap[g]&&(a[1]=o.attrMap[g]),a[4]=(a[4]||a[5]||"").replace(j,""),a[2]==="~="&&(a[4]=" "+a[4]+" ");return 
a},PSEUDO:function(b,c,d,e,f){if(b[1]==="not")if((a.exec(b[3])||"").length>1||/^\w/.test(b[3]))b[3]=m(b[3],null,null,c);else{var g=m.filter(b[3],c,d,!0^f);d||e.push.apply(e,g);return!1}else if(o.match.POS.test(b[0])||o.match.CHILD.test(b[0]))return!0;return b},POS:function(a){a.unshift(!0);return a}},filters:{enabled:function(a){return a.disabled===!1&&a.type!=="hidden"},disabled:function(a){return a.disabled===!0},checked:function(a){return a.checked===!0},selected:function(a){a.parentNode&&a.parentNode.selectedIndex;return a.selected===!0},parent:function(a){return!!a.firstChild},empty:function(a){return!a.firstChild},has:function(a,b,c){return!!m(c[3],a).length},header:function(a){return/h\d/i.test(a.nodeName)},text:function(a){var b=a.getAttribute("type"),c=a.type;return a.nodeName.toLowerCase()==="input"&&"text"===c&&(b===c||b===null)},radio:function(a){return a.nodeName.toLowerCase()==="input"&&"radio"===a.type},checkbox:function(a){return a.nodeName.toLowerCase()==="input"&&"checkbox"===a.type},file:function(a){return a.nodeName.toLowerCase()==="input"&&"file"===a.type},password:function(a){return a.nodeName.toLowerCase()==="input"&&"password"===a.type},submit:function(a){var b=a.nodeName.toLowerCase();return(b==="input"||b==="button")&&"submit"===a.type},image:function(a){return a.nodeName.toLowerCase()==="input"&&"image"===a.type},reset:function(a){var b=a.nodeName.toLowerCase();return(b==="input"||b==="button")&&"reset"===a.type},button:function(a){var b=a.nodeName.toLowerCase();return b==="input"&&"button"===a.type||b==="button"},input:function(a){return/input|select|textarea|button/i.test(a.nodeName)},focus:function(a){return a===a.ownerDocument.activeElement}},setFilters:{first:function(a,b){return b===0},last:function(a,b,c,d){return b===d.length-1},even:function(a,b){return b%2===0},odd:function(a,b){return b%2===1},lt:function(a,b,c){return bc[3]-0},nth:function(a,b,c){return c[3]-0===b},eq:function(a,b,c){return 
c[3]-0===b}},filter:{PSEUDO:function(a,b,c,d){var e=b[1],f=o.filters[e];if(f)return f(a,c,b,d);if(e==="contains")return(a.textContent||a.innerText||n([a])||"").indexOf(b[3])>=0;if(e==="not"){var g=b[3];for(var h=0,i=g.length;h=0}},ID:function(a,b){return a.nodeType===1&&a.getAttribute("id")===b},TAG:function(a,b){return b==="*"&&a.nodeType===1||!!a.nodeName&&a.nodeName.toLowerCase()===b},CLASS:function(a,b){return(" "+(a.className||a.getAttribute("class"))+" ").indexOf(b)>-1},ATTR:function(a,b){var c=b[1],d=m.attr?m.attr(a,c):o.attrHandle[c]?o.attrHandle[c](a):a[c]!=null?a[c]:a.getAttribute(c),e=d+"",f=b[2],g=b[4];return d==null?f==="!=":!f&&m.attr?d!=null:f==="="?e===g:f==="*="?e.indexOf(g)>=0:f==="~="?(" "+e+" ").indexOf(g)>=0:g?f==="!="?e!==g:f==="^="?e.indexOf(g)===0:f==="$="?e.substr(e.length-g.length)===g:f==="|="?e===g||e.substr(0,g.length+1)===g+"-":!1:e&&d!==!1},POS:function(a,b,c,d){var e=b[2],f=o.setFilters[e];if(f)return f(a,c,b,d)}}},p=o.match.POS,q=function(a,b){return"\\"+(b-0+1)};for(var r in o.match)o.match[r]=new RegExp(o.match[r].source+/(?![^\[]*\])(?![^\(]*\))/.source),o.leftMatch[r]=new RegExp(/(^(?:.|\r|\n)*?)/.source+o.match[r].source.replace(/\\(\d+)/g,q));o.match.globalPOS=p;var s=function(a,b){a=Array.prototype.slice.call(a,0);if(b){b.push.apply(b,a);return b}return a};try{Array.prototype.slice.call(c.documentElement.childNodes,0)[0].nodeType}catch(t){s=function(a,b){var c=0,d=b||[];if(g.call(a)==="[object Array]")Array.prototype.push.apply(d,a);else if(typeof a.length=="number")for(var e=a.length;c",e.insertBefore(a,e.firstChild),c.getElementById(d)&&(o.find.ID=function(a,c,d){if(typeof c.getElementById!="undefined"&&!d){var e=c.getElementById(a[1]);return e?e.id===a[1]||typeof e.getAttributeNode!="undefined"&&e.getAttributeNode("id").nodeValue===a[1]?[e]:b:[]}},o.filter.ID=function(a,b){var c=typeof a.getAttributeNode!="undefined"&&a.getAttributeNode("id");return 
a.nodeType===1&&c&&c.nodeValue===b}),e.removeChild(a),e=a=null}(),function(){var a=c.createElement("div");a.appendChild(c.createComment("")),a.getElementsByTagName("*").length>0&&(o.find.TAG=function(a,b){var c=b.getElementsByTagName(a[1]);if(a[1]==="*"){var d=[];for(var e=0;c[e];e++)c[e].nodeType===1&&d.push(c[e]);c=d}return c}),a.innerHTML="",a.firstChild&&typeof a.firstChild.getAttribute!="undefined"&&a.firstChild.getAttribute("href")!=="#"&&(o.attrHandle.href=function(a){return a.getAttribute("href",2)}),a=null}(),c.querySelectorAll&&function(){var a=m,b=c.createElement("div"),d="__sizzle__";b.innerHTML="

";if(!b.querySelectorAll||b.querySelectorAll(".TEST").length!==0){m=function(b,e,f,g){e=e||c;if(!g&&!m.isXML(e)){var h=/^(\w+$)|^\.([\w\-]+$)|^#([\w\-]+$)/.exec(b);if(h&&(e.nodeType===1||e.nodeType===9)){if(h[1])return s(e.getElementsByTagName(b),f);if(h[2]&&o.find.CLASS&&e.getElementsByClassName)return s(e.getElementsByClassName(h[2]),f)}if(e.nodeType===9){if(b==="body"&&e.body)return s([e.body],f);if(h&&h[3]){var i=e.getElementById(h[3]);if(!i||!i.parentNode)return s([],f);if(i.id===h[3])return s([i],f)}try{return s(e.querySelectorAll(b),f)}catch(j){}}else if(e.nodeType===1&&e.nodeName.toLowerCase()!=="object"){var k=e,l=e.getAttribute("id"),n=l||d,p=e.parentNode,q=/^\s*[+~]/.test(b);l?n=n.replace(/'/g,"\\$&"):e.setAttribute("id",n),q&&p&&(e=e.parentNode);try{if(!q||p)return s(e.querySelectorAll("[id='"+n+"'] "+b),f)}catch(r){}finally{l||k.removeAttribute("id")}}}return a(b,e,f,g)};for(var e in a)m[e]=a[e];b=null}}(),function(){var a=c.documentElement,b=a.matchesSelector||a.mozMatchesSelector||a.webkitMatchesSelector||a.msMatchesSelector;if(b){var d=!b.call(c.createElement("div"),"div"),e=!1;try{b.call(c.documentElement,"[test!='']:sizzle")}catch(f){e=!0}m.matchesSelector=function(a,c){c=c.replace(/\=\s*([^'"\]]*)\s*\]/g,"='$1']");if(!m.isXML(a))try{if(e||!o.match.PSEUDO.test(c)&&!/!=/.test(c)){var f=b.call(a,c);if(f||!d||a.document&&a.document.nodeType!==11)return f}}catch(g){}return m(c,null,null,[a]).length>0}}}(),function(){var a=c.createElement("div");a.innerHTML="
";if(!!a.getElementsByClassName&&a.getElementsByClassName("e").length!==0){a.lastChild.className="e";if(a.getElementsByClassName("e").length===1)return;o.order.splice(1,0,"CLASS"),o.find.CLASS=function(a,b,c){if(typeof b.getElementsByClassName!="undefined"&&!c)return b.getElementsByClassName(a[1])},a=null}}(),c.documentElement.contains?m.contains=function(a,b){return a!==b&&(a.contains?a.contains(b):!0)}:c.documentElement.compareDocumentPosition?m.contains=function(a,b){return!!(a.compareDocumentPosition(b)&16)}:m.contains=function(){return!1},m.isXML=function(a){var b=(a?a.ownerDocument||a:0).documentElement;return b?b.nodeName!=="HTML":!1};var y=function(a,b,c){var d,e=[],f="",g=b.nodeType?[b]:b;while(d=o.match.PSEUDO.exec(a))f+=d[0],a=a.replace(o.match.PSEUDO,"");a=o.relative[a]?a+"*":a;for(var h=0,i=g.length;h0)for(h=g;h=0:f.filter(a,this).length>0:this.filter(a).length>0)},closest:function(a,b){var c=[],d,e,g=this[0];if(f.isArray(a)){var h=1;while(g&&g.ownerDocument&&g!==b){for(d=0;d-1:f.find.matchesSelector(g,a)){c.push(g);break}g=g.parentNode;if(!g||!g.ownerDocument||g===b||g.nodeType===11)break}}c=c.length>1?f.unique(c):c;return this.pushStack(c,"closest",a)},index:function(a){if(!a)return this[0]&&this[0].parentNode?this.prevAll().length:-1;if(typeof a=="string")return f.inArray(this[0],f(a));return f.inArray(a.jquery?a[0]:a,this)},add:function(a,b){var c=typeof a=="string"?f(a,b):f.makeArray(a&&a.nodeType?[a]:a),d=f.merge(this.get(),c);return this.pushStack(S(c[0])||S(d[0])?d:f.unique(d))},andSelf:function(){return this.add(this.prevObject)}}),f.each({parent:function(a){var b=a.parentNode;return b&&b.nodeType!==11?b:null},parents:function(a){return f.dir(a,"parentNode")},parentsUntil:function(a,b,c){return f.dir(a,"parentNode",c)},next:function(a){return f.nth(a,2,"nextSibling")},prev:function(a){return f.nth(a,2,"previousSibling")},nextAll:function(a){return f.dir(a,"nextSibling")},prevAll:function(a){return 
f.dir(a,"previousSibling")},nextUntil:function(a,b,c){return f.dir(a,"nextSibling",c)},prevUntil:function(a,b,c){return f.dir(a,"previousSibling",c)},siblings:function(a){return f.sibling((a.parentNode||{}).firstChild,a)},children:function(a){return f.sibling(a.firstChild)},contents:function(a){return f.nodeName(a,"iframe")?a.contentDocument||a.contentWindow.document:f.makeArray(a.childNodes)}},function(a,b){f.fn[a]=function(c,d){var e=f.map(this,b,c);L.test(a)||(d=c),d&&typeof d=="string"&&(e=f.filter(d,e)),e=this.length>1&&!R[a]?f.unique(e):e,(this.length>1||N.test(d))&&M.test(a)&&(e=e.reverse());return this.pushStack(e,a,P.call(arguments).join(","))}}),f.extend({filter:function(a,b,c){c&&(a=":not("+a+")");return b.length===1?f.find.matchesSelector(b[0],a)?[b[0]]:[]:f.find.matches(a,b)},dir:function(a,c,d){var e=[],g=a[c];while(g&&g.nodeType!==9&&(d===b||g.nodeType!==1||!f(g).is(d)))g.nodeType===1&&e.push(g),g=g[c];return e},nth:function(a,b,c,d){b=b||1;var e=0;for(;a;a=a[c])if(a.nodeType===1&&++e===b)break;return a},sibling:function(a,b){var c=[];for(;a;a=a.nextSibling)a.nodeType===1&&a!==b&&c.push(a);return c}});var V="abbr|article|aside|audio|bdi|canvas|data|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",W=/ jQuery\d+="(?:\d+|null)"/g,X=/^\s+/,Y=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/ig,Z=/<([\w:]+)/,$=/]","i"),bd=/checked\s*(?:[^=]|=\s*.checked.)/i,be=/\/(java|ecma)script/i,bf=/^\s*",""],legend:[1,"
","
"],thead:[1,"","
"],tr:[2,"","
"],td:[3,"","
"],col:[2,"","
"],area:[1,"",""],_default:[0,"",""]},bh=U(c);bg.optgroup=bg.option,bg.tbody=bg.tfoot=bg.colgroup=bg.caption=bg.thead,bg.th=bg.td,f.support.htmlSerialize||(bg._default=[1,"div
","
"]),f.fn.extend({text:function(a){return f.access(this,function(a){return a===b?f.text(this):this.empty().append((this[0]&&this[0].ownerDocument||c).createTextNode(a))},null,a,arguments.length)},wrapAll:function(a){if(f.isFunction(a))return this.each(function(b){f(this).wrapAll(a.call(this,b))});if(this[0]){var b=f(a,this[0].ownerDocument).eq(0).clone(!0);this[0].parentNode&&b.insertBefore(this[0]),b.map(function(){var a=this;while(a.firstChild&&a.firstChild.nodeType===1)a=a.firstChild;return a}).append(this)}return this},wrapInner:function(a){if(f.isFunction(a))return this.each(function(b){f(this).wrapInner(a.call(this,b))});return this.each(function(){var b=f(this),c=b.contents();c.length?c.wrapAll(a):b.append(a)})},wrap:function(a){var b=f.isFunction(a);return this.each(function(c){f(this).wrapAll(b?a.call(this,c):a)})},unwrap:function(){return this.parent().each(function(){f.nodeName(this,"body")||f(this).replaceWith(this.childNodes)}).end()},append:function(){return this.domManip(arguments,!0,function(a){this.nodeType===1&&this.appendChild(a)})},prepend:function(){return this.domManip(arguments,!0,function(a){this.nodeType===1&&this.insertBefore(a,this.firstChild)})},before:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,!1,function(a){this.parentNode.insertBefore(a,this)});if(arguments.length){var a=f +.clean(arguments);a.push.apply(a,this.toArray());return this.pushStack(a,"before",arguments)}},after:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,!1,function(a){this.parentNode.insertBefore(a,this.nextSibling)});if(arguments.length){var a=this.pushStack(this,"after",arguments);a.push.apply(a,f.clean(arguments));return a}},remove:function(a,b){for(var c=0,d;(d=this[c])!=null;c++)if(!a||f.filter(a,[d]).length)!b&&d.nodeType===1&&(f.cleanData(d.getElementsByTagName("*")),f.cleanData([d])),d.parentNode&&d.parentNode.removeChild(d);return this},empty:function(){for(var 
a=0,b;(b=this[a])!=null;a++){b.nodeType===1&&f.cleanData(b.getElementsByTagName("*"));while(b.firstChild)b.removeChild(b.firstChild)}return this},clone:function(a,b){a=a==null?!1:a,b=b==null?a:b;return this.map(function(){return f.clone(this,a,b)})},html:function(a){return f.access(this,function(a){var c=this[0]||{},d=0,e=this.length;if(a===b)return c.nodeType===1?c.innerHTML.replace(W,""):null;if(typeof a=="string"&&!ba.test(a)&&(f.support.leadingWhitespace||!X.test(a))&&!bg[(Z.exec(a)||["",""])[1].toLowerCase()]){a=a.replace(Y,"<$1>");try{for(;d1&&l0?this.clone(!0):this).get();f(e[h])[b](j),d=d.concat(j)}return this.pushStack(d,a,e.selector)}}),f.extend({clone:function(a,b,c){var d,e,g,h=f.support.html5Clone||f.isXMLDoc(a)||!bc.test("<"+a.nodeName+">")?a.cloneNode(!0):bo(a);if((!f.support.noCloneEvent||!f.support.noCloneChecked)&&(a.nodeType===1||a.nodeType===11)&&!f.isXMLDoc(a)){bk(a,h),d=bl(a),e=bl(h);for(g=0;d[g];++g)e[g]&&bk(d[g],e[g])}if(b){bj(a,h);if(c){d=bl(a),e=bl(h);for(g=0;d[g];++g)bj(d[g],e[g])}}d=e=null;return h},clean:function(a,b,d,e){var g,h,i,j=[];b=b||c,typeof b.createElement=="undefined"&&(b=b.ownerDocument||b[0]&&b[0].ownerDocument||c);for(var k=0,l;(l=a[k])!=null;k++){typeof l=="number"&&(l+="");if(!l)continue;if(typeof l=="string")if(!_.test(l))l=b.createTextNode(l);else{l=l.replace(Y,"<$1>");var m=(Z.exec(l)||["",""])[1].toLowerCase(),n=bg[m]||bg._default,o=n[0],p=b.createElement("div"),q=bh.childNodes,r;b===c?bh.appendChild(p):U(b).appendChild(p),p.innerHTML=n[1]+l+n[2];while(o--)p=p.lastChild;if(!f.support.tbody){var 
s=$.test(l),t=m==="table"&&!s?p.firstChild&&p.firstChild.childNodes:n[1]===""&&!s?p.childNodes:[];for(i=t.length-1;i>=0;--i)f.nodeName(t[i],"tbody")&&!t[i].childNodes.length&&t[i].parentNode.removeChild(t[i])}!f.support.leadingWhitespace&&X.test(l)&&p.insertBefore(b.createTextNode(X.exec(l)[0]),p.firstChild),l=p.childNodes,p&&(p.parentNode.removeChild(p),q.length>0&&(r=q[q.length-1],r&&r.parentNode&&r.parentNode.removeChild(r)))}var u;if(!f.support.appendChecked)if(l[0]&&typeof (u=l.length)=="number")for(i=0;i1)},f.extend({cssHooks:{opacity:{get:function(a,b){if(b){var c=by(a,"opacity");return c===""?"1":c}return a.style.opacity}}},cssNumber:{fillOpacity:!0,fontWeight:!0,lineHeight:!0,opacity:!0,orphans:!0,widows:!0,zIndex:!0,zoom:!0},cssProps:{"float":f.support.cssFloat?"cssFloat":"styleFloat"},style:function(a,c,d,e){if(!!a&&a.nodeType!==3&&a.nodeType!==8&&!!a.style){var g,h,i=f.camelCase(c),j=a.style,k=f.cssHooks[i];c=f.cssProps[i]||i;if(d===b){if(k&&"get"in k&&(g=k.get(a,!1,e))!==b)return g;return j[c]}h=typeof d,h==="string"&&(g=bu.exec(d))&&(d=+(g[1]+1)*+g[2]+parseFloat(f.css(a,c)),h="number");if(d==null||h==="number"&&isNaN(d))return;h==="number"&&!f.cssNumber[i]&&(d+="px");if(!k||!("set"in k)||(d=k.set(a,d))!==b)try{j[c]=d}catch(l){}}},css:function(a,c,d){var e,g;c=f.camelCase(c),g=f.cssHooks[c],c=f.cssProps[c]||c,c==="cssFloat"&&(c="float");if(g&&"get"in g&&(e=g.get(a,!0,d))!==b)return e;if(by)return by(a,c)},swap:function(a,b,c){var d={},e,f;for(f in b)d[f]=a.style[f],a.style[f]=b[f];e=c.call(a);for(f in b)a.style[f]=d[f];return e}}),f.curCSS=f.css,c.defaultView&&c.defaultView.getComputedStyle&&(bz=function(a,b){var c,d,e,g,h=a.style;b=b.replace(br,"-$1").toLowerCase(),(d=a.ownerDocument.defaultView)&&(e=d.getComputedStyle(a,null))&&(c=e.getPropertyValue(b),c===""&&!f.contains(a.ownerDocument.documentElement,a)&&(c=f.style(a,b))),!f.support.pixelMargin&&e&&bv.test(b)&&bt.test(c)&&(g=h.width,h.width=c,c=e.width,h.width=g);return 
c}),c.documentElement.currentStyle&&(bA=function(a,b){var c,d,e,f=a.currentStyle&&a.currentStyle[b],g=a.style;f==null&&g&&(e=g[b])&&(f=e),bt.test(f)&&(c=g.left,d=a.runtimeStyle&&a.runtimeStyle.left,d&&(a.runtimeStyle.left=a.currentStyle.left),g.left=b==="fontSize"?"1em":f,f=g.pixelLeft+"px",g.left=c,d&&(a.runtimeStyle.left=d));return f===""?"auto":f}),by=bz||bA,f.each(["height","width"],function(a,b){f.cssHooks[b]={get:function(a,c,d){if(c)return a.offsetWidth!==0?bB(a,b,d):f.swap(a,bw,function(){return bB(a,b,d)})},set:function(a,b){return bs.test(b)?b+"px":b}}}),f.support.opacity||(f.cssHooks.opacity={get:function(a,b){return bq.test((b&&a.currentStyle?a.currentStyle.filter:a.style.filter)||"")?parseFloat(RegExp.$1)/100+"":b?"1":""},set:function(a,b){var c=a.style,d=a.currentStyle,e=f.isNumeric(b)?"alpha(opacity="+b*100+")":"",g=d&&d.filter||c.filter||"";c.zoom=1;if(b>=1&&f.trim(g.replace(bp,""))===""){c.removeAttribute("filter");if(d&&!d.filter)return}c.filter=bp.test(g)?g.replace(bp,e):g+" "+e}}),f(function(){f.support.reliableMarginRight||(f.cssHooks.marginRight={get:function(a,b){return f.swap(a,{display:"inline-block"},function(){return b?by(a,"margin-right"):a.style.marginRight})}})}),f.expr&&f.expr.filters&&(f.expr.filters.hidden=function(a){var b=a.offsetWidth,c=a.offsetHeight;return b===0&&c===0||!f.support.reliableHiddenOffsets&&(a.style&&a.style.display||f.css(a,"display"))==="none"},f.expr.filters.visible=function(a){return!f.expr.filters.hidden(a)}),f.each({margin:"",padding:"",border:"Width"},function(a,b){f.cssHooks[a+b]={expand:function(c){var d,e=typeof c=="string"?c.split(" "):[c],f={};for(d=0;d<4;d++)f[a+bx[d]+b]=e[d]||e[d-2]||e[0];return f}}});var bC=/%20/g,bD=/\[\]$/,bE=/\r?\n/g,bF=/#.*$/,bG=/^(.*?):[ 
\t]*([^\r\n]*)\r?$/mg,bH=/^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i,bI=/^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/,bJ=/^(?:GET|HEAD)$/,bK=/^\/\//,bL=/\?/,bM=/)<[^<]*)*<\/script>/gi,bN=/^(?:select|textarea)/i,bO=/\s+/,bP=/([?&])_=[^&]*/,bQ=/^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+))?)?/,bR=f.fn.load,bS={},bT={},bU,bV,bW=["*/"]+["*"];try{bU=e.href}catch(bX){bU=c.createElement("a"),bU.href="",bU=bU.href}bV=bQ.exec(bU.toLowerCase())||[],f.fn.extend({load:function(a,c,d){if(typeof a!="string"&&bR)return bR.apply(this,arguments);if(!this.length)return this;var e=a.indexOf(" ");if(e>=0){var g=a.slice(e,a.length);a=a.slice(0,e)}var h="GET";c&&(f.isFunction(c)?(d=c,c=b):typeof c=="object"&&(c=f.param(c,f.ajaxSettings.traditional),h="POST"));var i=this;f.ajax({url:a,type:h,dataType:"html",data:c,complete:function(a,b,c){c=a.responseText,a.isResolved()&&(a.done(function(a){c=a}),i.html(g?f("
").append(c.replace(bM,"")).find(g):c)),d&&i.each(d,[c,b,a])}});return this},serialize:function(){return f.param(this.serializeArray())},serializeArray:function(){return this.map(function(){return this.elements?f.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||bN.test(this.nodeName)||bH.test(this.type))}).map(function(a,b){var c=f(this).val();return c==null?null:f.isArray(c)?f.map(c,function(a,c){return{name:b.name,value:a.replace(bE,"\r\n")}}):{name:b.name,value:c.replace(bE,"\r\n")}}).get()}}),f.each("ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(" "),function(a,b){f.fn[b]=function(a){return this.on(b,a)}}),f.each(["get","post"],function(a,c){f[c]=function(a,d,e,g){f.isFunction(d)&&(g=g||e,e=d,d=b);return f.ajax({type:c,url:a,data:d,success:e,dataType:g})}}),f.extend({getScript:function(a,c){return f.get(a,b,c,"script")},getJSON:function(a,b,c){return f.get(a,b,c,"json")},ajaxSetup:function(a,b){b?b$(a,f.ajaxSettings):(b=a,a=f.ajaxSettings),b$(a,b);return a},ajaxSettings:{url:bU,isLocal:bI.test(bV[1]),global:!0,type:"GET",contentType:"application/x-www-form-urlencoded; charset=UTF-8",processData:!0,async:!0,accepts:{xml:"application/xml, text/xml",html:"text/html",text:"text/plain",json:"application/json, text/javascript","*":bW},contents:{xml:/xml/,html:/html/,json:/json/},responseFields:{xml:"responseXML",text:"responseText"},converters:{"* text":a.String,"text html":!0,"text json":f.parseJSON,"text xml":f.parseXML},flatOptions:{context:!0,url:!0}},ajaxPrefilter:bY(bS),ajaxTransport:bY(bT),ajax:function(a,c){function w(a,c,l,m){if(s!==2){s=2,q&&clearTimeout(q),p=b,n=m||"",v.readyState=a>0?4:0;var o,r,u,w=c,x=l?ca(d,v,l):b,y,z;if(a>=200&&a<300||a===304){if(d.ifModified){if(y=v.getResponseHeader("Last-Modified"))f.lastModified[k]=y;if(z=v.getResponseHeader("Etag"))f.etag[k]=z}if(a===304)w="notmodified",o=!0;else 
try{r=cb(d,x),w="success",o=!0}catch(A){w="parsererror",u=A}}else{u=w;if(!w||a)w="error",a<0&&(a=0)}v.status=a,v.statusText=""+(c||w),o?h.resolveWith(e,[r,w,v]):h.rejectWith(e,[v,w,u]),v.statusCode(j),j=b,t&&g.trigger("ajax"+(o?"Success":"Error"),[v,d,o?r:u]),i.fireWith(e,[v,w]),t&&(g.trigger("ajaxComplete",[v,d]),--f.active||f.event.trigger("ajaxStop"))}}typeof a=="object"&&(c=a,a=b),c=c||{};var d=f.ajaxSetup({},c),e=d.context||d,g=e!==d&&(e.nodeType||e instanceof f)?f(e):f.event,h=f.Deferred(),i=f.Callbacks("once memory"),j=d.statusCode||{},k,l={},m={},n,o,p,q,r,s=0,t,u,v={readyState:0,setRequestHeader:function(a,b){if(!s){var c=a.toLowerCase();a=m[c]=m[c]||a,l[a]=b}return this},getAllResponseHeaders:function(){return s===2?n:null},getResponseHeader:function(a){var c;if(s===2){if(!o){o={};while(c=bG.exec(n))o[c[1].toLowerCase()]=c[2]}c=o[a.toLowerCase()]}return c===b?null:c},overrideMimeType:function(a){s||(d.mimeType=a);return this},abort:function(a){a=a||"abort",p&&p.abort(a),w(0,a);return this}};h.promise(v),v.success=v.done,v.error=v.fail,v.complete=i.add,v.statusCode=function(a){if(a){var b;if(s<2)for(b in a)j[b]=[j[b],a[b]];else b=a[v.status],v.then(b,b)}return this},d.url=((a||d.url)+"").replace(bF,"").replace(bK,bV[1]+"//"),d.dataTypes=f.trim(d.dataType||"*").toLowerCase().split(bO),d.crossDomain==null&&(r=bQ.exec(d.url.toLowerCase()),d.crossDomain=!(!r||r[1]==bV[1]&&r[2]==bV[2]&&(r[3]||(r[1]==="http:"?80:443))==(bV[3]||(bV[1]==="http:"?80:443)))),d.data&&d.processData&&typeof d.data!="string"&&(d.data=f.param(d.data,d.traditional)),bZ(bS,d,c,v);if(s===2)return!1;t=d.global,d.type=d.type.toUpperCase(),d.hasContent=!bJ.test(d.type),t&&f.active++===0&&f.event.trigger("ajaxStart");if(!d.hasContent){d.data&&(d.url+=(bL.test(d.url)?"&":"?")+d.data,delete d.data),k=d.url;if(d.cache===!1){var 
x=f.now(),y=d.url.replace(bP,"$1_="+x);d.url=y+(y===d.url?(bL.test(d.url)?"&":"?")+"_="+x:"")}}(d.data&&d.hasContent&&d.contentType!==!1||c.contentType)&&v.setRequestHeader("Content-Type",d.contentType),d.ifModified&&(k=k||d.url,f.lastModified[k]&&v.setRequestHeader("If-Modified-Since",f.lastModified[k]),f.etag[k]&&v.setRequestHeader("If-None-Match",f.etag[k])),v.setRequestHeader("Accept",d.dataTypes[0]&&d.accepts[d.dataTypes[0]]?d.accepts[d.dataTypes[0]]+(d.dataTypes[0]!=="*"?", "+bW+"; q=0.01":""):d.accepts["*"]);for(u in d.headers)v.setRequestHeader(u,d.headers[u]);if(d.beforeSend&&(d.beforeSend.call(e,v,d)===!1||s===2)){v.abort();return!1}for(u in{success:1,error:1,complete:1})v[u](d[u]);p=bZ(bT,d,c,v);if(!p)w(-1,"No Transport");else{v.readyState=1,t&&g.trigger("ajaxSend",[v,d]),d.async&&d.timeout>0&&(q=setTimeout(function(){v.abort("timeout")},d.timeout));try{s=1,p.send(l,w)}catch(z){if(s<2)w(-1,z);else throw z}}return v},param:function(a,c){var d=[],e=function(a,b){b=f.isFunction(b)?b():b,d[d.length]=encodeURIComponent(a)+"="+encodeURIComponent(b)};c===b&&(c=f.ajaxSettings.traditional);if(f.isArray(a)||a.jquery&&!f.isPlainObject(a))f.each(a,function(){e(this.name,this.value)});else for(var g in a)b_(g,a[g],c,e);return d.join("&").replace(bC,"+")}}),f.extend({active:0,lastModified:{},etag:{}});var cc=f.now(),cd=/(\=)\?(&|$)|\?\?/i;f.ajaxSetup({jsonp:"callback",jsonpCallback:function(){return f.expando+"_"+cc++}}),f.ajaxPrefilter("json jsonp",function(b,c,d){var e=typeof b.data=="string"&&/^application\/x\-www\-form\-urlencoded/.test(b.contentType);if(b.dataTypes[0]==="jsonp"||b.jsonp!==!1&&(cd.test(b.url)||e&&cd.test(b.data))){var 
g,h=b.jsonpCallback=f.isFunction(b.jsonpCallback)?b.jsonpCallback():b.jsonpCallback,i=a[h],j=b.url,k=b.data,l="$1"+h+"$2";b.jsonp!==!1&&(j=j.replace(cd,l),b.url===j&&(e&&(k=k.replace(cd,l)),b.data===k&&(j+=(/\?/.test(j)?"&":"?")+b.jsonp+"="+h))),b.url=j,b.data=k,a[h]=function(a){g=[a]},d.always(function(){a[h]=i,g&&f.isFunction(i)&&a[h](g[0])}),b.converters["script json"]=function(){g||f.error(h+" was not called");return g[0]},b.dataTypes[0]="json";return"script"}}),f.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/javascript|ecmascript/},converters:{"text script":function(a){f.globalEval(a);return a}}}),f.ajaxPrefilter("script",function(a){a.cache===b&&(a.cache=!1),a.crossDomain&&(a.type="GET",a.global=!1)}),f.ajaxTransport("script",function(a){if(a.crossDomain){var d,e=c.head||c.getElementsByTagName("head")[0]||c.documentElement;return{send:function(f,g){d=c.createElement("script"),d.async="async",a.scriptCharset&&(d.charset=a.scriptCharset),d.src=a.url,d.onload=d.onreadystatechange=function(a,c){if(c||!d.readyState||/loaded|complete/.test(d.readyState))d.onload=d.onreadystatechange=null,e&&d.parentNode&&e.removeChild(d),d=b,c||g(200,"success")},e.insertBefore(d,e.firstChild)},abort:function(){d&&d.onload(0,1)}}}});var ce=a.ActiveXObject?function(){for(var a in cg)cg[a](0,1)}:!1,cf=0,cg;f.ajaxSettings.xhr=a.ActiveXObject?function(){return!this.isLocal&&ch()||ci()}:ch,function(a){f.extend(f.support,{ajax:!!a,cors:!!a&&"withCredentials"in a})}(f.ajaxSettings.xhr()),f.support.ajax&&f.ajaxTransport(function(c){if(!c.crossDomain||f.support.cors){var d;return{send:function(e,g){var h=c.xhr(),i,j;c.username?h.open(c.type,c.url,c.async,c.username,c.password):h.open(c.type,c.url,c.async);if(c.xhrFields)for(j in 
c.xhrFields)h[j]=c.xhrFields[j];c.mimeType&&h.overrideMimeType&&h.overrideMimeType(c.mimeType),!c.crossDomain&&!e["X-Requested-With"]&&(e["X-Requested-With"]="XMLHttpRequest");try{for(j in e)h.setRequestHeader(j,e[j])}catch(k){}h.send(c.hasContent&&c.data||null),d=function(a,e){var j,k,l,m,n;try{if(d&&(e||h.readyState===4)){d=b,i&&(h.onreadystatechange=f.noop,ce&&delete cg[i]);if(e)h.readyState!==4&&h.abort();else{j=h.status,l=h.getAllResponseHeaders(),m={},n=h.responseXML,n&&n.documentElement&&(m.xml=n);try{m.text=h.responseText}catch(a){}try{k=h.statusText}catch(o){k=""}!j&&c.isLocal&&!c.crossDomain?j=m.text?200:404:j===1223&&(j=204)}}}catch(p){e||g(-1,p)}m&&g(j,k,m,l)},!c.async||h.readyState===4?d():(i=++cf,ce&&(cg||(cg={},f(a).unload(ce)),cg[i]=d),h.onreadystatechange=d)},abort:function(){d&&d(0,1)}}}});var cj={},ck,cl,cm=/^(?:toggle|show|hide)$/,cn=/^([+\-]=)?([\d+.\-]+)([a-z%]*)$/i,co,cp=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]],cq;f.fn.extend({show:function(a,b,c){var d,e;if(a||a===0)return this.animate(ct("show",3),a,b,c);for(var g=0,h=this.length;g=i.duration+this.startTime){this.now=this.end,this.pos=this.state=1,this.update(),i.animatedProperties[this.prop]=!0;for(b in i.animatedProperties)i.animatedProperties[b]!==!0&&(g=!1);if(g){i.overflow!=null&&!f.support.shrinkWrapBlocks&&f.each(["","X","Y"],function(a,b){h.style["overflow"+b]=i.overflow[a]}),i.hide&&f(h).hide();if(i.hide||i.show)for(b in i.animatedProperties)f.style(h,b,i.orig[b]),f.removeData(h,"fxshow"+b,!0),f.removeData(h,"toggle"+b,!0);d=i.complete,d&&(i.complete=!1,d.call(h))}return!1}i.duration==Infinity?this.now=e:(c=e-this.startTime,this.state=c/i.duration,this.pos=f.easing[i.animatedProperties[this.prop]](this.state,c,0,1,i.duration),this.now=this.start+(this.end-this.start)*this.pos),this.update();return!0}},f.extend(f.fx,{tick:function(){var 
a,b=f.timers,c=0;for(;c-1,k={},l={},m,n;j?(l=e.position(),m=l.top,n=l.left):(m=parseFloat(h)||0,n=parseFloat(i)||0),f.isFunction(b)&&(b=b.call(a,c,g)),b.top!=null&&(k.top=b.top-g.top+m),b.left!=null&&(k.left=b.left-g.left+n),"using"in b?b.using.call(a,k):e.css(k)}},f.fn.extend({position:function(){if(!this[0])return null;var a=this[0],b=this.offsetParent(),c=this.offset(),d=cx.test(b[0].nodeName)?{top:0,left:0}:b.offset();c.top-=parseFloat(f.css(a,"marginTop"))||0,c.left-=parseFloat(f.css(a,"marginLeft"))||0,d.top+=parseFloat(f.css(b[0],"borderTopWidth"))||0,d.left+=parseFloat(f.css(b[0],"borderLeftWidth"))||0;return{top:c.top-d.top,left:c.left-d.left}},offsetParent:function(){return this.map(function(){var a=this.offsetParent||c.body;while(a&&!cx.test(a.nodeName)&&f.css(a,"position")==="static")a=a.offsetParent;return a})}}),f.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(a,c){var d=/Y/.test(c);f.fn[a]=function(e){return f.access(this,function(a,e,g){var h=cy(a);if(g===b)return h?c in h?h[c]:f.support.boxModel&&h.document.documentElement[e]||h.document.body[e]:a[e];h?h.scrollTo(d?f(h).scrollLeft():g,d?g:f(h).scrollTop()):a[e]=g},a,e,arguments.length,null)}}),f.each({Height:"height",Width:"width"},function(a,c){var d="client"+a,e="scroll"+a,g="offset"+a;f.fn["inner"+a]=function(){var a=this[0];return a?a.style?parseFloat(f.css(a,c,"padding")):this[c]():null},f.fn["outer"+a]=function(a){var b=this[0];return b?b.style?parseFloat(f.css(b,c,a?"margin":"border")):this[c]():null},f.fn[c]=function(a){return f.access(this,function(a,c,h){var i,j,k,l;if(f.isWindow(a)){i=a.document,j=i.documentElement[d];return f.support.boxModel&&j||i.body&&i.body[d]||j}if(a.nodeType===9){i=a.documentElement;if(i[d]>=i[e])return i[d];return Math.max(a.body[e],i[e],a.body[g],i[g])}if(h===b){k=f.css(a,c),l=parseFloat(k);return f.isNumeric(l)?l:k}f(a).css(c,h)},c,a,arguments.length,null)}}),a.jQuery=a.$=f,typeof 
define=="function"&&define.amd&&define.amd.jQuery&&define("jquery",[],function(){return f})})(window); \ No newline at end of file diff --git a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.jar b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.jar similarity index 73% rename from settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.jar rename to settings/repository/com.google.code.cofoja/cofoja-1.0-r139.jar index 2197e721f..2cbdd380d 100644 Binary files a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.jar and b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.jar differ diff --git a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.xml b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.xml similarity index 57% rename from settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.xml rename to settings/repository/com.google.code.cofoja/cofoja-1.0-r139.xml index 38d4e88f1..202d3d0a3 100644 --- a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.xml +++ b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.xml @@ -1,3 +1,3 @@ - +