/* * The Broad Institute * SOFTWARE COPYRIGHT NOTICE AGREEMENT * This software and its documentation are copyright 2009 by the * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ package edu.mit.broad.picard.sam; import java.io.File; import edu.mit.broad.picard.PicardException; import edu.mit.broad.picard.cmdline.CommandLineProgram; import edu.mit.broad.picard.cmdline.Option; import edu.mit.broad.picard.cmdline.Usage; import edu.mit.broad.picard.io.IoUtil; import edu.mit.broad.picard.metrics.AggregateMetricCollector; import edu.mit.broad.picard.metrics.MetricBase; import edu.mit.broad.picard.metrics.MetricCollector; import edu.mit.broad.picard.metrics.MetricsFile; import edu.mit.broad.picard.metrics.StringHeader; import edu.mit.broad.picard.reference.ReferenceSequence; import edu.mit.broad.picard.reference.ReferenceSequenceFile; import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; import edu.mit.broad.picard.sam.CollectAlignmentSummaryMetrics.AlignmentSummaryMetrics.Type; import edu.mit.broad.picard.util.CoordMath; import edu.mit.broad.picard.util.Histogram; import edu.mit.broad.picard.util.SequenceUtil; import edu.mit.broad.sam.AlignmentBlock; import edu.mit.broad.sam.SAMFileHeader; import edu.mit.broad.sam.SAMFileReader; import edu.mit.broad.sam.SAMRecord; import edu.mit.broad.sam.util.CloseableIterator; /** * A command line tool to read a BAM file and produce standard alignment metrics that would be applicable to any alignment. * Metrics to include, but not limited to: * * Metrics are written for the first read of a pair, the second read, and combined for the pair. * * @author Doug Voet */ public class CollectAlignmentSummaryMetrics extends CommandLineProgram { private static final int MAPPING_QUALITY_THRESHOLD = 20; private static final int BASE_QUALITY_THRESHOLD = 20; // Usage and parameters @Usage(programVersion="1.0") public String USAGE = "Reads a SAM or BAM file and writes a file containing summary metrics.\n"; @Option(shortName="I", doc="SAM or BAM file") public File INPUT; @Option(shortName="O", doc="File to write insert size metrics to") public File OUTPUT; @Option(shortName="R", doc="Reference sequence file") public File REFERENCE; @Option(doc="If true (default), \"unsorted\" SAM/BAM files will be considerd coordinate sorted") public Boolean ASSUME_COODINATE_SORTED = Boolean.TRUE; private ReferenceSequenceFile ref; private ReferenceSequence refSequence; private SAMFileHeader samFileHeader; /** Required main method implementation. */ public static void main(String[] argv) { System.exit(new CollectAlignmentSummaryMetrics().instanceMain(argv)); } @Override protected int doWork() { IoUtil.assertFileIsReadable(INPUT); IoUtil.assertFileIsReadable(REFERENCE); IoUtil.assertFileIsWritable(OUTPUT); SAMFileReader in = new SAMFileReader(INPUT); assertCoordinateSortOrder(in); this.ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE); this.samFileHeader = in.getFileHeader(); MetricsFile> file = collectMetrics(in.iterator()); in.close(); file.write(OUTPUT); return 0; } private void assertCoordinateSortOrder(SAMFileReader in) { switch (in.getFileHeader().getSortOrder()) { case coordinate: break; case unsorted: if (this.ASSUME_COODINATE_SORTED) { break; } default: throw new PicardException("Cannot collect summary statistics in file " + INPUT.getAbsoluteFile() + " because it is not sorted in coordinate order."); } } private ReferenceSequence getReference(SAMRecord record) { while (refSequence == null || record.getReferenceIndex(samFileHeader) > refSequence.getContigIndex()) { refSequence = ref.nextSequence(); } if (refSequence == null || record.getReferenceIndex() != refSequence.getContigIndex()) { throw new PicardException("Cannot find reference sequence [" + record.getReferenceIndex() + "] in reference file"); } return refSequence; } /** * Does all the work of iterating through the sam file and collecting summary alignment metrics. */ private MetricsFile> collectMetrics( CloseableIterator samIterator) { final MetricCollector unpairedCollector = constructCollector(Type.UNPAIRED); final MetricCollector firstOfPairCollector = constructCollector(Type.FIRST_OF_PAIR); final MetricCollector secondOfPairCollector = constructCollector(Type.SECOND_OF_PAIR); final MetricCollector pairCollector = constructCollector(Type.PAIR); while (samIterator.hasNext()) { SAMRecord record = samIterator.next(); if (record.getReadPairedFlag()) { if (record.getFirstOfPairFlag()) { firstOfPairCollector.addRecord(record); } else { secondOfPairCollector.addRecord(record); } pairCollector.addRecord(record); } else { unpairedCollector.addRecord(record); } } firstOfPairCollector.onComplete(); secondOfPairCollector.onComplete(); pairCollector.onComplete(); unpairedCollector.onComplete(); MetricsFile> file = getMetricsFile(); file.addHeader(new StringHeader("Input file: " + INPUT.getAbsolutePath())); file.addHeader(new StringHeader("Output file: " + OUTPUT.getAbsolutePath())); file.addHeader(new StringHeader("Reference file: " + REFERENCE.getAbsolutePath())); if (firstOfPairCollector.getMetrics().TOTAL_READS > 0) { file.addMetric(firstOfPairCollector.getMetrics()); // override how bad cycle is determined for paired reads, it should be // the sum of first and second reads pairCollector.getMetrics().BAD_CYCLES = firstOfPairCollector.getMetrics().BAD_CYCLES + secondOfPairCollector.getMetrics().BAD_CYCLES; file.addMetric(secondOfPairCollector.getMetrics()); file.addMetric(pairCollector.getMetrics()); } if (unpairedCollector.getMetrics().TOTAL_READS > 0) { file.addMetric(unpairedCollector.getMetrics()); } return file; } private MetricCollector constructCollector(Type type) { MetricCollector collector = new AggregateMetricCollector(new ReadCounter(), new QualityMappingCounter()); collector.setMetrics(new AlignmentSummaryMetrics()); collector.getMetrics().TYPE = type; return collector; } public static class AlignmentSummaryMetrics extends MetricBase { public enum Type { UNPAIRED, FIRST_OF_PAIR, SECOND_OF_PAIR, PAIR } public Type TYPE; public long TOTAL_READS; public long PF_READS; public long PF_NOISE_READS; public long PF_READS_ALIGNED; public long PF_HQ_ALIGNED_READS; public long PF_HQ_ALIGNED_BASES; public long PF_HQ_ALIGNED_Q20_BASES; public double PF_HQ_MEDIAN_MISMATCHES; public double MEAN_READ_LENGTH; public long READS_ALIGNED_IN_PAIRS; public long BAD_CYCLES; public double STRAND_BALANCE; } /** counts reads that match various conditions */ private class ReadCounter implements MetricCollector { private long numPositiveStrand = 0; private Histogram readLengthHistogram = new Histogram(); private AlignmentSummaryMetrics metrics; @Override public void addRecord(SAMRecord record) { if (record.getNotPrimaryAlignmentFlag()) { // only want 1 count per read so skip non primary alignments return; } metrics.TOTAL_READS++; readLengthHistogram.increment(record.getReadBases().length); if (!record.getReadFailsVendorQualityCheckFlag()) { metrics.PF_READS++; if (isNoiseRead(record)) { metrics.PF_NOISE_READS++; } if (!record.getReadUnmappedFlag()) { metrics.PF_READS_ALIGNED++; } } if (!record.getReadUnmappedFlag() && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) { metrics.READS_ALIGNED_IN_PAIRS++; } if (!record.getReadNegativeStrandFlag()) { numPositiveStrand++; } } @Override public void onComplete() { metrics.MEAN_READ_LENGTH = readLengthHistogram.getMean(); metrics.STRAND_BALANCE = numPositiveStrand / (double) metrics.TOTAL_READS; } private boolean isNoiseRead(SAMRecord record) { final Object noiseAttribute = record.getAttribute(ReservedTagConstants.XN); return (noiseAttribute != null && noiseAttribute.equals(1)); } @Override public void setMetrics(AlignmentSummaryMetrics metrics) { this.metrics = metrics; } @Override public AlignmentSummaryMetrics getMetrics() { return this.metrics; } } /** counts quality mappings & base calls that match various conditions */ private class QualityMappingCounter implements MetricCollector { private Histogram mismatchHistogram = new Histogram(); private Histogram badCycleHistogram = new Histogram(); private AlignmentSummaryMetrics metrics; @Override public void addRecord(SAMRecord record) { if (record.getNotPrimaryAlignmentFlag()) { return; } if (record.getReadUnmappedFlag()) { final byte[] readBases = record.getReadBases(); for (int i = 0; i < readBases.length; i++) { if (SequenceUtil.isNoCall(readBases[i])) { badCycleHistogram.increment(CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i)); } } } else { boolean highQualityMapping = isHighQualityMapping(record); if (highQualityMapping) metrics.PF_HQ_ALIGNED_READS++; final byte[] readBases = record.getReadBases(); final byte[] refBases = getReference(record).getBases(); final byte[] qualities = record.getBaseQualities(); long mismatchCount = 0; for (AlignmentBlock alignmentBlock : record.getAlignmentBlocks()) { final int readIndex = alignmentBlock.getReadStart() - 1; final int refIndex = alignmentBlock.getReferenceStart() - 1; final int length = alignmentBlock.getLength(); if (highQualityMapping) metrics.PF_HQ_ALIGNED_BASES += alignmentBlock.getLength(); for (int i=0; i= BASE_QUALITY_THRESHOLD) { metrics.PF_HQ_ALIGNED_Q20_BASES++; } if (mismatch) { mismatchCount++; } } if (mismatch || SequenceUtil.isNoCall(readBases[readBaseIndex])) { badCycleHistogram.increment(CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i)); } } } mismatchHistogram.increment(mismatchCount); } } private boolean isHighQualityMapping(SAMRecord record) { return !record.getReadFailsVendorQualityCheckFlag() && record.getMappingQuality() >= MAPPING_QUALITY_THRESHOLD; } @Override public void onComplete() { metrics.PF_HQ_MEDIAN_MISMATCHES = mismatchHistogram.getMedian(); metrics.BAD_CYCLES = 0; for (Histogram.Bin cycleBin : badCycleHistogram.values()) { double badCyclePercentage = cycleBin.getValue() / metrics.TOTAL_READS; if (badCyclePercentage >= .8) { metrics.BAD_CYCLES++; } } } @Override public void setMetrics(AlignmentSummaryMetrics metrics) { this.metrics = metrics; } @Override public AlignmentSummaryMetrics getMetrics() { return this.metrics; } } }