/* * The Broad Institute * SOFTWARE COPYRIGHT NOTICE AGREEMENT * This software and its documentation are copyright 2008 by the * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ package edu.mit.broad.sam.apps; import edu.mit.broad.sam.*; import java.io.File; import java.util.HashMap; import java.util.List; import java.util.Map; public class CompareSAMs { public static void main(final String[] argv) { if (argv.length != 2) { System.err.println("ERROR: Incorrect number of arguments"); usage(); System.exit(1); } final CompareSAMs compareSAMs = new CompareSAMs(argv); if (!compareSAMs.areEqual()) { System.exit(1); } } private static void usage() { System.err.println("USAGE: CompareSAMS "); } private final String[] samFiles; private final SAMFileReader[] samReaders = new SAMFileReader[2]; private boolean sequenceDictionariesDiffer; private int mappingsMatch = 0; private int unmappedBoth = 0; private int unmappedLeft = 0; private int unmappedRight = 0; private int mappingsDiffer = 0; private int missingLeft = 0; private int missingRight = 0; private boolean areEqual; public CompareSAMs(final String[] samFiles) { this.samFiles = samFiles; for (int i = 0; i < samFiles.length; ++i) { samReaders[i] = new SAMFileReader(new File(samFiles[i])); } areEqual = compareHeaders(); areEqual = compareAlignments() && areEqual; printReport(); if (!areEqual) { System.out.println("SAM files differ."); } else { System.out.println("SAM files match."); } } private void printReport() { System.out.println("Match\t" + mappingsMatch); System.out.println("Differ\t" + mappingsDiffer); System.out.println("Unmapped_both\t" + unmappedBoth); System.out.println("Unmapped_left\t" + unmappedLeft); System.out.println("Unmapped_right\t" + unmappedRight); System.out.println("Missing_left\t" + missingLeft); System.out.println("Missing_right\t" + missingRight); } private boolean compareAlignments() { if (!compareValues(samReaders[0].getFileHeader().getSortOrder(), samReaders[1].getFileHeader().getSortOrder(), "Sort Order")) { System.out.println("Cannot compare alignments if sort orders differ."); return false; } switch (samReaders[0].getFileHeader().getSortOrder()) { case coordinate: if (sequenceDictionariesDiffer) { System.out.println("Cannot compare coordinate-sorted SAM files because sequence dictionaries differ."); return false; } return compareCoordinateSortedAlignments(); case queryname: return compareQueryNameSortedAlignments(); case unsorted: return compareUnsortedAlignments(); default: // unreachable assert(false); return false; } } private boolean compareCoordinateSortedAlignments() { final NotPrimarySkippingIterator itLeft = new NotPrimarySkippingIterator(samReaders[0].iterator()); final NotPrimarySkippingIterator itRight = new NotPrimarySkippingIterator(samReaders[1].iterator()); // Save any reads which haven't been matched during in-order scan. final Map leftUnmatched = new HashMap(); final Map rightUnmatched = new HashMap(); boolean ret = true; while (itLeft.hasCurrent()) { if (!itRight.hasCurrent()) { // Exhausted right side. See if any of the remaining left reads match // any of the saved right reads. for( ; itLeft.hasCurrent(); itLeft.advance()) { final SAMRecord left = itLeft.getCurrent(); final SAMRecord right = rightUnmatched.remove(left.getReadName()); if (right == null) { ++missingRight; } else { tallyAlignmentRecords(left, right); } } break; } // Don't assume stability of order beyond the coordinate. Therefore grab all the // reads from the left that has the same coordinate. final SAMRecord left = itLeft.getCurrent(); final Map leftCurrentCoordinate = new HashMap(); leftCurrentCoordinate.put(left.getReadName(), left); while (itLeft.advance()) { final SAMRecord nextLeft = itLeft.getCurrent(); if (compareAlignmentCoordinates(left, nextLeft) == 0) { leftCurrentCoordinate.put(nextLeft.getReadName(), nextLeft); } else { break; } } // Advance the right iterator until it is >= the left reads that have just been grabbed while (itRight.hasCurrent() && compareAlignmentCoordinates(left, itRight.getCurrent()) > 0) { final SAMRecord right = itRight.getCurrent(); rightUnmatched.put(right.getReadName(), right); itRight.advance(); } // For each right read that has the same coordinate as the current left reads, // see if there is a matching left read. If so, process and discard. If not, // save the right read for later. for (;itRight.hasCurrent() && compareAlignmentCoordinates(left, itRight.getCurrent()) == 0; itRight.advance()) { final SAMRecord right = itRight.getCurrent(); final SAMRecord matchingLeft = leftCurrentCoordinate.remove(right.getReadName()); if (matchingLeft != null) { ret = tallyAlignmentRecords(matchingLeft, right) && ret; } else { rightUnmatched.put(right.getReadName(), right); } } // Anything left in leftCurrentCoordinate has not been matched for (final SAMRecord samRecord : leftCurrentCoordinate.values()) { leftUnmatched.put(samRecord.getReadName(), samRecord); } } // The left iterator has been exhausted. See if any of the remaining right reads // match any of the saved left reads. for( ; itRight.hasCurrent(); itRight.advance()) { final SAMRecord right = itRight.getCurrent(); final SAMRecord left = leftUnmatched.remove(right.getReadName()); if (left != null) { tallyAlignmentRecords(left, right); } else { ++missingLeft; } } // Look up reads that were unmatched from left, and see if they are in rightUnmatched. // If found, remove from rightUnmatched and tally. for (final Map.Entry leftEntry : leftUnmatched.entrySet()) { final String readName = leftEntry.getKey(); final SAMRecord left = leftEntry.getValue(); final SAMRecord right = rightUnmatched.remove(readName); if (right == null) { ++missingRight; continue; } tallyAlignmentRecords(left, right); } // Any elements remaining in rightUnmatched are guaranteed not to be in leftUnmatched. missingLeft += rightUnmatched.size(); if (ret) { if (missingLeft > 0 || missingRight > 0 || mappingsDiffer > 0 || unmappedLeft > 0 || unmappedRight > 0) { ret = false; } } return ret; } private int compareAlignmentCoordinates(final SAMRecord left, final SAMRecord right) { final String leftReferenceName = left.getReferenceName(); final String rightReferenceName = right.getReferenceName(); if (leftReferenceName == null && rightReferenceName == null) { return 0; } else if (leftReferenceName == null) { return 1; } else if (rightReferenceName == null) { return -1; } final int leftReferenceIndex = samReaders[0].getFileHeader().getSequenceIndex(leftReferenceName); final int rightReferenceIndex = samReaders[0].getFileHeader().getSequenceIndex(rightReferenceName); assert(leftReferenceIndex >= 0); assert(rightReferenceIndex >= 0); if (leftReferenceIndex != rightReferenceIndex) { return leftReferenceIndex - rightReferenceIndex; } return left.getAlignmentStart() - right.getAlignmentStart(); } private boolean compareQueryNameSortedAlignments() { final NotPrimarySkippingIterator it1 = new NotPrimarySkippingIterator(samReaders[0].iterator()); final NotPrimarySkippingIterator it2 = new NotPrimarySkippingIterator(samReaders[1].iterator()); boolean ret = true; while (it1.hasCurrent()) { if (!it2.hasCurrent()) { missingRight += countRemaining(it1); return false; } final int cmp = it1.getCurrent().getReadName().compareTo(it2.getCurrent().getReadName()); if (cmp < 0) { ++missingRight; it1.advance(); ret = false; } else if (cmp > 0) { ++missingLeft; it2.advance(); ret = false; } else { if (!tallyAlignmentRecords(it1.getCurrent(), it2.getCurrent())) { ret = false; } it1.advance(); it2.advance(); } } if (it2.hasCurrent()) { missingLeft += countRemaining(it2); return false; } return ret; } private boolean compareUnsortedAlignments() { final NotPrimarySkippingIterator it1 = new NotPrimarySkippingIterator(samReaders[0].iterator()); final NotPrimarySkippingIterator it2 = new NotPrimarySkippingIterator(samReaders[1].iterator()); boolean ret = true; for (; it1.hasCurrent(); it1.advance(), it2.advance()) { if (!it2.hasCurrent()) { missingRight += countRemaining(it1); return false; } final SAMRecord s1 = it1.getCurrent(); final SAMRecord s2 = it2.getCurrent(); if (!compareValues(s1.getReadName(), s2.getReadName(), "Read names")) { System.out.println("Read names cease agreeing in unsorted SAM files . Comparison aborting."); } ret = tallyAlignmentRecords(s1, s2) && ret; } if (it2.hasCurrent()) { missingLeft += countRemaining(it2); return false; } return ret; } private int countRemaining(final NotPrimarySkippingIterator it) { int i; for (i = 0; it.hasCurrent(); ++i) { it.advance(); } return i; } private boolean tallyAlignmentRecords(final SAMRecord s1, final SAMRecord s2) { assert (s1.getReadName().equals(s2.getReadName())); if (s1.getReadUnmappedFlag() && s2.getReadUnmappedFlag()) { ++unmappedBoth; return true; } if (s1.getReadUnmappedFlag()) { ++unmappedLeft; return false; } if (s2.getReadUnmappedFlag()) { ++unmappedRight; return false; } final boolean ret = (s1.getReferenceName().equals(s2.getReferenceName()) && s1.getAlignmentStart() == s2.getAlignmentStart() && s1.getReadNegativeStrandFlag() == s1.getReadNegativeStrandFlag()); if (!ret) { ++mappingsDiffer; } else { ++mappingsMatch; } return ret; } private boolean compareHeaders() { final SAMFileHeader h1 = samReaders[0].getFileHeader(); final SAMFileHeader h2 = samReaders[1].getFileHeader(); boolean ret = compareValues(h1.getVersion(), h2.getVersion(), "File format version"); ret = compareValues(h1.getCreator(), h2.getCreator(), "File creator") && ret; ret = compareValues(h1.getAttribute("SO"), h2.getAttribute("SO"), "Sort order") && ret; if (!compareSequenceDictionaries(h1, h2)) { ret = false; sequenceDictionariesDiffer = true; } ret = compareReadGroups(h1, h2) && ret; ret = compareProgramRecords(h1, h2) && ret; return ret; } private boolean compareProgramRecords(final SAMFileHeader h1, final SAMFileHeader h2) { final List l1 = h1.getProgramRecords(); final List l2 = h2.getProgramRecords(); if (!compareValues(l1.size(), l2.size(), "Number of read groups")) { return false; } boolean ret = true; for (int i = 0; i < l1.size(); ++i) { ret = compareProgramRecord(l1.get(i), l2.get(i)) && ret; } return ret; } private boolean compareProgramRecord(final SAMProgramRecord programRecord1, final SAMProgramRecord programRecord2) { if (programRecord1 == null && programRecord2 == null) { return true; } if (programRecord1 == null) { reportDifference("null", programRecord2.getProgramGroupId(), "Program Record"); return false; } if (programRecord2 == null) { reportDifference(programRecord1.getProgramGroupId(), "null", "Program Record"); return false; } boolean ret = compareValues(programRecord1.getProgramGroupId(), programRecord2.getProgramGroupId(), "Program Name"); final String[] attributes = {"VN", "CL"}; for (final String attribute: attributes) { ret = compareValues(programRecord1.getAttribute(attribute), programRecord2.getAttribute(attribute), attribute + " Program Record attribute") && ret; } return ret; } private boolean compareReadGroups(final SAMFileHeader h1, final SAMFileHeader h2) { final List l1 = h1.getReadGroups(); final List l2 = h2.getReadGroups(); if (!compareValues(l1.size(), l2.size(), "Number of read groups")) { return false; } boolean ret = true; for (int i = 0; i < l1.size(); ++i) { ret = compareReadGroup(l1.get(i), l2.get(i)) && ret; } return ret; } private boolean compareReadGroup(final SAMReadGroupRecord samReadGroupRecord1, final SAMReadGroupRecord samReadGroupRecord2) { boolean ret = compareValues(samReadGroupRecord1.getReadGroupId(), samReadGroupRecord2.getReadGroupId(), "Read Group ID"); ret = compareValues(samReadGroupRecord1.getSample(), samReadGroupRecord2.getSample(), "Sample for read group " + samReadGroupRecord1.getReadGroupId()) && ret; ret = compareValues(samReadGroupRecord1.getLibrary(), samReadGroupRecord2.getLibrary(), "Library for read group " + samReadGroupRecord1.getReadGroupId()) && ret; final String[] attributes = {"DS", "PU", "PI", "CN", "DT", "PL"}; for (final String attribute : attributes) { ret = compareValues(samReadGroupRecord1.getAttribute(attribute), samReadGroupRecord2.getAttribute(attribute), attribute + " for read group " + samReadGroupRecord1.getReadGroupId()) && ret; } return ret; } private boolean compareSequenceDictionaries(final SAMFileHeader h1, final SAMFileHeader h2) { final List s1 = h1.getSequences(); final List s2 = h2.getSequences(); if (s1.size() != s2.size()) { reportDifference(s1.size(), s2.size(), "Length of sequence dictionaries"); return false; } boolean ret = true; for (int i = 0; i < s1.size(); ++i) { ret = compareSequenceRecord(s1.get(i), s2.get(i), i+1) && ret; } return ret; } private boolean compareSequenceRecord(final SAMSequenceRecord sequenceRecord1, final SAMSequenceRecord sequenceRecord2, final int which) { if (!sequenceRecord1.getSequenceName().equals(sequenceRecord2.getSequenceName())) { reportDifference(sequenceRecord1.getSequenceName(), sequenceRecord2.getSequenceName(), "Name of sequence record " + which); return false; } boolean ret = compareValues(sequenceRecord1.getSequenceLength(), sequenceRecord2.getSequenceLength(), "Length of sequence " + sequenceRecord1.getSequenceName()); ret = compareValues(sequenceRecord1.getSpecies(), sequenceRecord2.getSpecies(), "Species of sequence " + sequenceRecord1.getSequenceName()) && ret; ret = compareValues(sequenceRecord1.getAssembly(), sequenceRecord2.getAssembly(), "Assembly of sequence " + sequenceRecord1.getSequenceName()) && ret; ret = compareValues(sequenceRecord1.getAttribute("M5"), sequenceRecord2.getAttribute("M5"), "MD5 of sequence " + sequenceRecord1.getSequenceName()) && ret; ret = compareValues(sequenceRecord1.getAttribute("UR"), sequenceRecord2.getAttribute("UR"), "URI of sequence " + sequenceRecord1.getSequenceName()) && ret; return ret; } private boolean compareValues(final T v1, final T v2, final String label) { if (v1 == null) { if (v2 == null) { return true; } reportDifference(v1, v2, label); return false; } if (v2 == null) { reportDifference(v1, v2, label); return false; } if (!v1.equals(v2)) { reportDifference(v1, v2, label); return false; } return true; } private void reportDifference(final String s1, final String s2, final String label) { System.out.println(label + " differs."); System.out.println(samFiles[0] + ": " + s1); System.out.println(samFiles[1] + ": " + s2); } private void reportDifference(Object o1, Object o2, final String label) { if (o1 == null) { o1 = "null"; } if (o2 == null) { o2 = "null"; } reportDifference(o1.toString(), o2.toString(), label); } public int getMappingsMatch() { return mappingsMatch; } public int getUnmappedBoth() { return unmappedBoth; } public int getUnmappedLeft() { return unmappedLeft; } public int getUnmappedRight() { return unmappedRight; } public int getMappingsDiffer() { return mappingsDiffer; } public int getMissingLeft() { return missingLeft; } public int getMissingRight() { return missingRight; } public boolean areEqual() { return areEqual; } }