package edu.mit.broad.sam;

import edu.mit.broad.sam.util.CloseableIterator;
import edu.mit.broad.sam.util.CoordMath;
import edu.mit.broad.sam.util.RuntimeIOException;

import java.io.File;
import java.io.IOException;
import java.util.*;

/**
 * Factory class for creating SAMRecords for testing purposes. Various methods can be called
 * to add new SAM records (or pairs of records) to a list which can then be returned at
 * any point. The records must reference human chromosomes (excluding randoms etc.).
 *
 * Although this is a class for testing, it is in the src tree because it is included in the sam jarfile.
 *
 * @author Tim Fennell
 */
public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
    /**
     * Names of the reference sequences written to the header, in header order. The integer
     * "contig" arguments of the add* methods are indexes into this array.
     */
    private static final String[] chroms = {
            "chrM", "chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10",
            "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20",
            "chr21", "chr22", "chrX", "chrY"
    };

    /** Base characters; presumably the alphabet random read bases are drawn from. */
    private static final byte[] BASES = {'A','C','G','T'};

    /** ID of the single read group in the header; also set as the RG attribute of every record. */
    private static final String READ_GROUP_ID = "1";

    /** Sample name assigned to the single read group. */
    private static final String SAMPLE = "FREE_SAMPLE";

    /** Unseeded source of randomness, e.g. for choosing which end of a pair is "first of pair". */
    private final Random random = new Random();

    /** Header describing the sequences, sort order, read group and program records. */
    private SAMFileHeader header;

    /** Records added so far; the concrete collection is chosen by the constructor. */
    private Collection<SAMRecord> records;

    /** Length, in bases, of every generated read (used for the CIGAR and the insert size). */
    private final int readLength = 36;

    /** Program record whose ID is stamped on new records as the PG attribute; null means none. */
    private SAMProgramRecord programRecord = null;

    /**
     * Constructs a new SAMRecordSetBuilder with all the data needed to keep the records
     * sorted in coordinate order.
*/ public SAMRecordSetBuilder() { this(true, SAMFileHeader.SortOrder.coordinate); } public SAMRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) { final List sequences = new ArrayList(); for (final String chrom : chroms) { sequences.add(new SAMSequenceRecord(chrom)); } this.header = new SAMFileHeader(); this.header.setSequences(sequences); this.header.setSortOrder(sortOrder); if (sortForMe) { final SAMRecordComparator comparator; if (sortOrder == SAMFileHeader.SortOrder.queryname) { comparator = new SAMRecordQueryNameComparator(); } else { comparator = new SAMRecordCoordinateComparator(header); } this.records = new TreeSet(comparator); } else { this.records = new ArrayList(); } final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(READ_GROUP_ID); readGroupRecord.setSample(SAMPLE); final List readGroups = new ArrayList(); readGroups.add(readGroupRecord); this.header.setReadGroups(readGroups); } /** * Adds the given program record to the header, and assigns the PG tag to any SAMRecords * created after it has been added. May be called multiple times in order to assign different * PG IDs to different SAMRecords. programRecord may be null to stop assignment of PG tag. * It is up to the caller to ensure that program record IDs do not collide. */ public void setProgramRecord(SAMProgramRecord programRecord) { this.programRecord = programRecord; if (programRecord != null) { this.header.addProgramRecord(programRecord); } } /** Returns the accumulated list of sam records. */ public Collection getRecords() { return this.records; } /** Returns a CloseableIterator over the collection of SAMRecords. */ public CloseableIterator iterator() { return new CloseableIterator() { private final Iterator iterator = records.iterator(); public void close() { /** Do nothing. 
*/ } public boolean hasNext() { return this.iterator.hasNext(); } public SAMRecord next() { return this.iterator.next(); } public void remove() { this.iterator.remove(); } }; } /** * Adds a skeletal fragment (non-PE) record to the set using the provided * contig start and strand information. */ public void addFrag(final String name, final int contig, final int start, final boolean negativeStrand) { final SAMRecord rec = new SAMRecord(); rec.setReadName(name); rec.setReferenceIndex(contig, this.header); rec.setReferenceName(chroms[contig]); rec.setAlignmentStart(start); rec.setReadNegativeStrandFlag(negativeStrand); rec.setCigarString(readLength + "M"); rec.setMappingQuality(255); rec.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); if (programRecord != null) { rec.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); } fillInBasesAndQualities(rec); this.records.add(rec); } /** Adds an unmapped fragment read to the builder. */ public void addUnmappedFragment(final String name) { final SAMRecord rec = new SAMRecord(); rec.setReadName(name); rec.setReadUmappedFlag(true); rec.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); if (programRecord != null) { rec.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); } fillInBasesAndQualities(rec); this.records.add(rec); } /** * Adds a skeletal fragment (non-PE) record to the set using the provided * contig start and strand information. The pair is assumed to be a well * formed pair sitting on a single contig. 
*/
    public void addPair(final String name, final int contig, final int start1, final int start2) {
        // Decide once which end carries the "first of pair" flag; the other end gets the opposite.
        final boolean end1IsFirstOfPair = this.random.nextBoolean();

        // Span from the start of end1 to the end of end2 (start2 plus the fixed read length).
        final int insertSize = (int) CoordMath.getLength(start1, CoordMath.getEnd(start2, this.readLength));

        // The SAM format gives the two ends of a template insert sizes of equal magnitude and
        // opposite sign, so end2 gets the negated value rather than a copy of end1's.
        final SAMRecord end1 = buildMappedPairEnd(name, contig, start1, false, start2, end1IsFirstOfPair, insertSize);
        final SAMRecord end2 = buildMappedPairEnd(name, contig, start2, true, start1, !end1IsFirstOfPair, -insertSize);

        this.records.add(end1);
        this.records.add(end2);
    }

    /**
     * Builds one end of a mapped, properly paired read whose mate sits on the same contig
     * on the opposite strand.
     *
     * @param name           the read name shared by both ends
     * @param contig         index of the sequence both ends are aligned to
     * @param start          alignment start of this end
     * @param negativeStrand true if this end is on the negative strand
     * @param mateStart      alignment start of the other end
     * @param firstOfPair    true to flag this end as first of pair, false for second of pair
     * @param insertSize     signed inferred insert size to record on this end
     * @return the populated record, with bases and qualities filled in
     */
    private SAMRecord buildMappedPairEnd(final String name, final int contig, final int start,
                                         final boolean negativeStrand, final int mateStart,
                                         final boolean firstOfPair, final int insertSize) {
        final SAMRecord rec = new SAMRecord();
        rec.setReadName(name);
        rec.setReferenceIndex(contig, this.header);
        rec.setAlignmentStart(start);
        rec.setReadNegativeStrandFlag(negativeStrand);
        rec.setCigarString(readLength + "M");
        rec.setMappingQuality(255);
        rec.setReadPairedFlag(true);
        rec.setProperPairFlag(true);
        rec.setMateReferenceIndex(contig, this.header);
        rec.setMateAlignmentStart(mateStart);
        rec.setMateNegativeStrandFlag(!negativeStrand);
        rec.setFirstOfPairFlag(firstOfPair);
        rec.setSecondOfPairFlag(!firstOfPair);
        rec.setInferredInsertSize(insertSize);
        rec.setAttribute(SAMTag.RG.name(), READ_GROUP_ID);
        if (programRecord != null) {
            rec.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId());
        }
        fillInBasesAndQualities(rec);
        return rec;
    }

    /** Adds a pair with both ends unmapped to the builder.
*/
    public void addUnmappedPair(final String name) {
        // Decide once which end carries the "first of pair" flag; the other end gets the opposite.
        final boolean end1IsFirstOfPair = this.random.nextBoolean();

        final SAMRecord end1 = buildUnmappedPairEnd(name, end1IsFirstOfPair);
        final SAMRecord end2 = buildUnmappedPairEnd(name, !end1IsFirstOfPair);

        this.records.add(end1);
        this.records.add(end2);
    }

    /**
     * Builds one end of a pair in which neither end is mapped.
     *
     * The record is flagged as paired: the first/second-of-pair and proper-pair flags are
     * only meaningful on a paired read, so leaving the paired flag off (as this method used
     * to) produced records with contradictory flags. The mate is flagged unmapped as well,
     * since both ends of the pair are unmapped.
     *
     * @param name        the read name shared by both ends
     * @param firstOfPair true to flag this end as first of pair, false for second of pair
     * @return the populated record, with bases and qualities filled in
     */
    private SAMRecord buildUnmappedPairEnd(final String name, final boolean firstOfPair) {
        final SAMRecord rec = new SAMRecord();
        rec.setReadName(name);
        rec.setReadPairedFlag(true);
        rec.setReadUmappedFlag(true);
        rec.setMateUnmappedFlag(true);
        rec.setProperPairFlag(false);
        rec.setFirstOfPairFlag(firstOfPair);
        rec.setSecondOfPairFlag(!firstOfPair);
        rec.setAttribute(SAMTag.RG.name(), READ_GROUP_ID);
        if (programRecord != null) {
            rec.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId());
        }
        fillInBasesAndQualities(rec);
        return rec;
    }

    /**
     * Fills in bases and qualities with randomly generated data.
     * Relies on the alignment start and end having been set to get read length.
     */
    private void fillInBasesAndQualities(final SAMRecord rec) {
        final int length = this.readLength;
        final byte[] bases = new byte[length];
        final byte[] quals = new byte[length];
        for (int i=0; i