// gatk-3.8/lib/edu/mit/broad/sam/SAMRecordSetBuilder.java
//
// 275 lines
// 10 KiB
// Java

package edu.mit.broad.sam;
import edu.mit.broad.sam.util.CloseableIterator;
import edu.mit.broad.sam.util.CoordMath;
import edu.mit.broad.sam.util.RuntimeIOException;
import java.io.File;
import java.io.IOException;
import java.util.*;
/**
 * Factory class for creating SAMRecords for testing purposes. Various methods can be called
 * to add new SAM records (or pairs of records) to a collection which can then be returned at
 * any point. The records must reference human chromosomes (excluding randoms etc.).
 *
 * Every record produced carries the same fixed read length, randomly generated bases and
 * qualities, and the builder's single read group. Because the data is random (unseeded),
 * the exact bases, qualities and first/second-of-pair assignment differ from run to run.
 *
 * Although this is a class for testing, it is in the src tree because it is included in the sam jarfile.
 *
 * @author Tim Fennell
 */
public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
    /** Sequence names placed in the header; the {@code contig} arguments below index into this array. */
    private static final String[] chroms = {
        "chrM", "chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10",
        "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20",
        "chr21", "chr22", "chrX", "chrY"
    };
    private static final byte[] BASES = {'A', 'C', 'G', 'T'};
    private static final String READ_GROUP_ID = "1";
    private static final String SAMPLE = "FREE_SAMPLE";

    private final Random random = new Random();
    private final SAMFileHeader header;
    private final Collection<SAMRecord> records;
    /** Length of every generated read; also used to build the "&lt;n&gt;M" cigar of mapped reads. */
    private final int readLength = 36;
    /** Program record whose ID is stamped on newly created records, or null for no PG tag. */
    private SAMProgramRecord programRecord = null;

    /**
     * Constructs a new SAMRecordSetBuilder with all the data needed to keep the records
     * sorted in coordinate order.
     */
    public SAMRecordSetBuilder() {
        this(true, SAMFileHeader.SortOrder.coordinate);
    }

    /**
     * Constructs a builder whose header lists the built-in chromosomes, the given sort order
     * and a single read group.
     *
     * @param sortForMe if true, records are held in a TreeSet ordered by a comparator chosen
     *                  from sortOrder (query-name comparator for queryname, coordinate
     *                  comparator for anything else); if false, records are kept in
     *                  insertion order in a list
     * @param sortOrder the sort order recorded in the header
     */
    public SAMRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
        final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>();
        for (final String chrom : chroms) {
            sequences.add(new SAMSequenceRecord(chrom));
        }

        this.header = new SAMFileHeader();
        this.header.setSequences(sequences);
        this.header.setSortOrder(sortOrder);

        if (sortForMe) {
            final SAMRecordComparator comparator;
            if (sortOrder == SAMFileHeader.SortOrder.queryname) {
                comparator = new SAMRecordQueryNameComparator();
            } else {
                comparator = new SAMRecordCoordinateComparator(header);
            }
            this.records = new TreeSet<SAMRecord>(comparator);
        } else {
            this.records = new ArrayList<SAMRecord>();
        }

        final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(READ_GROUP_ID);
        readGroupRecord.setSample(SAMPLE);
        final List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
        readGroups.add(readGroupRecord);
        this.header.setReadGroups(readGroups);
    }

    /**
     * Adds the given program record to the header, and assigns the PG tag to any SAMRecords
     * created after it has been added. May be called multiple times in order to assign different
     * PG IDs to different SAMRecords. programRecord may be null to stop assignment of PG tag.
     * It is up to the caller to ensure that program record IDs do not collide.
     *
     * @param programRecord the record to register and use for subsequent reads, or null
     */
    public void setProgramRecord(SAMProgramRecord programRecord) {
        this.programRecord = programRecord;
        if (programRecord != null) {
            this.header.addProgramRecord(programRecord);
        }
    }

    /**
     * Returns the accumulated SAM records. This is the builder's own backing collection
     * (a TreeSet or an ArrayList depending on how the builder was constructed), not a copy.
     */
    public Collection<SAMRecord> getRecords() {
        return this.records;
    }

    /** Returns a CloseableIterator over the collection of SAMRecords. */
    public CloseableIterator<SAMRecord> iterator() {
        return new CloseableIterator<SAMRecord>() {
            private final Iterator<SAMRecord> delegate = records.iterator();

            public void close() {
                // Nothing to release: the iterator only walks the in-memory collection.
            }

            public boolean hasNext() {
                return this.delegate.hasNext();
            }

            public SAMRecord next() {
                return this.delegate.next();
            }

            public void remove() {
                this.delegate.remove();
            }
        };
    }

    /**
     * Adds a skeletal fragment (non-PE) record to the set using the provided
     * contig start and strand information.
     *
     * @param name           read name
     * @param contig         index into the built-in chromosome list
     * @param start          alignment start
     * @param negativeStrand true if the read is on the negative strand
     */
    public void addFrag(final String name, final int contig, final int start, final boolean negativeStrand) {
        final SAMRecord rec = new SAMRecord();
        rec.setReadName(name);
        rec.setReferenceIndex(contig, this.header);
        rec.setReferenceName(chroms[contig]);
        rec.setAlignmentStart(start);
        rec.setReadNegativeStrandFlag(negativeStrand);
        rec.setCigarString(readLength + "M");
        rec.setMappingQuality(255);
        assignGroupTags(rec);
        fillInBasesAndQualities(rec);
        this.records.add(rec);
    }

    /**
     * Adds an unmapped fragment read to the builder.
     *
     * @param name read name
     */
    public void addUnmappedFragment(final String name) {
        final SAMRecord rec = new SAMRecord();
        rec.setReadName(name);
        rec.setReadUmappedFlag(true);
        assignGroupTags(rec);
        fillInBasesAndQualities(rec);
        this.records.add(rec);
    }

    /**
     * Adds a skeletal mapped pair to the set. The pair is assumed to be a well
     * formed pair sitting on a single contig: the end at start1 is placed on the
     * positive strand and the end at start2 on the negative strand, both flagged
     * as a proper pair. Which end is "first of pair" is chosen at random.
     *
     * @param name   read name shared by both ends
     * @param contig index into the built-in chromosome list
     * @param start1 alignment start of the positive-strand end
     * @param start2 alignment start of the negative-strand end
     */
    public void addPair(final String name, final int contig, final int start1, final int start2) {
        final boolean end1IsFirstOfPair = this.random.nextBoolean();
        // Both ends carry the same insert size, spanning start1 to the end of the read at start2.
        final int insertSize = (int) CoordMath.getLength(start1, CoordMath.getEnd(start2, this.readLength));

        final SAMRecord end1 = newMappedEnd(name, contig, start1, start2, false, end1IsFirstOfPair, insertSize);
        final SAMRecord end2 = newMappedEnd(name, contig, start2, start1, true, !end1IsFirstOfPair, insertSize);

        this.records.add(end1);
        this.records.add(end2);
    }

    /**
     * Adds a pair with both ends unmapped to the builder. Which end is
     * "first of pair" is chosen at random.
     *
     * @param name read name shared by both ends
     */
    public void addUnmappedPair(final String name) {
        final boolean end1IsFirstOfPair = this.random.nextBoolean();

        final SAMRecord end1 = newUnmappedEnd(name, end1IsFirstOfPair);
        final SAMRecord end2 = newUnmappedEnd(name, !end1IsFirstOfPair);

        this.records.add(end1);
        this.records.add(end2);
    }

    /** Builds one mapped, properly paired end; the mate is on the same contig and the opposite strand. */
    private SAMRecord newMappedEnd(final String name, final int contig, final int start, final int mateStart,
                                   final boolean negativeStrand, final boolean firstOfPair,
                                   final int insertSize) {
        final SAMRecord rec = new SAMRecord();
        rec.setReadName(name);
        rec.setReferenceIndex(contig, this.header);
        rec.setAlignmentStart(start);
        rec.setReadNegativeStrandFlag(negativeStrand);
        rec.setCigarString(readLength + "M");
        rec.setMappingQuality(255);
        rec.setReadPairedFlag(true);
        rec.setProperPairFlag(true);
        rec.setMateReferenceIndex(contig, this.header);
        rec.setMateAlignmentStart(mateStart);
        rec.setMateNegativeStrandFlag(!negativeStrand);
        rec.setFirstOfPairFlag(firstOfPair);
        rec.setSecondOfPairFlag(!firstOfPair);
        rec.setInferredInsertSize(insertSize);
        assignGroupTags(rec);
        fillInBasesAndQualities(rec);
        return rec;
    }

    /** Builds one unmapped end of a pair. */
    private SAMRecord newUnmappedEnd(final String name, final boolean firstOfPair) {
        final SAMRecord rec = new SAMRecord();
        rec.setReadName(name);
        // The read belongs to a pair, so it must be flagged as paired; the first/second-of-pair
        // flags set below are only meaningful on a paired read.
        rec.setReadPairedFlag(true);
        rec.setReadUmappedFlag(true);
        rec.setProperPairFlag(false);
        rec.setFirstOfPairFlag(firstOfPair);
        rec.setSecondOfPairFlag(!firstOfPair);
        // NOTE(review): the mate is unmapped as well, but no mate-unmapped setter is visible
        // from this file, so that flag is left untouched here -- TODO confirm against SAMRecord.
        assignGroupTags(rec);
        fillInBasesAndQualities(rec);
        return rec;
    }

    /** Tags the record with the builder's read group and, if one is set, the current program group. */
    private void assignGroupTags(final SAMRecord rec) {
        rec.setAttribute(SAMTag.RG.name(), READ_GROUP_ID);
        if (programRecord != null) {
            rec.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId());
        }
    }

    /**
     * Fills in bases and qualities with randomly generated data. The number of bases is the
     * builder's fixed read length; each quality is drawn uniformly from 0 (inclusive) to
     * 50 (exclusive).
     */
    private void fillInBasesAndQualities(final SAMRecord rec) {
        final int length = this.readLength;
        final byte[] bases = new byte[length];
        final byte[] quals = new byte[length];

        for (int i = 0; i < length; ++i) {
            bases[i] = BASES[this.random.nextInt(BASES.length)];
            quals[i] = (byte) this.random.nextInt(50);
        }

        rec.setReadBases(bases);
        rec.setBaseQualities(quals);
    }

    /**
     * Creates a SAMFileReader from the data in this builder by writing the header and all
     * accumulated records to a temporary file and opening a reader on it. The writer is
     * created as "presorted", so the records are written in the order the backing
     * collection yields them. Also sets the header's "VN" attribute to "1.0".
     *
     * @return SAMFileReader over a temporary file containing the accumulated records
     * @throws RuntimeIOException if the temporary file cannot be created
     */
    public SAMFileReader getSamReader() {
        final File tempFile;
        try {
            // Note: the file is written by a BAM writer despite the ".sam" suffix.
            tempFile = File.createTempFile("temp", ".sam");
        } catch (IOException e) {
            throw new RuntimeIOException("problems creating tempfile", e);
        }
        // Schedule cleanup up front so the file is removed even if writing fails, and so that
        // it is not deleted out from under the still-open reader returned to the caller.
        tempFile.deleteOnExit();

        this.header.setAttribute("VN", "1.0");
        final SAMFileWriter w = new SAMFileWriterFactory().makeBAMWriter(this.header, true, tempFile);
        try {
            for (final SAMRecord r : this.getRecords()) {
                w.addAlignment(r);
            }
        } finally {
            // Always release the writer, even when a record is rejected part-way through.
            w.close();
        }

        return new SAMFileReader(tempFile);
    }
}