Adding some utilities to test unmapped reads
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@887 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
080af519cb
commit
40af4f085c
|
|
@ -3,8 +3,10 @@ package org.broadinstitute.sting.utils.sam;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
@ -33,13 +35,16 @@ import java.util.List;
|
||||||
* A collection of utilities for making sam and bam files. Mostly these are for creating sam and bam files for testing purposes.
|
* A collection of utilities for making sam and bam files. Mostly these are for creating sam and bam files for testing purposes.
|
||||||
*/
|
*/
|
||||||
public class ArtificialSamUtils {
|
public class ArtificialSamUtils {
|
||||||
|
public static final int DEFAULT_READ_LENGTH = 50;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create an artificial sam file
|
* create an artificial sam file
|
||||||
* @param filename the filename to write to
|
*
|
||||||
|
* @param filename the filename to write to
|
||||||
* @param numberOfChromosomes the number of chromosomes
|
* @param numberOfChromosomes the number of chromosomes
|
||||||
* @param startingChromosome where to start counting
|
* @param startingChromosome where to start counting
|
||||||
* @param chromosomeSize how large each chromosome is
|
* @param chromosomeSize how large each chromosome is
|
||||||
* @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads)
|
* @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads)
|
||||||
*/
|
*/
|
||||||
public static void createArtificialBamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) {
|
public static void createArtificialBamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) {
|
||||||
SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
|
SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
|
||||||
|
|
@ -58,11 +63,12 @@ public class ArtificialSamUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create an artificial sam file
|
* create an artificial sam file
|
||||||
* @param filename the filename to write to
|
*
|
||||||
|
* @param filename the filename to write to
|
||||||
* @param numberOfChromosomes the number of chromosomes
|
* @param numberOfChromosomes the number of chromosomes
|
||||||
* @param startingChromosome where to start counting
|
* @param startingChromosome where to start counting
|
||||||
* @param chromosomeSize how large each chromosome is
|
* @param chromosomeSize how large each chromosome is
|
||||||
* @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads)
|
* @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads)
|
||||||
*/
|
*/
|
||||||
public static void createArtificialSamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) {
|
public static void createArtificialSamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) {
|
||||||
SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
|
SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
|
||||||
|
|
@ -81,9 +87,10 @@ public class ArtificialSamUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an artificial sam header, matching the parameters, chromosomes which will be labeled chr1, chr2, etc
|
* Creates an artificial sam header, matching the parameters, chromosomes which will be labeled chr1, chr2, etc
|
||||||
|
*
|
||||||
* @param numberOfChromosomes the number of chromosomes to create
|
* @param numberOfChromosomes the number of chromosomes to create
|
||||||
* @param startingChromosome the starting number for the chromosome (most likely set to 1)
|
* @param startingChromosome the starting number for the chromosome (most likely set to 1)
|
||||||
* @param chromosomeSize the length of each chromosome
|
* @param chromosomeSize the length of each chromosome
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public static SAMFileHeader createArtificialSamHeader(int numberOfChromosomes, int startingChromosome, int chromosomeSize) {
|
public static SAMFileHeader createArtificialSamHeader(int numberOfChromosomes, int startingChromosome, int chromosomeSize) {
|
||||||
|
|
@ -92,7 +99,7 @@ public class ArtificialSamUtils {
|
||||||
// make up some sequence records
|
// make up some sequence records
|
||||||
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
|
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
|
||||||
SAMSequenceRecord rec = new SAMSequenceRecord("chr" + (x), chromosomeSize /* size */);
|
SAMSequenceRecord rec = new SAMSequenceRecord("chr" + (x), chromosomeSize /* size */);
|
||||||
rec.setSequenceLength(1000);
|
rec.setSequenceLength(chromosomeSize);
|
||||||
dict.addSequence(rec);
|
dict.addSequence(rec);
|
||||||
}
|
}
|
||||||
header.setSequenceDictionary(dict);
|
header.setSequenceDictionary(dict);
|
||||||
|
|
@ -101,11 +108,12 @@ public class ArtificialSamUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
|
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
|
||||||
* @param header the SAM header to associate the read with
|
*
|
||||||
* @param name the name of the read
|
* @param header the SAM header to associate the read with
|
||||||
* @param refIndex the reference index, i.e. what chromosome to associate it with
|
* @param name the name of the read
|
||||||
|
* @param refIndex the reference index, i.e. what chromosome to associate it with
|
||||||
* @param alignmentStart where to start the alignment
|
* @param alignmentStart where to start the alignment
|
||||||
* @param length the length of the read
|
* @param length the length of the read
|
||||||
* @return the artificial read
|
* @return the artificial read
|
||||||
*/
|
*/
|
||||||
public static SAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) {
|
public static SAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) {
|
||||||
|
|
@ -114,10 +122,104 @@ public class ArtificialSamUtils {
|
||||||
record.setReferenceIndex(refIndex);
|
record.setReferenceIndex(refIndex);
|
||||||
record.setAlignmentStart(alignmentStart + 1);
|
record.setAlignmentStart(alignmentStart + 1);
|
||||||
List<CigarElement> elements = new ArrayList<CigarElement>();
|
List<CigarElement> elements = new ArrayList<CigarElement>();
|
||||||
elements.add(new CigarElement( length, CigarOperator.characterToEnum('M')));
|
elements.add(new CigarElement(length, CigarOperator.characterToEnum('M')));
|
||||||
record.setCigar(new Cigar(elements));
|
record.setCigar(new Cigar(elements));
|
||||||
record.setProperPairFlag(false);
|
record.setProperPairFlag(false);
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create an iterator containing the specified read piles
|
||||||
|
* @param startingChr the chromosome (reference ID) to start from
|
||||||
|
* @param endingChr the id to end with
|
||||||
|
* @param readCount the number of reads per chromosome
|
||||||
|
* @return StingSAMIterator representing the specified amount of fake data
|
||||||
|
*/
|
||||||
|
public static StingSAMIterator unmappedReadIterator(int startingChr, int endingChr, int readCount ) {
|
||||||
|
SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||||
|
Map<Integer, Integer> map = new HashMap<Integer, Integer>();
|
||||||
|
for (int x = startingChr; x < endingChr; x++) {
|
||||||
|
map.put(x,readCount);
|
||||||
|
}
|
||||||
|
return new ArtificialSAMIterator(startingChr, endingChr, readCount,header);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* this fake iterator allows us to look at how specific piles of reads are handled
|
||||||
|
*/
|
||||||
|
class ArtificialSAMIterator implements StingSAMIterator {
|
||||||
|
|
||||||
|
|
||||||
|
private int currentChromo = 0;
|
||||||
|
private int currentRead = 0;
|
||||||
|
private int readCount = 0;
|
||||||
|
private boolean done = false;
|
||||||
|
// the next record
|
||||||
|
private SAMRecord next = null;
|
||||||
|
SAMFileHeader header = null;
|
||||||
|
|
||||||
|
// the passed in parameters
|
||||||
|
final int sChr;
|
||||||
|
final int eChr;
|
||||||
|
final int rCount;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create the fake iterator, given the mapping of chromosomes and read counts
|
||||||
|
* @param startingChr the starting chromosome
|
||||||
|
* @param endingChr the ending chromosome
|
||||||
|
* @param readCount the number of reads in each chromosome
|
||||||
|
* @param header the associated header
|
||||||
|
*/
|
||||||
|
ArtificialSAMIterator(int startingChr, int endingChr, int readCount, SAMFileHeader header) {
|
||||||
|
sChr = startingChr;
|
||||||
|
eChr = endingChr;
|
||||||
|
rCount = readCount;
|
||||||
|
this.header = header;
|
||||||
|
this.currentChromo = startingChr;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
throw new UnsupportedOperationException("We don't support this");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
// done
|
||||||
|
currentChromo = Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
if (currentRead >= rCount) {
|
||||||
|
currentChromo++;
|
||||||
|
currentRead = 0;
|
||||||
|
}
|
||||||
|
if (currentChromo >= eChr) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int alignment = 0;
|
||||||
|
if (currentChromo >= 0) {
|
||||||
|
alignment = currentRead;
|
||||||
|
} else {
|
||||||
|
alignment = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.next = ArtificialSamUtils.createArtificialRead(this.header,String.valueOf(readCount), currentChromo, alignment, 50);
|
||||||
|
currentRead++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord next() {
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("You've tried to remove on a StingSAMIterator (unsupported), not to mention that this is a fake iterator.");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<SAMRecord> iterator() {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue