Adding some utilities to test unmapped reads

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@887 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-03 07:40:34 +00:00
parent 080af519cb
commit 40af4f085c
1 changed files with 120 additions and 18 deletions

View File

@ -3,8 +3,10 @@ package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*; import net.sf.samtools.*;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.*;
import java.util.List;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
/** /**
* *
@ -33,8 +35,11 @@ import java.util.List;
* A collection of utilities for making sam and bam files. Mostly these are for creating sam and bam files for testing purposes. * A collection of utilities for making sam and bam files. Mostly these are for creating sam and bam files for testing purposes.
*/ */
public class ArtificialSamUtils { public class ArtificialSamUtils {
public static final int DEFAULT_READ_LENGTH = 50;
/** /**
* create an artificial sam file * create an artificial sam file
*
* @param filename the filename to write to * @param filename the filename to write to
* @param numberOfChromosomes the number of chromosomes * @param numberOfChromosomes the number of chromosomes
* @param startingChromosome where to start counting * @param startingChromosome where to start counting
@ -58,6 +63,7 @@ public class ArtificialSamUtils {
/** /**
* create an artificial sam file * create an artificial sam file
*
* @param filename the filename to write to * @param filename the filename to write to
* @param numberOfChromosomes the number of chromosomes * @param numberOfChromosomes the number of chromosomes
* @param startingChromosome where to start counting * @param startingChromosome where to start counting
@ -81,6 +87,7 @@ public class ArtificialSamUtils {
/** /**
* Creates an artificial sam header, matching the parameters, chromosomes which will be labeled chr1, chr2, etc * Creates an artificial sam header, matching the parameters, chromosomes which will be labeled chr1, chr2, etc
*
* @param numberOfChromosomes the number of chromosomes to create * @param numberOfChromosomes the number of chromosomes to create
* @param startingChromosome the starting number for the chromosome (most likely set to 1) * @param startingChromosome the starting number for the chromosome (most likely set to 1)
* @param chromosomeSize the length of each chromosome * @param chromosomeSize the length of each chromosome
@ -92,7 +99,7 @@ public class ArtificialSamUtils {
// make up some sequence records // make up some sequence records
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) { for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
SAMSequenceRecord rec = new SAMSequenceRecord("chr" + (x), chromosomeSize /* size */); SAMSequenceRecord rec = new SAMSequenceRecord("chr" + (x), chromosomeSize /* size */);
rec.setSequenceLength(1000); rec.setSequenceLength(chromosomeSize);
dict.addSequence(rec); dict.addSequence(rec);
} }
header.setSequenceDictionary(dict); header.setSequenceDictionary(dict);
@ -101,6 +108,7 @@ public class ArtificialSamUtils {
/** /**
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read * Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
*
* @param header the SAM header to associate the read with * @param header the SAM header to associate the read with
* @param name the name of the read * @param name the name of the read
* @param refIndex the reference index, i.e. what chromosome to associate it with * @param refIndex the reference index, i.e. what chromosome to associate it with
@ -114,10 +122,104 @@ public class ArtificialSamUtils {
record.setReferenceIndex(refIndex); record.setReferenceIndex(refIndex);
record.setAlignmentStart(alignmentStart + 1); record.setAlignmentStart(alignmentStart + 1);
List<CigarElement> elements = new ArrayList<CigarElement>(); List<CigarElement> elements = new ArrayList<CigarElement>();
elements.add(new CigarElement( length, CigarOperator.characterToEnum('M'))); elements.add(new CigarElement(length, CigarOperator.characterToEnum('M')));
record.setCigar(new Cigar(elements)); record.setCigar(new Cigar(elements));
record.setProperPairFlag(false); record.setProperPairFlag(false);
return record; return record;
} }
/**
* create an iterator containing the specified read piles
* @param startingChr the chromosome (reference ID) to start from
* @param endingChr the id to end with
* @param readCount the number of reads per chromosome
* @return StingSAMIterator representing the specified amount of fake data
*/
public static StingSAMIterator unmappedReadIterator(int startingChr, int endingChr, int readCount ) {
SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
Map<Integer, Integer> map = new HashMap<Integer, Integer>();
for (int x = startingChr; x < endingChr; x++) {
map.put(x,readCount);
}
return new ArtificialSAMIterator(startingChr, endingChr, readCount,header);
}
}
/**
* this fake iterator allows us to look at how specific piles of reads are handled
*/
class ArtificialSAMIterator implements StingSAMIterator {
private int currentChromo = 0;
private int currentRead = 0;
private int readCount = 0;
private boolean done = false;
// the next record
private SAMRecord next = null;
SAMFileHeader header = null;
// the passed in parameters
final int sChr;
final int eChr;
final int rCount;
/**
* create the fake iterator, given the mapping of chromosomes and read counts
* @param startingChr the starting chromosome
* @param endingChr the ending chromosome
* @param readCount the number of reads in each chromosome
* @param header the associated header
*/
ArtificialSAMIterator(int startingChr, int endingChr, int readCount, SAMFileHeader header) {
sChr = startingChr;
eChr = endingChr;
rCount = readCount;
this.header = header;
this.currentChromo = startingChr;
}
public Reads getSourceInfo() {
throw new UnsupportedOperationException("We don't support this");
}
public void close() {
// done
currentChromo = Integer.MAX_VALUE;
}
public boolean hasNext() {
if (currentRead >= rCount) {
currentChromo++;
currentRead = 0;
}
if (currentChromo >= eChr) {
return false;
}
int alignment = 0;
if (currentChromo >= 0) {
alignment = currentRead;
} else {
alignment = 0;
}
this.next = ArtificialSamUtils.createArtificialRead(this.header,String.valueOf(readCount), currentChromo, alignment, 50);
currentRead++;
return true;
}
public SAMRecord next() {
return next;
}
public void remove() {
throw new UnsupportedOperationException("You've tried to remove on a StingSAMIterator (unsupported), not to mention that this is a fake iterator.");
}
public Iterator<SAMRecord> iterator() {
return this;
}
} }