diff --git a/java/src/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtils.java b/java/src/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtils.java
new file mode 100644
index 000000000..c99723277
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtils.java
@@ -0,0 +1,151 @@
+package org.broadinstitute.sting.utils.fasta;
+
+import org.broadinstitute.sting.utils.StingException;
+
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+
+/**
+ * @author aaron
+ *
+ * Class ArtificialFastaUtils
+ *
+ * artificial fasta utility class, for generating fake fastas.
+ */
+public class ArtificialFastaUtils {
+    /** the fill patterns a generated contig can use */
+    public enum BASE_PATTERN {
+        RANDOM, ALL_A, ALL_T, ALL_C, ALL_G;
+    }
+
+    // what bases we support
+    public enum BASES {
+        A, T, C, G;
+    }
+
+    /**
+     * create an artificial fasta file on disk
+     *
+     * @param fileName    the file to write the fasta to
+     * @param contigNames the pile of contig names
+     * @param contigSizes the pile of contig sizes, parallel to contigNames
+     * @param pattern     the pattern to use for the base distribution
+     */
+    public static void createArtificialFasta(String fileName,
+                                             List<String> contigNames,
+                                             List<Integer> contigSizes,
+                                             BASE_PATTERN pattern) {
+        PrintStream s;
+        try {
+            s = new PrintStream(new FileOutputStream(fileName));
+        } catch (FileNotFoundException e) {
+            throw new StingException("Filename " + fileName + " passed to the ArtificialFastaUtils generated a FileNotFound exception", e);
+        }
+        generateFakeFasta(contigNames, contigSizes, pattern, s);
+    }
+
+    /**
+     * write an artificial fasta to a caller supplied stream; the stream is closed afterwards
+     *
+     * @param stream      the print stream to write to
+     * @param contigNames the pile of contig names
+     * @param contigSizes the pile of contig sizes, parallel to contigNames
+     * @param pattern     the pattern to use for the base distribution
+     */
+    public static void createArtificialFasta(PrintStream stream,
+                                             List<String> contigNames,
+                                             List<Integer> contigSizes,
+                                             BASE_PATTERN pattern) {
+        generateFakeFasta(contigNames, contigSizes, pattern, stream);
+    }
+
+    /**
+     * create a fake fasta file; the stream is closed on every path, including failure
+     *
+     * @param contigNames the pile of contig names
+     * @param contigSizes the pile of contig sizes
+     * @param pattern     the pattern to use for the base distribution
+     * @param s           the print stream to write to
+     */
+    private static void generateFakeFasta(List<String> contigNames, List<Integer> contigSizes, BASE_PATTERN pattern, PrintStream s) {
+        try {
+            if (contigNames.size() != contigSizes.size()) {
+                throw new StingException("ArtificialContig name and size arrays are not equal sizes");
+            }
+            for (int x = 0; x < contigNames.size(); x++) {
+                ArtificialContig tig = new ArtificialContig(contigNames.get(x), contigSizes.get(x), pattern);
+                tig.write(s);
+            }
+        } finally {
+            // previously a size mismatch threw before close(), leaking the file handle
+            s.close();
+        }
+    }
+
+}
+
+
+/** the fake contig class, a fasta is made up of these */
+class ArtificialContig {
+    public static final int COLUMN_WIDTH = 80;
+
+    final protected String mName;
+    final protected int mSize;
+    final protected ArtificialFastaUtils.BASE_PATTERN mPattern;
+
+    public ArtificialContig(String name, int size, ArtificialFastaUtils.BASE_PATTERN pat) {
+        this.mName = name;
+        this.mSize = size;
+        this.mPattern = pat;
+    }
+
+    /**
+     * write out the contig to a stream: a header line made of '>' plus the contig name,
+     * then the bases, COLUMN_WIDTH to a line
+     *
+     * @param stream the stream to write the contig to
+     */
+    public void write(PrintStream stream) {
+        stream.println(">" + mName);
+        int count = 0;
+        while (count < mSize) {
+            for (int x = 0; x < COLUMN_WIDTH; x++) {
+                stream.print(generateAppropriateBase());
+                count++;
+                if (count >= mSize) {
+                    break;
+                }
+            }
+            stream.println();
+        }
+    }
+
+    /**
+     * generate the appropriate base, given the BASE_PATTERN
+     *
+     * @return a base, as a string
+     */
+    public String generateAppropriateBase() {
+        switch (mPattern) {
+            case RANDOM:
+                // Math.random() is in [0,1), so truncating gives a uniform index into BASES;
+                // rounding Math.random() * 4 could yield 4 and index past the last base
+                ArtificialFastaUtils.BASES[] bases = ArtificialFastaUtils.BASES.values();
+                return bases[(int) (Math.random() * bases.length)].toString();
+            case ALL_A:
+                return "A";
+            case ALL_T:
+                return "T";
+            case ALL_C:
+                return "C";
+            case ALL_G:
+                return "G";
+            default:
+                throw new StingException("Unknown base pattern");
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java
new file mode 100644
index 000000000..fd7f5f9f8
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java
@@ -0,0 +1,28 @@
+package org.broadinstitute.sting.utils.sam;
+
+
+/**
+ *
+ * @author aaron
+ *
+ * Class ArtificialSAMGenerator
+ *
+ * This provides for an external utility, that creates sam files and associates fasta files
+ */
+public class ArtificialSAMGenerator {
+}
+
+
+
+
+
+
+
+
+
+
+class ArtificialFASTAUtils {
+
+
+
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
index 56c72f8ac..0f2d6e84b 100644
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
@@ -161,7 +161,7 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator implements
                 contigIndex = rec.getSequenceIndex();
             }
         }
-        if (contigIndex < 0) { throw new IllegalArgumentException("Contig" + contig + " doesn't exist"); }
+        if (contigIndex < 0) { throw new IllegalArgumentException("Contig " + contig + " doesn't exist"); }
         while (super.hasNext() && this.peek().getReferenceIndex() < contigIndex) {
             super.next();
         }
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
index ae2b5d1a9..97feb87a0 100755
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
@@ -1,30 +1,12 @@
 package org.broadinstitute.sting.utils.sam;
 
 import net.sf.samtools.*;
-
-import java.io.File;
-import java.util.*;
-
 import org.broadinstitute.sting.gatk.iterators.QueryIterator;
-import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
 import org.broadinstitute.sting.utils.StingException;
 
-/**
- *
- * User: aaron
- * Date: May 21, 2009
- * Time: 2:57:48 PM
- *
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
- *
- */
-
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
 
 /**
  * @author aaron
@@ -46,11 +28,11 @@ public class ArtificialSAMUtils {
         SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
         File outFile = new File(filename);
 
-        SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(header, false, outFile);
-
+        SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(header, true, outFile);
+
         for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
-            for (int readNumber = 0; readNumber < readsPerChomosome; readNumber++) {
-                out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, 100));
+            for (int readNumber = 1; readNumber <= readsPerChomosome; readNumber++) {
+                out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, DEFAULT_READ_LENGTH));
             }
         }
 
@@ -73,7 +55,7 @@ public class ArtificialSAMUtils {
         SAMFileWriter out = new SAMFileWriterFactory().makeSAMWriter(header, false, outFile);
 
         for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
-            for (int readNumber = 0; readNumber < readsPerChomosome; readNumber++) {
+            for (int readNumber = 1; readNumber <= readsPerChomosome; readNumber++) {
                 out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, 100));
             }
         }
@@ -92,6 +74,7 @@ public class ArtificialSAMUtils {
      */
     public static SAMFileHeader createArtificialSamHeader( int numberOfChromosomes, int startingChromosome, int chromosomeSize ) {
         SAMFileHeader header = new SAMFileHeader();
+        header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
         SAMSequenceDictionary dict = new SAMSequenceDictionary();
         // make up some sequence records
         for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
@@ -161,7 +144,7 @@ public class ArtificialSAMUtils {
      */
     public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
         if (alignmentStart == 0)
-            throw new StingException("Invalid alignment start for artificial read");
+            throw new StingException("Invalid alignment start for artificial read, start = " + alignmentStart);
         SAMRecord record = new SAMRecord(header);
         record.setReadName(name);
         record.setReferenceIndex(refIndex);
diff --git a/java/test/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtilsTest.java b/java/test/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtilsTest.java
new file mode 100644
index 000000000..e59140628
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtilsTest.java
@@ -0,0 +1,43 @@
+package org.broadinstitute.sting.utils.fasta;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * @author aaron
+ *
+ * Class ArtificialFastaUtilsTest
+ *
+ * test out the ArtificialFastaUtils functionality
+ */
+public class ArtificialFastaUtilsTest extends BaseTest {
+
+    /** generate a fake fasta with five all-A contigs and check it reads back with five sequences */
+    @Test
+    public void testFastaGeneration() {
+        List<String> names = new ArrayList<String>();
+        List<Integer> sizes = new ArrayList<Integer>();
+
+        for (int x = 0; x < 5; x++) {
+            sizes.add(1000);
+            names.add("chr" + (x+1));
+        }
+        File temp = new File("tempFileFasta.fasta");
+        temp.deleteOnExit(); // don't leave generated files behind in the working directory
+        ArtificialFastaUtils.createArtificialFasta(temp.getName(),names,sizes,ArtificialFastaUtils.BASE_PATTERN.ALL_A);
+
+        // using the fasta sequence file to test, in reality we should use the indexed version
+        FastaSequenceFile2 fasta = new FastaSequenceFile2(temp);
+        Assert.assertEquals(5,fasta.getSequenceDictionary().getSequences().size());
+
+        ArtificialSAMUtils.createArtificialBamFile("tempFileBAM.bam",5,1,1000,600);
+        new File("tempFileBAM.bam").deleteOnExit();
+    }
+}