diff --git a/java/src/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtils.java b/java/src/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtils.java
new file mode 100644
index 000000000..c99723277
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtils.java
@@ -0,0 +1,129 @@
+package org.broadinstitute.sting.utils.fasta;
+
+import org.broadinstitute.sting.utils.StingException;
+
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+
+/**
+ * @author aaron
+ *
+ * Class ArtificialFastaUtils
+ *
+ * artificial fasta utility class, for generating fake fastas.
+ */
+public class ArtificialFastaUtils {
+    /** how the bases of each generated contig are chosen */
+    public enum BASE_PATTERN { RANDOM, ALL_A, ALL_T, ALL_C, ALL_G }
+
+    /** what bases we support */
+    public enum BASES { A, T, C, G }
+
+    /**
+     * create an artificial fasta file
+     *
+     * @param fileName    the path of the fasta file to write; an existing file is overwritten
+     * @param contigNames the contig names, in output order
+     * @param contigSizes the number of bases in each contig, parallel to contigNames
+     * @param pattern     the pattern to use for the base distribution
+     * @throws StingException if the file cannot be opened, or the two lists differ in length
+     */
+    public static void createArtificialFasta(String fileName,
+                                             List<String> contigNames,
+                                             List<Integer> contigSizes,
+                                             BASE_PATTERN pattern) {
+        PrintStream s;
+        try {
+            s = new PrintStream(new FileOutputStream(fileName));
+        } catch (FileNotFoundException e) {
+            throw new StingException("Filename " + fileName + " passed to the ArtificialFastaUtils generated a FileNotFound exception", e);
+        }
+        try {
+            generateFakeFasta(contigNames, contigSizes, pattern, s);
+        } finally {
+            s.close(); // harmless after a successful write; releases the file handle when generation throws
+        }
+    }
+
+    /** create an artificial fasta on a caller-supplied stream; the stream is closed once all contigs are written */
+    public static void createArtificialFasta(PrintStream stream, List<String> contigNames, List<Integer> contigSizes, BASE_PATTERN pattern) {
+        generateFakeFasta(contigNames, contigSizes, pattern, stream);
+    }
+
+    /**
+     * write each contig to the stream in list order, then close the stream
+     *
+     * @throws StingException if the lists differ in length; nothing is written and the stream is left open
+     */
+    private static void generateFakeFasta(List<String> contigNames, List<Integer> contigSizes, BASE_PATTERN pattern, PrintStream s) {
+        if (contigNames.size() != contigSizes.size()) {
+            throw new StingException("ArtificialContig name and size arrays are not equal sizes");
+        }
+        for (int x = 0; x < contigNames.size(); x++) {
+            new ArtificialContig(contigNames.get(x), contigSizes.get(x), pattern).write(s);
+        }
+        s.close();
+    }
+}
+
+
+/** the fake contig class, a fasta is made up of these */
+class ArtificialContig {
+    /** the maximum number of bases written per line of output */
+    public static final int COLUMN_WIDTH = 80;
+
+    final protected String mName;
+    final protected int mSize;
+    final protected ArtificialFastaUtils.BASE_PATTERN mPattern;
+
+    /** create a contig with the given name, number of bases, and base pattern */
+    public ArtificialContig(String name, int size, ArtificialFastaUtils.BASE_PATTERN pat) {
+        this.mName = name;
+        this.mSize = size;
+        this.mPattern = pat;
+    }
+
+    /**
+     * write out the contig to a stream: a ">name" header line, then the bases in rows of at most COLUMN_WIDTH
+     *
+     * @param stream the stream to print to; it is left open
+     */
+    public void write(PrintStream stream) {
+        stream.println(">" + mName);
+        for (int count = 1; count <= mSize; count++) {
+            stream.print(generateAppropriateBase());
+            // end the row after every COLUMN_WIDTH bases, and after the final (possibly shorter) row
+            if (count % COLUMN_WIDTH == 0 || count == mSize) {
+                stream.println();
+            }
+        }
+    }
+
+    /**
+     * generate the appropriate base, given the BASE_PATTERN
+     *
+     * @return a base, as a string
+     * @throws StingException if the pattern is not one of the cases handled below
+     */
+    public String generateAppropriateBase() {
+        switch (mPattern) {
+            case RANDOM:
+                // Math.random() is in [0, 1): truncate rather than round so the index never reaches bases.length
+                ArtificialFastaUtils.BASES[] bases = ArtificialFastaUtils.BASES.values();
+                return bases[(int) (Math.random() * bases.length)].toString();
+            case ALL_A:
+                return "A";
+            case ALL_T:
+                return "T";
+            case ALL_C:
+                return "C";
+            case ALL_G:
+                return "G";
+            default:
+                throw new StingException("Unknown base pattern");
+        }
+    }
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java
new file mode 100644
index 000000000..fd7f5f9f8
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java
@@ -0,0 +1,28 @@
+package org.broadinstitute.sting.utils.sam;
+
+
+/**
+ *
+ * @author aaron
+ *
+ * Class ArtificialSAMGenerator
+ *
+ * Placeholder for an external utility that creates SAM files and their associated fasta files; it has no members yet.
+ */
+public class ArtificialSAMGenerator {
+}
+
+
+
+
+
+
+
+
+
+
+class ArtificialFASTAUtils {
+    // NOTE(review): empty, package-private stub; its name differs only by case from
+    // org.broadinstitute.sting.utils.fasta.ArtificialFastaUtils, which holds the actual
+    // fasta-generation code. Presumably a leftover that can be removed -- confirm.
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
index 56c72f8ac..0f2d6e84b 100644
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
@@ -161,7 +161,7 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator implements
contigIndex = rec.getSequenceIndex();
}
}
- if (contigIndex < 0) { throw new IllegalArgumentException("Contig" + contig + " doesn't exist"); }
+ if (contigIndex < 0) { throw new IllegalArgumentException("ArtificialContig" + contig + " doesn't exist"); }
while (super.hasNext() && this.peek().getReferenceIndex() < contigIndex) {
super.next();
}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
index ae2b5d1a9..97feb87a0 100755
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
@@ -1,30 +1,12 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*;
-
-import java.io.File;
-import java.util.*;
-
import org.broadinstitute.sting.gatk.iterators.QueryIterator;
-import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.StingException;
-/**
- *
- * User: aaron
- * Date: May 21, 2009
- * Time: 2:57:48 PM
- *
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
- *
- */
-
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
/**
* @author aaron
@@ -46,11 +28,11 @@ public class ArtificialSAMUtils {
SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
File outFile = new File(filename);
- SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(header, false, outFile);
-
+ SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(header, true, outFile);
+
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
- for (int readNumber = 0; readNumber < readsPerChomosome; readNumber++) {
- out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, 100));
+ for (int readNumber = 1; readNumber < readsPerChomosome; readNumber++) {
+ out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, DEFAULT_READ_LENGTH));
}
}
@@ -73,7 +55,7 @@ public class ArtificialSAMUtils {
SAMFileWriter out = new SAMFileWriterFactory().makeSAMWriter(header, false, outFile);
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
- for (int readNumber = 0; readNumber < readsPerChomosome; readNumber++) {
+ for (int readNumber = 1; readNumber <= readsPerChomosome; readNumber++) {
out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, 100));
}
}
@@ -92,6 +74,7 @@ public class ArtificialSAMUtils {
*/
public static SAMFileHeader createArtificialSamHeader( int numberOfChromosomes, int startingChromosome, int chromosomeSize ) {
SAMFileHeader header = new SAMFileHeader();
+ header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
SAMSequenceDictionary dict = new SAMSequenceDictionary();
// make up some sequence records
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
@@ -161,7 +144,7 @@ public class ArtificialSAMUtils {
*/
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
if (alignmentStart == 0)
- throw new StingException("Invalid alignment start for artificial read");
+ throw new StingException("Invalid alignment start for artificial read, start = " + alignmentStart);
SAMRecord record = new SAMRecord(header);
record.setReadName(name);
record.setReferenceIndex(refIndex);
diff --git a/java/test/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtilsTest.java b/java/test/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtilsTest.java
new file mode 100644
index 000000000..e59140628
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/utils/fasta/ArtificialFastaUtilsTest.java
@@ -0,0 +1,43 @@
+package org.broadinstitute.sting.utils.fasta;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * @author aaron
+ *
+ * Class ArtificialFastaUtilsTest
+ *
+ * test out the ArtificialFastaUtils functionality
+ */
+public class ArtificialFastaUtilsTest extends BaseTest {
+    /** generate a fake five-contig fasta, check its sequence dictionary, and write a companion fake BAM */
+    @Test
+    public void testFastaGeneration() {
+        List<String> names = new ArrayList<String>();
+        List<Integer> sizes = new ArrayList<Integer>();
+        for (int x = 0; x < 5; x++) {
+            sizes.add(1000);
+            names.add("chr" + (x + 1));
+        }
+        File temp = new File("tempFileFasta.fasta");
+        try {
+            ArtificialFastaUtils.createArtificialFasta(temp.getName(), names, sizes, ArtificialFastaUtils.BASE_PATTERN.ALL_A);
+            // using the fasta sequence file to test, in reality we should use the indexed version
+            FastaSequenceFile2 fasta = new FastaSequenceFile2(temp);
+            Assert.assertEquals(5, fasta.getSequenceDictionary().getSequences().size());
+            ArtificialSAMUtils.createArtificialBamFile("tempFileBAM.bam", 5, 1, 1000, 600);
+        } finally {
+            // best-effort cleanup so the run leaves no generated files in the working directory
+            temp.delete();
+            new File("tempFileBAM.bam").delete();
+        }
+    }
+}