diff --git a/playground/java/src/org/broadinstitute/sting/illumina/AbstractFirecrestFileParser.java b/playground/java/src/org/broadinstitute/sting/illumina/AbstractFirecrestFileParser.java
new file mode 100644
index 000000000..33620fa55
--- /dev/null
+++ b/playground/java/src/org/broadinstitute/sting/illumina/AbstractFirecrestFileParser.java
@@ -0,0 +1,160 @@
+/*
+* The Broad Institute
+* SOFTWARE COPYRIGHT NOTICE AGREEMENT
+* This software and its documentation are copyright 2009 by the
+* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+*
+* This software is supplied without any warranty or guaranteed support whatsoever. Neither
+* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/
+//package edu.mit.broad.picard.illumina;
+package org.broadinstitute.sting.illumina;
+
+import edu.mit.broad.picard.util.BasicTextFileParser;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.util.*;
+
+import net.sf.samtools.util.StringUtil;
+
+/**
+ * Abstract base class for implementing parsers for various versions of Firecrest output.
+ *
+ * An instance acts as its own Iterator and Iterable, so it supports a single pass over the
+ * reads: iterator() may be called only once, either explicitly or implicitly via hasNext().
+ */
+public abstract class AbstractFirecrestFileParser
+        implements Iterator<FirecrestReadData>, Iterable<FirecrestReadData>, Closeable {
+    protected final int lane;
+    protected final File firecrestDirectory;
+    /** The read that the next call to next() will return, or null when iteration is exhausted. */
+    private FirecrestReadData next = null;
+    /** Set once iterator() has run, i.e. once prepareToIterate() has been called. */
+    private boolean iterating = false;
+
+    /**
+     * Records the Firecrest directory and the lane to parse. The directory is not examined here.
+     *
+     * @param firecrestDirectory directory where the Firecrest files can be located
+     * @param lane the lane to parse
+     */
+    public AbstractFirecrestFileParser(final File firecrestDirectory, final int lane) {
+        this.lane = lane;
+        this.firecrestDirectory = firecrestDirectory;
+    }
+
+    /**
+     * @return true if the given Firecrest directory contains the appropriate files, or at least enough
+     * of them so that it appears to be a Firecrest directory corresponding to the version of the concrete
+     * FirecrestFileParser implementation.
+     */
+    public abstract boolean isValidFirecrestDirectory();
+
+    /**
+     * Called before iteration begins. If this method is called when isValidFirecrestDirectory() had
+     * returned false, it will generate exceptions that may help the user diagnose the problem.
+     */
+    protected abstract void prepareToIterate();
+
+    /**
+     * @return the next read, or null when there are no more reads
+     */
+    protected abstract FirecrestReadData readNext();
+
+    /**
+     * Prepares the parser and reads the first element ahead.
+     *
+     * @return an iterator over a set of elements of type FirecrestReadData (this object itself)
+     * @throws IllegalStateException if iteration has already been started
+     */
+    public Iterator<FirecrestReadData> iterator() {
+        if (iterating) {
+            throw new IllegalStateException("iterator() method can only be called once, before the first call to hasNext()");
+        }
+        prepareToIterate();
+        next = readNext();
+        iterating = true;
+        return this;
+    }
+
+    /**
+     * Starts iteration implicitly if iterator() has not been called yet.
+     *
+     * @return true if the iteration has more elements. Otherwise returns false.
+     */
+    public boolean hasNext() {
+        if (!iterating) {
+            iterator();
+        }
+        return next != null;
+    }
+
+    /**
+     * Returns the next element in the iteration.
+     *
+     * @return the next element in the iteration
+     * @throws java.util.NoSuchElementException if the iteration has no more elements
+     */
+    public FirecrestReadData next() {
+        if (!hasNext()) {
+            throw new NoSuchElementException("Iteration has no more elements.");
+        }
+
+        final FirecrestReadData result = next;
+        next = readNext();
+        return result;
+    }
+
+    /**
+     * Required method for Iterator API.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    public void remove() {
+        throw new UnsupportedOperationException("Remove() not supported.");
+    }
+
+    /**
+     * Does nothing by default. Override, e.g. to close parser
+     */
+    public void close() {
+    }
+
+    public int getLane() { return this.lane; }
+
+    /**
+     * Convenience method to create a parser for a list of files of the same format that should
+     * be parsed in order defined by FirecrestFilenameComparator. Files that the comparator
+     * considers equal are collapsed to one entry. The array passed in is left untouched.
+     *
+     * @param treatGroupedDelimitersAsOne passed through to the BasicTextFileParser constructor
+     * @param files to be iterated, in arbitrary order
+     * @return parser that iterates through the files in the appropriate order
+     */
+    protected BasicTextFileParser makeParserForTextFiles(final boolean treatGroupedDelimitersAsOne, File[] files) {
+        final SortedSet<File> sortedRead1 = new TreeSet<File>(new FirecrestFilenameComparator());
+        sortedRead1.addAll(Arrays.asList(files));
+        // Copy into an array sized to the set: reusing the (possibly larger) input array would
+        // leave a null marker and stale entries behind the sorted ones.
+        files = sortedRead1.toArray(new File[sortedRead1.size()]);
+        return new BasicTextFileParser(treatGroupedDelimitersAsOne, files);
+    }
+
+    /**
+     * Lists the files directly inside firecrestDirectory whose name matches the given regular expression.
+     *
+     * @param regexp regular expression that the whole file name must match
+     * @return the matching files; empty (never null) if there are none or the directory cannot be listed
+     */
+    protected File[] getFilesMatchingRegexp(final String regexp) {
+        final File[] matches = firecrestDirectory.listFiles(new FilenameFilter() {
+            public boolean accept(final File dir, final String name) {
+                return name.matches(regexp);
+            }
+        });
+        // File.listFiles() returns null when the path is not a directory or an I/O error occurs.
+        return (matches == null) ? new File[0] : matches;
+    }
+}
diff --git a/playground/java/src/org/broadinstitute/sting/illumina/FirecrestFileParser.java b/playground/java/src/org/broadinstitute/sting/illumina/FirecrestFileParser.java
new file mode 100644
index 000000000..24b63f981
--- /dev/null
+++ b/playground/java/src/org/broadinstitute/sting/illumina/FirecrestFileParser.java
@@ -0,0 +1,107 @@
+/*
+* The Broad Institute
+* SOFTWARE COPYRIGHT NOTICE AGREEMENT
+* This software and its documentation are copyright 2009 by the
+* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+*
+* This software is supplied without any warranty or guaranteed support whatsoever. Neither
+* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/
+package org.broadinstitute.sting.illumina;
+
+import edu.mit.broad.picard.util.PasteParser;
+import edu.mit.broad.picard.util.FormatUtil;
+import edu.mit.broad.picard.util.BasicTextFileParser;
+import edu.mit.broad.picard.PicardException;
+
+import java.io.File;
+
+/**
+ * Class to parse the data in an Illumina Firecrest directory and return an iterator over that data, in order
+ * by tile.
+ *
+ * @author Kiran Garimella
+ */
+public class FirecrestFileParser extends AbstractFirecrestFileParser {
+    private static final int NUM_POSITION_FIELDS = 4; // lane, tile, x, y
+    private static final int NUM_CHANNELS = 4; // intensity values per cycle
+
+    private BasicTextFileParser parser;
+    private final FormatUtil formatter = new FormatUtil();
+    private final File[] intensityFiles;
+
+    /**
+     * Looks up the intensity files for the lane; they are not parsed until iteration begins.
+     *
+     * @param firecrestDirectory directory where the Firecrest files can be located
+     * @param lane the lane to parse
+     */
+    public FirecrestFileParser(final File firecrestDirectory, final int lane) {
+        super(firecrestDirectory, lane);
+        // The dots are escaped so that they only match a literal '.' in the file name.
+        intensityFiles = getFilesMatchingRegexp("s_" + lane + "_\\d{4}_int\\.txt(\\.gz)?");
+    }
+
+    /** @return true if at least one intensity file was found for the lane */
+    @Override
+    public boolean isValidFirecrestDirectory() {
+        return intensityFiles != null && intensityFiles.length > 0;
+    }
+
+    /**
+     * Creates the text parser that reads the intensity files in sorted order.
+     *
+     * @throws PicardException if no intensity files were found for the lane
+     */
+    @Override
+    protected void prepareToIterate() {
+        if (intensityFiles == null || intensityFiles.length == 0) {
+            throw new PicardException("No Firecrest 1.3 intensity files found in " + firecrestDirectory.getAbsolutePath() + " for lane " + lane);
+        }
+
+        parser = makeParserForTextFiles(true, intensityFiles);
+    }
+
+    /**
+     * Parses the next line from the parser and constructs a FirecrestReadData object from it.
+     * The first 4 fields are position information for the read, and the remaining values are
+     * the intensity data, 4 per cycle. Trailing fields that do not fill a whole cycle are ignored.
+     *
+     * @return a fully populated FirecrestReadData object, or null when there are no more lines
+     * @throws PicardException if the line has fewer than 4 fields
+     */
+    protected FirecrestReadData readNext() {
+        if (!parser.hasNext()) {
+            return null;
+        }
+        final String[] data = parser.next();
+        if (data.length < NUM_POSITION_FIELDS) {
+            throw new PicardException("Expected at least " + NUM_POSITION_FIELDS + " fields but found " + data.length + " in a Firecrest intensity line for lane " + lane);
+        }
+        final int readLane = formatter.parseInt(data[0]);
+        final int tile = formatter.parseInt(data[1]);
+        final int x = formatter.parseInt(data[2]);
+        final int y = formatter.parseInt(data[3]);
+
+        final int numIntensities = (data.length - NUM_POSITION_FIELDS) / NUM_CHANNELS;
+        final FourIntensity[] intensities = new FourIntensity[numIntensities];
+
+        for (int cycle = 0, index = NUM_POSITION_FIELDS; cycle < numIntensities; cycle++) {
+            final float[] fIntensities = new float[NUM_CHANNELS];
+            for (int channel = 0; channel < NUM_CHANNELS; channel++, index++) {
+                fIntensities[channel] = formatter.parseFloat(data[index]);
+            }
+            intensities[cycle] = new FourIntensity(fIntensities);
+        }
+
+        return new FirecrestReadData(readLane, tile, x, y, intensities);
+    }
+
+    /** Closes the underlying text parser, if one has been created */
+    @Override
+    public void close() {
+        if (parser != null) {
+            parser.close();
+        }
+    }
+}
diff --git a/playground/java/src/org/broadinstitute/sting/illumina/FirecrestFilenameComparator.java b/playground/java/src/org/broadinstitute/sting/illumina/FirecrestFilenameComparator.java
new file mode 100644
index 000000000..c64afd3e4
--- /dev/null
+++ b/playground/java/src/org/broadinstitute/sting/illumina/FirecrestFilenameComparator.java
@@ -0,0 +1,75 @@
+/*
+* The Broad Institute
+* SOFTWARE COPYRIGHT NOTICE AGREEMENT
+* This software and its documentation are copyright 2009 by the
+* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+*
+* This software is supplied without any warranty or guaranteed support whatsoever. Neither
+* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/
+//package edu.mit.broad.picard.illumina;
+package org.broadinstitute.sting.illumina;
+
+import java.io.File;
+import java.util.Comparator;
+
+/**
+ * Comparator for getting Firecrest files in "sorted" order for use by the FirecrestFileParser. Expected order is
+ * by lane in ascending order, then by tile in ascending order, then by read (names without a read sort first).
+ *
+ * IMPORTANT: Currently this class expects to receive ONLY int files.
+ *
+ * @author Kiran Garimella
+ */
+public class FirecrestFilenameComparator implements Comparator<File> {
+    /**
+     * Compares its two arguments for order. Returns a negative integer, zero, or a positive integer as
+     * the first argument is less than, equal to, or greater than the second.
+     *
+     * @param file1 the first file to compare
+     * @param file2 the second file to compare
+     * @return a negative integer, zero, or a positive integer as
+     * the first argument is less than, equal to, or greater than the second.
+     */
+    public int compare(File file1, File file2) {
+        final Integer[] parts1 = parseFileNameParts(file1.getName());
+        final Integer[] parts2 = parseFileNameParts(file2.getName());
+        // Start at index 0 so that the lane takes part in the ordering, as documented.
+        for (int i = 0; i < parts1.length; i++) {
+            final int result = compareNullsFirst(parts1[i], parts2[i]);
+            if (result != 0) {
+                return result;
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Null-safe comparison: an absent (null) part sorts before any present value.
+     */
+    private static int compareNullsFirst(final Integer a, final Integer b) {
+        if (a == null || b == null) {
+            return (a == null ? 0 : 1) - (b == null ? 0 : 1);
+        }
+        return a.compareTo(b);
+    }
+
+    /**
+     * Utility method that splits the given file name on "_" and extracts its numeric parts.
+     *
+     * @param name file name whose second field is the lane, followed either by a 4-character
+     *             tile or by a 1-character read and then the tile
+     * @return a 3-element array holding, in order, the lane, the tile and the read;
+     *         the read is null when the third field is not a single character
+     */
+    private Integer[] parseFileNameParts(String name) {
+        final Integer[] parts = new Integer[3]; // Lane, tile, read
+        final String[] src = name.split("_");
+        parts[0] = Integer.valueOf(src[1]); // Lane is always the second part
+        parts[1] = Integer.valueOf(src[2].length() == 4 ? src[2] : src[3]); // Tile is 3rd or fourth
+        if (src[2].length() == 1) { // a 1-character third part is the read
+            parts[2] = Integer.valueOf(src[2]);
+        }
+        return parts;
+    }
+}
diff --git a/playground/java/src/org/broadinstitute/sting/illumina/FirecrestReadData.java b/playground/java/src/org/broadinstitute/sting/illumina/FirecrestReadData.java
new file mode 100644
index 000000000..dd62cc19d
--- /dev/null
+++ b/playground/java/src/org/broadinstitute/sting/illumina/FirecrestReadData.java
@@ -0,0 +1,57 @@
+/*
+* The Broad Institute
+* SOFTWARE COPYRIGHT NOTICE AGREEMENT
+* This software and its documentation are copyright 2009 by the
+* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+*
+* This software is supplied without any warranty or guaranteed support whatsoever. Neither
+* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/
+package org.broadinstitute.sting.illumina;
+
+/**
+ * Holds all the Firecrest-level data we need (so far) about an individual read.
+ *
+ * @author Kiran Garimella
+ */
+public class FirecrestReadData {
+    final private int laneNumber;
+    final private int tileNumber;
+    final private int xCoordinate;
+    final private int yCoordinate;
+    final private FourIntensity[] intensities;
+
+
+    /**
+     * Constructor that takes everything to populate this object
+     *
+     * @param laneNumber
+     * @param tileNumber
+     * @param xCoordinate
+     * @param yCoordinate
+     * @param intensities
+     */
+    public FirecrestReadData(int laneNumber, int tileNumber, int xCoordinate, int yCoordinate, FourIntensity[] intensities) {
+        this.laneNumber = laneNumber;
+        this.tileNumber = tileNumber;
+        this.xCoordinate = xCoordinate;
+        this.yCoordinate = yCoordinate;
+        this.intensities = intensities;
+    }
+
+    /**
+     * Composes a name for this read from its values.
+     *
+     * @return the read name
+     */
+    public String getReadName() {
+        return this.laneNumber + ":" + this.tileNumber + ":" + this.xCoordinate + ":" + this.yCoordinate + "#0";
+    }
+
+    public int getLaneNumber() { return laneNumber; }
+    public int getTileNumber() { return tileNumber; }
+    public int getXCoordinate() { return xCoordinate; }
+    public int getYCoordinate() { return yCoordinate; }
+    public FourIntensity[] getIntensities() { return intensities; }
+
+}
diff --git a/playground/java/src/org/broadinstitute/sting/illumina/FourIntensity.java b/playground/java/src/org/broadinstitute/sting/illumina/FourIntensity.java
new file mode 100755
index 000000000..0bcc50628
--- /dev/null
+++ b/playground/java/src/org/broadinstitute/sting/illumina/FourIntensity.java
@@ -0,0 +1,112 @@
+package org.broadinstitute.sting.illumina;
+
+import java.util.StringTokenizer;
+
+/**
+ * Mutable holder for the intensity values of four channels, indexed 0 to 3.
+ */
+public class FourIntensity {
+    private float[] fIntensities;
+
+    /**
+     * Creates an instance whose four channel intensities are all zero.
+     */
+    public FourIntensity() {
+        this.fIntensities = new float[4];
+    }
+
+    /**
+     * Wraps the given array without copying it, so the instance and the caller share it.
+     *
+     * @param fIntensities the channel intensities, indexed by channel
+     */
+    public FourIntensity(float[] fIntensities) {
+        this.fIntensities = fIntensities;
+    }
+
+    /**
+     * Copy constructor.
+     *
+     * @param intensity the instance whose four channel values are copied
+     */
+    public FourIntensity(FourIntensity intensity) {
+        this.fIntensities = new float[4];
+        int c = 0;
+        while (c < 4) {
+            this.fIntensities[c] = intensity.getChannelIntensity(c);
+            c++;
+        }
+    }
+
+    /**
+     * Adds the other instance's value to this one, channel by channel.
+     *
+     * @param intensity the values to add
+     */
+    public void add(FourIntensity intensity) {
+        int c = 0;
+        while (c < 4) {
+            this.fIntensities[c] += intensity.getChannelIntensity(c);
+            c++;
+        }
+    }
+
+    /**
+     * Subtracts the other instance's value from this one, channel by channel.
+     *
+     * @param intensity the values to subtract
+     */
+    public void subtract(FourIntensity intensity) {
+        int c = 0;
+        while (c < 4) {
+            this.fIntensities[c] -= intensity.getChannelIntensity(c);
+            c++;
+        }
+    }
+
+    /**
+     * Divides each of the four channel values by the given divisor.
+     *
+     * @param divisor the value to divide by
+     */
+    public void divide(float divisor) {
+        int c = 0;
+        while (c < 4) {
+            this.fIntensities[c] /= divisor;
+            c++;
+        }
+    }
+
+    /**
+     * @param channel index of the channel
+     * @return the intensity stored for that channel
+     */
+    public float getChannelIntensity(int channel) {
+        return this.fIntensities[channel];
+    }
+
+    /**
+     * @return the index of the channel with the largest value; on ties the lowest index wins
+     */
+    public int brightestChannel() {
+        int winner = 0;
+        int candidate = 1;
+        while (candidate < 4) {
+            winner = (fIntensities[candidate] > fIntensities[winner]) ? candidate : winner;
+            candidate++;
+        }
+        return winner;
+    }
+
+    /**
+     * @return the four channel values formatted as "(c0, c1, c2, c3)"
+     */
+    public String toString() {
+        final StringBuilder text = new StringBuilder("(");
+        text.append(getChannelIntensity(0));
+        for (int c = 1; c < 4; c++) {
+            text.append(", ").append(getChannelIntensity(c));
+        }
+        return text.append(")").toString();
+    }
+}