Added a directory to house some Illumina output parsers. Hopefully this will be merged back into Picard at some point.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@126 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-03-21 17:55:56 +00:00
parent 497eea2e5c
commit 3e350006e0
5 changed files with 437 additions and 0 deletions

View File

@ -0,0 +1,135 @@
/*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
//package edu.mit.broad.picard.illumina;
package org.broadinstitute.sting.illumina;
import edu.mit.broad.picard.util.BasicTextFileParser;
import java.io.Closeable;
import java.io.File;
import java.io.FilenameFilter;
import java.util.*;
import net.sf.samtools.util.StringUtil;
/**
 * Abstract base class for implementing parsers for various versions of Firecrest output.
 *
 * The parser is both the {@link Iterable} and its own {@link Iterator}: {@link #iterator()} returns
 * {@code this}, so a parser instance can be iterated only once.
 */
public abstract class AbstractFirecrestFileParser implements Iterator<FirecrestReadData>, Iterable<FirecrestReadData>, Closeable {
    protected final int lane;
    protected final File firecrestDirectory;

    /** Look-ahead element; null once the underlying data is exhausted (or before iteration starts). */
    private FirecrestReadData next = null;
    /** True once iterator() has completed, i.e. once the look-ahead element has been primed. */
    private boolean iterating = false;

    /**
     * Records the Firecrest directory and lane to parse. No validation is done here; see
     * {@link #isValidFirecrestDirectory()}.
     *
     * @param firecrestDirectory directory in which the Firecrest files are located
     * @param lane the lane to parse
     */
    public AbstractFirecrestFileParser(final File firecrestDirectory, final int lane) {
        this.lane = lane;
        this.firecrestDirectory = firecrestDirectory;
    }

    /**
     * @return true if the given Firecrest directory contains the appropriate files, or at least enough
     * of them so that it appears to be a Firecrest directory corresponding to the version of the concrete
     * FirecrestFileParser implementation.
     */
    public abstract boolean isValidFirecrestDirectory();

    /**
     * Called before iteration begins. If this method is called when isValidFirecrestDirectory() has
     * returned false, it will generate exceptions that may help the user diagnose the problem.
     */
    protected abstract void prepareToIterate();

    /**
     * @return the next read, or null when there are no more reads
     */
    protected abstract FirecrestReadData readNext();

    /**
     * Prepares the parser and primes the first element.
     *
     * @return an iterator over a set of elements of type FirecrestReadData (this object itself)
     * @throws IllegalStateException if iteration has already been started
     */
    @Override
    public Iterator<FirecrestReadData> iterator() {
        if (iterating) {
            throw new IllegalStateException("iterator() method can only be called once, before the first call to hasNext()");
        }
        prepareToIterate();
        next = readNext();
        // Set only after the look-ahead has been read, so a failure above leaves the parser un-started.
        iterating = true;
        return this;
    }

    /**
     * Starts iteration lazily if {@link #iterator()} has not been called yet.
     *
     * @return true if the iteration has more elements. Otherwise returns false.
     */
    @Override
    public boolean hasNext() {
        if (!iterating) {
            iterator();
        }
        return next != null;
    }

    /**
     * Returns the next element in the iteration and reads ahead one element.
     *
     * @return the next element in the iteration
     * @throws java.util.NoSuchElementException if the iteration has no more elements
     */
    @Override
    public FirecrestReadData next() {
        if (!hasNext()) {
            throw new NoSuchElementException("Iteration has no more elements.");
        }
        final FirecrestReadData result = next;
        next = readNext();
        return result;
    }

    /**
     * Required method for Iterator API.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("Remove() not supported.");
    }

    /**
     * Does nothing by default. Override, e.g. to close the underlying parser.
     */
    @Override
    public void close() {
    }

    /**
     * @return the lane this parser was constructed for
     */
    public int getLane() { return this.lane; }

    /**
     * Convenience method to create a parser for a list of files of the same format that should
     * be parsed in the order defined by FirecrestFilenameComparator. Files that compare as equal under
     * that comparator are collapsed to a single entry (the files are sorted through a TreeSet).
     * The array passed in is not modified.
     *
     * @param treatGroupedDelimitersAsOne passed through to the BasicTextFileParser constructor
     * @param files to be iterated, in arbitrary order
     * @return parser that iterates through the files in the appropriate order
     */
    protected BasicTextFileParser makeParserForTextFiles(final boolean treatGroupedDelimitersAsOne, File[] files) {
        final SortedSet<File> sortedFiles = new TreeSet<File>(new FirecrestFilenameComparator());
        sortedFiles.addAll(Arrays.asList(files));
        // Copy into a fresh, exactly-sized array. Reusing the input array (toArray(files)) would overwrite
        // the caller's array and, whenever the set dropped duplicates, leave a null marker followed by
        // stale trailing entries that would then be handed to the text parser.
        final File[] orderedFiles = sortedFiles.toArray(new File[sortedFiles.size()]);
        return new BasicTextFileParser(treatGroupedDelimitersAsOne, orderedFiles);
    }

    /**
     * Lists the files directly inside the Firecrest directory whose names fully match the given
     * regular expression.
     *
     * @param regexp regular expression that the entire file name must match
     * @return the matching files; an empty array if nothing matches or if the directory cannot be listed
     *         (File.listFiles returns null when the path is not a directory or an I/O error occurs)
     */
    protected File[] getFilesMatchingRegexp(final String regexp) {
        // Compile once rather than once per directory entry; matcher().matches() is what String.matches does.
        final java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(regexp);
        final File[] matchingFiles = firecrestDirectory.listFiles(new FilenameFilter() {
            public boolean accept(final File dir, final String name) {
                return pattern.matcher(name).matches();
            }
        });
        return (matchingFiles == null) ? new File[0] : matchingFiles;
    }
}

View File

@ -0,0 +1,107 @@
/*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
package org.broadinstitute.sting.illumina;
import edu.mit.broad.picard.util.PasteParser;
import edu.mit.broad.picard.util.FormatUtil;
import edu.mit.broad.picard.util.BasicTextFileParser;
import edu.mit.broad.picard.PicardException;
import java.io.File;
/**
 * Class to parse the data in an Illumina Firecrest directory and return an iterator over that data, in order
 * by tile.
 *
 * @author Kiran Garimella
 */
public class FirecrestFileParser extends AbstractFirecrestFileParser {
    /** Number of leading fields on each line that hold position information (lane, tile, x, y). */
    private static final int NUM_POSITION_FIELDS = 4;
    /** Number of intensity values recorded per cycle. */
    private static final int NUM_CHANNELS = 4;

    private BasicTextFileParser parser;
    private final FormatUtil formatter = new FormatUtil();
    private final File[] intensityFiles;

    /**
     * Constructor. Collects the intensity files for the lane (names of the form
     * s_&lt;lane&gt;_&lt;4-digit tile&gt;_int.txt, optionally followed by .gz).
     *
     * @param firecrestDirectory directory where the Firecrest files can be located
     * @param lane the lane to parse
     */
    public FirecrestFileParser(final File firecrestDirectory, final int lane) {
        super(firecrestDirectory, lane);
        // The dots are escaped so that only a literal ".txt" / ".gz" suffix matches.
        final File[] found = getFilesMatchingRegexp("s_" + lane + "_\\d{4}_int\\.txt(\\.gz)?");
        // A null listing (e.g. directory missing or unreadable) is treated as "no files found".
        intensityFiles = (found == null) ? new File[0] : found;
    }

    /**
     * @return true if at least one intensity file for this lane was found in the Firecrest directory
     */
    @Override
    public boolean isValidFirecrestDirectory() {
        return (intensityFiles.length > 0);
    }

    /**
     * Checks that at least one intensity file was found, then creates a text parser that reads the
     * intensity files in sorted order.
     *
     * @throws PicardException if no intensity files were found for the lane
     */
    @Override
    protected void prepareToIterate() {
        // Some basic sanity checking on file counts
        if (intensityFiles.length == 0) {
            throw new PicardException("No Firecrest 1.3 intensity files found in " + firecrestDirectory.getAbsolutePath() + " for lane " + lane);
        }

        // Sort the intensity files and create a text parser for them
        parser = makeParserForTextFiles(true, intensityFiles);
    }

    /**
     * Parses the next line from the parser and constructs a FirecrestReadData object from it.
     * The first 4 fields are position information for the read, and the remaining values are
     * the intensities data, four values per cycle. Trailing values that do not make up a complete
     * group of four are ignored.
     *
     * @return a fully populated FirecrestReadData object, or null if there are no more lines
     * @throws PicardException if the line has fewer than the 4 position fields
     */
    @Override
    protected FirecrestReadData readNext() {
        if (!parser.hasNext()) {
            return null;
        }

        final String[] data = parser.next();
        if (data.length < NUM_POSITION_FIELDS) {
            throw new PicardException("Malformed Firecrest intensity line for lane " + lane + ": expected at least "
                    + NUM_POSITION_FIELDS + " fields but found " + data.length);
        }

        final int readLane = formatter.parseInt(data[0]);
        final int tile = formatter.parseInt(data[1]);
        final int x = formatter.parseInt(data[2]);
        final int y = formatter.parseInt(data[3]);

        final int numIntensities = (data.length - NUM_POSITION_FIELDS) / NUM_CHANNELS;
        final FourIntensity[] intensities = new FourIntensity[numIntensities];

        // index walks the flat field array while cycle/channel address the per-cycle groups
        int index = NUM_POSITION_FIELDS;
        for (int cycle = 0; cycle < numIntensities; cycle++) {
            final float[] fIntensities = new float[NUM_CHANNELS];
            for (int channel = 0; channel < NUM_CHANNELS; channel++, index++) {
                fIntensities[channel] = formatter.parseFloat(data[index]);
            }
            intensities[cycle] = new FourIntensity(fIntensities);
        }

        return new FirecrestReadData(readLane, tile, x, y, intensities);
    }

    /**
     * Closes the underlying text parser, if one has been created
     */
    @Override
    public void close() {
        if (parser != null) {
            parser.close();
        }
    }
}

View File

@ -0,0 +1,75 @@
/*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
//package edu.mit.broad.picard.illumina;
package org.broadinstitute.sting.illumina;
import java.io.File;
import java.util.Comparator;
/**
 * Comparator for getting Firecrest files in "sorted" order for use by the FirecrestFileParser. Expected order is
 * by lane in ascending order, then by tile in ascending order, then by read (files without a read
 * number sort before files with one).
 *
 * IMPORTANT: Currently this class expects to receive ONLY int files.
 *
 * @author Kiran Garimella
 */
public class FirecrestFilenameComparator implements Comparator<File> {
    /**
     * Compares its two arguments for order. Returns a negative integer, zero, or a positive integer as
     * the first argument is less than, equal to, or greater than the second.
     *
     * @param file1 first file to compare
     * @param file2 second file to compare
     * @return a negative integer, zero, or a positive integer as
     * the first argument is less than, equal to, or greater than the second.
     * @throws IllegalArgumentException if either file name does not have the expected underscore-separated
     *         layout or contains a non-numeric lane, tile or read
     */
    public int compare(File file1, File file2) {
        final Integer[] parts1 = parseFileNameParts(file1.getName());
        final Integer[] parts2 = parseFileNameParts(file2.getName());

        // Start at index 0 so that lane takes part in the ordering, as documented.
        for (int i = 0; i < parts1.length; i++) {
            final int result = compareNullable(parts1[i], parts2[i]);
            if (result != 0) {
                return result;
            }
        }
        return 0;
    }

    /**
     * Null-safe comparison of two name parts; a missing (null) part sorts before any present value.
     * Needed because the read part is absent for file names that carry no read number.
     */
    private static int compareNullable(Integer a, Integer b) {
        if (a == null) {
            return (b == null) ? 0 : -1;
        }
        if (b == null) {
            return 1;
        }
        return a.compareTo(b);
    }

    /**
     * Utility method that returns an array of integers that represent, in order,
     * the lane, tile, and read (null if the name carries no read number)
     * represented by the given file name
     *
     * @param name file name, split on underscores: the lane is the second part; the tile is the third
     *             part when that part is 4 characters long, otherwise the fourth part; the read is the
     *             third part when that part is a single character
     * @return an array of three Integers: lane, tile, read (the read may be null)
     */
    private Integer[] parseFileNameParts(String name) {
        final String[] src = name.split("_");
        if (src.length < 3) {
            throw new IllegalArgumentException("Unexpected Firecrest file name: " + name);
        }

        final Integer[] parts = new Integer[3]; // Lane, tile, read
        parts[0] = Integer.valueOf(src[1]); // Lane is always the second part

        if (src[2].length() == 4) { // Tile is 3rd or fourth
            parts[1] = Integer.valueOf(src[2]);
        }
        else {
            if (src.length < 4) {
                throw new IllegalArgumentException("Unexpected Firecrest file name: " + name);
            }
            parts[1] = Integer.valueOf(src[3]);
        }

        if (src[2].length() == 1) { // read, when present, is the third part
            parts[2] = Integer.valueOf(src[2]);
        }
        return parts;
    }
}

View File

@ -0,0 +1,57 @@
/*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
package org.broadinstitute.sting.illumina;
/**
 * Value holder for the Firecrest-level data we need (so far) about an individual read:
 * its lane, tile, x/y coordinates and per-cycle intensities.
 *
 * @author Kiran Garimella
 */
public class FirecrestReadData {
    private final int laneNumber;
    private final int tileNumber;
    private final int xCoordinate;
    private final int yCoordinate;
    private final FourIntensity[] intensities;

    /**
     * Builds a fully populated read record.
     *
     * @param laneNumber lane of the read
     * @param tileNumber tile of the read
     * @param xCoordinate x coordinate of the read
     * @param yCoordinate y coordinate of the read
     * @param intensities intensities of the read; the array is stored as given, not copied
     */
    public FirecrestReadData(int laneNumber, int tileNumber, int xCoordinate, int yCoordinate, FourIntensity[] intensities) {
        this.intensities = intensities;
        this.yCoordinate = yCoordinate;
        this.xCoordinate = xCoordinate;
        this.tileNumber = tileNumber;
        this.laneNumber = laneNumber;
    }

    /**
     * Builds the read name in the form lane:tile:x:y#0.
     *
     * @return the read name
     */
    public String getReadName() {
        final StringBuilder readName = new StringBuilder();
        readName.append(laneNumber).append(":");
        readName.append(tileNumber).append(":");
        readName.append(xCoordinate).append(":");
        readName.append(yCoordinate).append("#0");
        return readName.toString();
    }

    /** @return the lane number */
    public int getLaneNumber() {
        return this.laneNumber;
    }

    /** @return the tile number */
    public int getTileNumber() {
        return this.tileNumber;
    }

    /** @return the x coordinate */
    public int getXCoordinate() {
        return this.xCoordinate;
    }

    /** @return the y coordinate */
    public int getYCoordinate() {
        return this.yCoordinate;
    }

    /** @return the intensities array held by this object (not a copy) */
    public FourIntensity[] getIntensities() {
        return this.intensities;
    }
}

View File

@ -0,0 +1,63 @@
package org.broadinstitute.sting.illumina;
import java.util.StringTokenizer;
public class FourIntensity {
private float[] fIntensities;
public FourIntensity() {
fIntensities = new float[4];
}
public FourIntensity(float[] fIntensities) {
this.fIntensities = fIntensities;
}
public FourIntensity(FourIntensity intensity) {
fIntensities = new float[4];
for (int channel = 0; channel < 4; channel++) {
fIntensities[channel] = intensity.getChannelIntensity(channel);
}
}
public void add(FourIntensity intensity) {
for (int channel = 0; channel < 4; channel++) {
fIntensities[channel] += intensity.getChannelIntensity(channel);
}
}
public void subtract(FourIntensity intensity) {
for (int channel = 0; channel < 4; channel++) {
fIntensities[channel] -= intensity.getChannelIntensity(channel);
}
}
public void divide(float divisor) {
for (int channel = 0; channel < 4; channel++) {
fIntensities[channel] /= divisor;
}
}
public float getChannelIntensity(int channel) { return fIntensities[channel]; }
public int brightestChannel() {
int brightest = 0;
for (int channel = 1; channel < 4; channel++) {
if (fIntensities[channel] > fIntensities[brightest]) {
brightest = channel;
}
}
return brightest;
}
public String toString() {
return "(" + getChannelIntensity(0) +
", " + getChannelIntensity(1) +
", " + getChannelIntensity(2) +
", " + getChannelIntensity(3) +
")";
}
}