/* * The Broad Institute * SOFTWARE COPYRIGHT NOTICE AGREEMENT * This software and its documentation are copyright 2009 by the * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ package edu.mit.broad.picard.illumina; import edu.mit.broad.picard.util.PasteParser; import edu.mit.broad.picard.util.TabbedTextFileParser; import edu.mit.broad.picard.PicardException; import edu.mit.broad.sam.util.CloseableIterator; import java.io.File; import java.util.Iterator; import java.util.Arrays; import java.util.regex.Pattern; import java.text.ParsePosition; import java.text.NumberFormat; /** * Parse the pair of files (eland_extended.txt and export.txt) that correspond to an end of a Gerald run for a lane. */ public class GeraldParser implements Iterable, CloseableIterator { private static final int EXPECTED_ELAND_FIELDS = 4; // Regex used to split apart multiple alignments in the eland output private static final Pattern ALIGN_SPLITTER = Pattern.compile("\\,+"); // export.txt constants private static final int PASSING_FILTER_COLUMN = 21; private static final int QUALITIES_COLUMN = 9; private static final int REQUIRED_EXPORT_COLUMNS = PASSING_FILTER_COLUMN + 1; private final NumberFormat integerFormat = NumberFormat.getIntegerInstance(); private final SquashedCoordinateMap geraldToArachne; private final PasteParser pasteParser; private final File elandExtended; private final File export; private boolean iteratorCalled = false; private final byte[] solexaToPhredQualityConverter = new SolexaQualityConverter().getSolexaToPhredConversionTable(); /** * @param geraldToArachne for converting btw Gerald coordinate and genomic coordinate */ public GeraldParser(final SquashedCoordinateMap geraldToArachne, final File elandExtended, final File export) { this.geraldToArachne = geraldToArachne; this.elandExtended = elandExtended; this.export = export; final TabbedTextFileParser[] parsers = { new TabbedTextFileParser(false, elandExtended), new TabbedTextFileParser(false, export) }; pasteParser = new PasteParser(parsers); } public Iterator iterator() { if (iteratorCalled) { throw new IllegalStateException("iterator() cannot be called more than once on a GeraldParser instance."); } iteratorCalled = true; return this; } public void close() { pasteParser.close(); } public boolean hasNext() { return pasteParser.hasNext(); } public GeraldAlignment next() { final GeraldAlignment ret = new GeraldAlignment(); final String[][] fields = pasteParser.next(); // Parse eland_extended.txt fields final String[] elandExtendedFields = fields[0]; if (elandExtendedFields.length < EXPECTED_ELAND_FIELDS) { throw new PicardException("Not enough fields in file: " + elandExtended); } ret.readName = elandExtendedFields[0].substring(1); ret.readBases = elandExtendedFields[1]; ret.readLength = ret.readBases.length(); final String[] alignCounts = elandExtendedFields[2].split(":"); if (alignCounts.length == 3) { ret.zeroMismatchPlacements = Short.parseShort(alignCounts[0]); ret.oneMismatchPlacements = Short.parseShort(alignCounts[1]); ret.twoMismatchPlacements = Short.parseShort(alignCounts[2]); } final String[] alignments = ALIGN_SPLITTER.split(elandExtendedFields[3]); if (alignments.length == 1 && !"-".equals(alignments[0])) { final int lastDot = alignments[0].lastIndexOf("."); final int colon = alignments[0].indexOf(':'); final String tmp = alignments[0].substring(colon + 1); final ParsePosition pos = new ParsePosition(0); final long start = integerFormat.parse(tmp, pos).longValue(); if (pos.getIndex() == 0) { throw new RuntimeException("Problem parsing eland extended alignment record: " + Arrays.toString(elandExtendedFields)); } final SimpleMapping m = new SimpleMapping(alignments[0].substring(lastDot+1, colon).trim(), start, start + ret.readLength - 1, null); geraldToArachne.convertToArachneCoords(m); ret.primaryChrom = m.getSequenceName(); ret.primaryStart = m.getStartPos(); ret.primaryStop = m.getEndPos(); ret.orientation = tmp.substring(pos.getIndex(), pos.getIndex() + 1); ret.mismatchString = tmp.substring(pos.getIndex() + 1); // Count the mismatches in the alignment for (int i=pos.getIndex(); i