Diffable readers now take an argument for the maximum number of elements to read from the file (-1 means no limit).

This commit is contained in:
Mark DePristo 2011-07-12 08:53:19 -04:00
parent 8056a3fe89
commit 05212aea62
6 changed files with 29 additions and 16 deletions

View File

@ -51,12 +51,11 @@ import java.util.zip.GZIPInputStream;
* Class implementing diffnode reader for BAM * Class implementing diffnode reader for BAM
*/ */
public class BAMDiffableReader implements DiffableReader { public class BAMDiffableReader implements DiffableReader {
private final static int MAX_RECORDS_TO_READ = 1000;
@Override @Override
public String getName() { return "BAM"; } public String getName() { return "BAM"; }
@Override @Override
public DiffElement readFromFile(File file) { public DiffElement readFromFile(File file, int maxElementsToRead) {
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
@ -65,7 +64,7 @@ public class BAMDiffableReader implements DiffableReader {
int count = 0; int count = 0;
while ( iterator.hasNext() ) { while ( iterator.hasNext() ) {
if ( count++ > MAX_RECORDS_TO_READ ) if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break; break;
final SAMRecord record = iterator.next(); final SAMRecord record = iterator.next();

View File

@ -385,12 +385,17 @@ public class DiffEngine {
return findReaderForFile(file) != null; return findReaderForFile(file) != null;
} }
public DiffElement createDiffableFromFile(File file) { public DiffElement createDiffableFromFile(File file) {
return createDiffableFromFile(file, -1);
}
public DiffElement createDiffableFromFile(File file, int maxElementsToRead) {
DiffableReader reader = findReaderForFile(file); DiffableReader reader = findReaderForFile(file);
if ( reader == null ) if ( reader == null )
throw new UserException("Unsupported file type: " + file); throw new UserException("Unsupported file type: " + file);
else else
return reader.readFromFile(file); return reader.readFromFile(file, maxElementsToRead);
} }
public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) { public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {

View File

@ -48,11 +48,14 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
@Output(doc="File to which results should be written",required=true) @Output(doc="File to which results should be written",required=true)
protected PrintStream out; protected PrintStream out;
@Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false) @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false)
int MAX_RECORDS = 0; int MAX_OBJECTS_TO_READ = -1;
@Argument(fullName="maxCount1Records", shortName="M1", doc="Max. number of records occuring exactly once in the file to process", required=false) @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false)
int MAX_COUNT1_RECORDS = 0; int MAX_DIFFS = 0;
@Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false)
int MAX_COUNT1_DIFFS = 0;
@Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false) @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
int minCountForDiff = 1; int minCountForDiff = 1;
@ -91,9 +94,9 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
@Override @Override
public void onTraversalDone(Integer sum) { public void onTraversalDone(Integer sum) {
out.printf("Reading master file %s%n", masterFile); out.printf("Reading master file %s%n", masterFile);
DiffElement master = diffEngine.createDiffableFromFile(masterFile); DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
out.printf("Reading test file %s%n", testFile); out.printf("Reading test file %s%n", testFile);
DiffElement test = diffEngine.createDiffableFromFile(testFile); DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
// out.printf("Master diff objects%n"); // out.printf("Master diff objects%n");
// out.println(master.toString()); // out.println(master.toString());
@ -107,7 +110,7 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
out.printf("DIFF: %s%n", diff.toString()); out.printf("DIFF: %s%n", diff.toString());
} }
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_RECORDS, MAX_COUNT1_RECORDS, minCountForDiff); DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
diffEngine.reportSummarizedDifferences(diffs, params); diffEngine.reportSummarizedDifferences(diffs, params);
} }
} }

View File

@ -43,7 +43,7 @@ public interface DiffableReader {
@Ensures("result != null") @Ensures("result != null")
@Requires("file != null") @Requires("file != null")
public DiffElement readFromFile(File file); public DiffElement readFromFile(File file, int maxElementsToRead);
@Requires("file != null") @Requires("file != null")
public boolean canRead(File file); public boolean canRead(File file);

View File

@ -51,15 +51,21 @@ public class VCFDiffableReader implements DiffableReader {
public String getName() { return "VCF"; } public String getName() { return "VCF"; }
@Override @Override
public DiffElement readFromFile(File file) { public DiffElement readFromFile(File file, int maxElementsToRead) {
DiffNode root = DiffNode.rooted(file.getName()); DiffNode root = DiffNode.rooted(file.getName());
try { try {
LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
VCFCodec vcfCodec = new VCFCodec(); VCFCodec vcfCodec = new VCFCodec();
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
// must be read as state is stored in reader itself
vcfCodec.readHeader(lineReader);
String line = lineReader.readLine(); String line = lineReader.readLine();
int count = 0;
while ( line != null ) { while ( line != null ) {
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break;
VariantContext vc = (VariantContext)vcfCodec.decode(line); VariantContext vc = (VariantContext)vcfCodec.decode(line);
String name = vc.getChr() + ":" + vc.getStart(); String name = vc.getChr() + ":" + vc.getStart();
DiffNode vcRoot = DiffNode.empty(name, root); DiffNode vcRoot = DiffNode.empty(name, root);

View File

@ -80,7 +80,7 @@ public class DiffableReaderUnitTest extends BaseTest {
Assert.assertTrue(vcfReader.canRead(vcfFile)); Assert.assertTrue(vcfReader.canRead(vcfFile));
Assert.assertFalse(vcfReader.canRead(bamFile)); Assert.assertFalse(vcfReader.canRead(bamFile));
DiffElement diff = vcfReader.readFromFile(vcfFile); DiffElement diff = vcfReader.readFromFile(vcfFile, -1);
Assert.assertNotNull(diff); Assert.assertNotNull(diff);
Assert.assertEquals(diff.getName(), vcfFile.getName()); Assert.assertEquals(diff.getName(), vcfFile.getName());
@ -110,7 +110,7 @@ public class DiffableReaderUnitTest extends BaseTest {
Assert.assertTrue(bamReader.canRead(bamFile)); Assert.assertTrue(bamReader.canRead(bamFile));
Assert.assertFalse(bamReader.canRead(vcfFile)); Assert.assertFalse(bamReader.canRead(vcfFile));
DiffElement diff = bamReader.readFromFile(bamFile); DiffElement diff = bamReader.readFromFile(bamFile, -1);
Assert.assertNotNull(diff); Assert.assertNotNull(diff);
Assert.assertEquals(diff.getName(), bamFile.getName()); Assert.assertEquals(diff.getName(), bamFile.getName());