reader now takes an argument for the maximum number of elements to read from the file.

This commit is contained in:
Mark DePristo 2011-07-12 08:53:19 -04:00
parent 8056a3fe89
commit 05212aea62
6 changed files with 29 additions and 16 deletions

View File

@ -51,12 +51,11 @@ import java.util.zip.GZIPInputStream;
* Class implementing diffnode reader for VCF
*/
public class BAMDiffableReader implements DiffableReader {
private final static int MAX_RECORDS_TO_READ = 1000;
@Override
public String getName() { return "BAM"; }
@Override
public DiffElement readFromFile(File file) {
public DiffElement readFromFile(File file, int maxElementsToRead) {
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
@ -65,7 +64,7 @@ public class BAMDiffableReader implements DiffableReader {
int count = 0;
while ( iterator.hasNext() ) {
if ( count++ > MAX_RECORDS_TO_READ )
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break;
final SAMRecord record = iterator.next();

View File

@ -385,12 +385,17 @@ public class DiffEngine {
return findReaderForFile(file) != null;
}
public DiffElement createDiffableFromFile(File file) {
return createDiffableFromFile(file, -1);
}
public DiffElement createDiffableFromFile(File file, int maxElementsToRead) {
DiffableReader reader = findReaderForFile(file);
if ( reader == null )
throw new UserException("Unsupported file type: " + file);
else
return reader.readFromFile(file);
return reader.readFromFile(file, maxElementsToRead);
}
public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {

View File

@ -48,11 +48,14 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
@Output(doc="File to which results should be written",required=true)
protected PrintStream out;
@Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false)
int MAX_RECORDS = 0;
@Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false)
int MAX_OBJECTS_TO_READ = -1;
@Argument(fullName="maxCount1Records", shortName="M1", doc="Max. number of records occuring exactly once in the file to process", required=false)
int MAX_COUNT1_RECORDS = 0;
@Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false)
int MAX_DIFFS = 0;
@Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false)
int MAX_COUNT1_DIFFS = 0;
@Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
int minCountForDiff = 1;
@ -91,9 +94,9 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
@Override
public void onTraversalDone(Integer sum) {
out.printf("Reading master file %s%n", masterFile);
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
out.printf("Reading test file %s%n", testFile);
DiffElement test = diffEngine.createDiffableFromFile(testFile);
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
// out.printf("Master diff objects%n");
// out.println(master.toString());
@ -107,7 +110,7 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
out.printf("DIFF: %s%n", diff.toString());
}
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_RECORDS, MAX_COUNT1_RECORDS, minCountForDiff);
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
diffEngine.reportSummarizedDifferences(diffs, params);
}
}

View File

@ -43,7 +43,7 @@ public interface DiffableReader {
@Ensures("result != null")
@Requires("file != null")
public DiffElement readFromFile(File file);
public DiffElement readFromFile(File file, int maxElementsToRead);
@Requires("file != null")
public boolean canRead(File file);

View File

@ -51,15 +51,21 @@ public class VCFDiffableReader implements DiffableReader {
public String getName() { return "VCF"; }
@Override
public DiffElement readFromFile(File file) {
public DiffElement readFromFile(File file, int maxElementsToRead) {
DiffNode root = DiffNode.rooted(file.getName());
try {
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
VCFCodec vcfCodec = new VCFCodec();
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
// must be read as state is stored in reader itself
vcfCodec.readHeader(lineReader);
String line = lineReader.readLine();
int count = 0;
while ( line != null ) {
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break;
VariantContext vc = (VariantContext)vcfCodec.decode(line);
String name = vc.getChr() + ":" + vc.getStart();
DiffNode vcRoot = DiffNode.empty(name, root);

View File

@ -80,7 +80,7 @@ public class DiffableReaderUnitTest extends BaseTest {
Assert.assertTrue(vcfReader.canRead(vcfFile));
Assert.assertFalse(vcfReader.canRead(bamFile));
DiffElement diff = vcfReader.readFromFile(vcfFile);
DiffElement diff = vcfReader.readFromFile(vcfFile, -1);
Assert.assertNotNull(diff);
Assert.assertEquals(diff.getName(), vcfFile.getName());
@ -110,7 +110,7 @@ public class DiffableReaderUnitTest extends BaseTest {
Assert.assertTrue(bamReader.canRead(bamFile));
Assert.assertFalse(bamReader.canRead(vcfFile));
DiffElement diff = bamReader.readFromFile(bamFile);
DiffElement diff = bamReader.readFromFile(bamFile, -1);
Assert.assertNotNull(diff);
Assert.assertEquals(diff.getName(), bamFile.getName());