diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java index f7a395d9d..a5ebf27bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java @@ -51,12 +51,11 @@ import java.util.zip.GZIPInputStream; * Class implementing diffnode reader for VCF */ public class BAMDiffableReader implements DiffableReader { - private final static int MAX_RECORDS_TO_READ = 1000; @Override public String getName() { return "BAM"; } @Override - public DiffElement readFromFile(File file) { + public DiffElement readFromFile(File file, int maxElementsToRead) { final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); @@ -65,7 +64,7 @@ public class BAMDiffableReader implements DiffableReader { int count = 0; while ( iterator.hasNext() ) { - if ( count++ > MAX_RECORDS_TO_READ ) + if ( count++ > maxElementsToRead && maxElementsToRead != -1) break; final SAMRecord record = iterator.next(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index ba2713bff..54a7a464d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -385,12 +385,17 @@ public class DiffEngine { return findReaderForFile(file) != null; } + public DiffElement createDiffableFromFile(File file) { + return createDiffableFromFile(file, -1); + } + + public DiffElement createDiffableFromFile(File file, int maxElementsToRead) { DiffableReader reader = findReaderForFile(file); if ( reader == null ) throw new UserException("Unsupported file type: " + file); else - return reader.readFromFile(file); + return reader.readFromFile(file, maxElementsToRead); } public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index a08108db2..fe411b195 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -48,11 +48,14 @@ public class DiffObjectsWalker extends RodWalker { @Output(doc="File to which results should be written",required=true) protected PrintStream out; - @Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false) - int MAX_RECORDS = 0; + @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) + int MAX_OBJECTS_TO_READ = -1; - @Argument(fullName="maxCount1Records", shortName="M1", doc="Max. number of records occuring exactly once in the file to process", required=false) - int MAX_COUNT1_RECORDS = 0; + @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false) + int MAX_DIFFS = 0; + + @Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false) + int MAX_COUNT1_DIFFS = 0; @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false) int minCountForDiff = 1; @@ -91,9 +94,9 @@ public class DiffObjectsWalker extends RodWalker { @Override public void onTraversalDone(Integer sum) { out.printf("Reading master file %s%n", masterFile); - DiffElement master = diffEngine.createDiffableFromFile(masterFile); + DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ); out.printf("Reading test file %s%n", testFile); - DiffElement test = diffEngine.createDiffableFromFile(testFile); + DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ); // out.printf("Master diff objects%n"); // out.println(master.toString()); @@ -107,7 +110,7 @@ public class DiffObjectsWalker extends RodWalker { out.printf("DIFF: %s%n", diff.toString()); } - DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_RECORDS, MAX_COUNT1_RECORDS, minCountForDiff); + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff); diffEngine.reportSummarizedDifferences(diffs, params); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java index 84c2eed10..af5771c55 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java @@ -43,7 +43,7 @@ public interface DiffableReader { @Ensures("result != null") @Requires("file != null") - public DiffElement readFromFile(File file); + public DiffElement readFromFile(File file, int maxElementsToRead); @Requires("file != null") public boolean canRead(File file); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 743178538..06d14366f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -51,15 +51,21 @@ public class VCFDiffableReader implements DiffableReader { public String getName() { return "VCF"; } @Override - public DiffElement readFromFile(File file) { + public DiffElement readFromFile(File file, int maxElementsToRead) { DiffNode root = DiffNode.rooted(file.getName()); try { LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); VCFCodec vcfCodec = new VCFCodec(); - VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader); + + // must be read as state is stored in reader itself + vcfCodec.readHeader(lineReader); String line = lineReader.readLine(); + int count = 0; while ( line != null ) { + if ( count++ > maxElementsToRead && maxElementsToRead != -1) + break; + VariantContext vc = (VariantContext)vcfCodec.decode(line); String name = vc.getChr() + ":" + vc.getStart(); DiffNode vcRoot = DiffNode.empty(name, root); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java index 5738b643f..baa2f0383 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -80,7 +80,7 @@ public class DiffableReaderUnitTest extends BaseTest { Assert.assertTrue(vcfReader.canRead(vcfFile)); Assert.assertFalse(vcfReader.canRead(bamFile)); - DiffElement diff = vcfReader.readFromFile(vcfFile); + DiffElement diff = vcfReader.readFromFile(vcfFile, -1); Assert.assertNotNull(diff); Assert.assertEquals(diff.getName(), vcfFile.getName()); @@ -110,7 +110,7 @@ public class DiffableReaderUnitTest extends BaseTest { Assert.assertTrue(bamReader.canRead(bamFile)); Assert.assertFalse(bamReader.canRead(vcfFile)); - DiffElement diff = bamReader.readFromFile(bamFile); + DiffElement diff = bamReader.readFromFile(bamFile, -1); Assert.assertNotNull(diff); Assert.assertEquals(diff.getName(), bamFile.getName());