DiffableReader.readFromFile now takes an argument for the maximum number of elements to read from the file (-1 means unlimited).
This commit is contained in:
parent
8056a3fe89
commit
05212aea62
|
|
@ -51,12 +51,11 @@ import java.util.zip.GZIPInputStream;
|
||||||
* Class implementing diffnode reader for BAM
|
* Class implementing diffnode reader for BAM
|
||||||
*/
|
*/
|
||||||
public class BAMDiffableReader implements DiffableReader {
|
public class BAMDiffableReader implements DiffableReader {
|
||||||
private final static int MAX_RECORDS_TO_READ = 1000;
|
|
||||||
@Override
|
@Override
|
||||||
public String getName() { return "BAM"; }
|
public String getName() { return "BAM"; }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DiffElement readFromFile(File file) {
|
public DiffElement readFromFile(File file, int maxElementsToRead) {
|
||||||
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
|
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
|
||||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||||
|
|
||||||
|
|
@ -65,7 +64,7 @@ public class BAMDiffableReader implements DiffableReader {
|
||||||
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while ( iterator.hasNext() ) {
|
while ( iterator.hasNext() ) {
|
||||||
if ( count++ > MAX_RECORDS_TO_READ )
|
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||||
break;
|
break;
|
||||||
final SAMRecord record = iterator.next();
|
final SAMRecord record = iterator.next();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -385,12 +385,17 @@ public class DiffEngine {
|
||||||
return findReaderForFile(file) != null;
|
return findReaderForFile(file) != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public DiffElement createDiffableFromFile(File file) {
|
public DiffElement createDiffableFromFile(File file) {
|
||||||
|
return createDiffableFromFile(file, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public DiffElement createDiffableFromFile(File file, int maxElementsToRead) {
|
||||||
DiffableReader reader = findReaderForFile(file);
|
DiffableReader reader = findReaderForFile(file);
|
||||||
if ( reader == null )
|
if ( reader == null )
|
||||||
throw new UserException("Unsupported file type: " + file);
|
throw new UserException("Unsupported file type: " + file);
|
||||||
else
|
else
|
||||||
return reader.readFromFile(file);
|
return reader.readFromFile(file, maxElementsToRead);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {
|
public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {
|
||||||
|
|
|
||||||
|
|
@ -48,11 +48,14 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
||||||
@Output(doc="File to which results should be written",required=true)
|
@Output(doc="File to which results should be written",required=true)
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
@Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false)
|
@Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false)
|
||||||
int MAX_RECORDS = 0;
|
int MAX_OBJECTS_TO_READ = -1;
|
||||||
|
|
||||||
@Argument(fullName="maxCount1Records", shortName="M1", doc="Max. number of records occuring exactly once in the file to process", required=false)
|
@Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false)
|
||||||
int MAX_COUNT1_RECORDS = 0;
|
int MAX_DIFFS = 0;
|
||||||
|
|
||||||
|
@Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false)
|
||||||
|
int MAX_COUNT1_DIFFS = 0;
|
||||||
|
|
||||||
@Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
|
@Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
|
||||||
int minCountForDiff = 1;
|
int minCountForDiff = 1;
|
||||||
|
|
@ -91,9 +94,9 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
||||||
@Override
|
@Override
|
||||||
public void onTraversalDone(Integer sum) {
|
public void onTraversalDone(Integer sum) {
|
||||||
out.printf("Reading master file %s%n", masterFile);
|
out.printf("Reading master file %s%n", masterFile);
|
||||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
|
||||||
out.printf("Reading test file %s%n", testFile);
|
out.printf("Reading test file %s%n", testFile);
|
||||||
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
|
||||||
|
|
||||||
// out.printf("Master diff objects%n");
|
// out.printf("Master diff objects%n");
|
||||||
// out.println(master.toString());
|
// out.println(master.toString());
|
||||||
|
|
@ -107,7 +110,7 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
||||||
out.printf("DIFF: %s%n", diff.toString());
|
out.printf("DIFF: %s%n", diff.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_RECORDS, MAX_COUNT1_RECORDS, minCountForDiff);
|
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
|
||||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -43,7 +43,7 @@ public interface DiffableReader {
|
||||||
|
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
@Requires("file != null")
|
@Requires("file != null")
|
||||||
public DiffElement readFromFile(File file);
|
public DiffElement readFromFile(File file, int maxElementsToRead);
|
||||||
|
|
||||||
@Requires("file != null")
|
@Requires("file != null")
|
||||||
public boolean canRead(File file);
|
public boolean canRead(File file);
|
||||||
|
|
|
||||||
|
|
@ -51,15 +51,21 @@ public class VCFDiffableReader implements DiffableReader {
|
||||||
public String getName() { return "VCF"; }
|
public String getName() { return "VCF"; }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DiffElement readFromFile(File file) {
|
public DiffElement readFromFile(File file, int maxElementsToRead) {
|
||||||
DiffNode root = DiffNode.rooted(file.getName());
|
DiffNode root = DiffNode.rooted(file.getName());
|
||||||
try {
|
try {
|
||||||
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
|
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
|
||||||
VCFCodec vcfCodec = new VCFCodec();
|
VCFCodec vcfCodec = new VCFCodec();
|
||||||
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
|
|
||||||
|
// must be read as state is stored in reader itself
|
||||||
|
vcfCodec.readHeader(lineReader);
|
||||||
|
|
||||||
String line = lineReader.readLine();
|
String line = lineReader.readLine();
|
||||||
|
int count = 0;
|
||||||
while ( line != null ) {
|
while ( line != null ) {
|
||||||
|
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||||
|
break;
|
||||||
|
|
||||||
VariantContext vc = (VariantContext)vcfCodec.decode(line);
|
VariantContext vc = (VariantContext)vcfCodec.decode(line);
|
||||||
String name = vc.getChr() + ":" + vc.getStart();
|
String name = vc.getChr() + ":" + vc.getStart();
|
||||||
DiffNode vcRoot = DiffNode.empty(name, root);
|
DiffNode vcRoot = DiffNode.empty(name, root);
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,7 @@ public class DiffableReaderUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(vcfReader.canRead(vcfFile));
|
Assert.assertTrue(vcfReader.canRead(vcfFile));
|
||||||
Assert.assertFalse(vcfReader.canRead(bamFile));
|
Assert.assertFalse(vcfReader.canRead(bamFile));
|
||||||
|
|
||||||
DiffElement diff = vcfReader.readFromFile(vcfFile);
|
DiffElement diff = vcfReader.readFromFile(vcfFile, -1);
|
||||||
Assert.assertNotNull(diff);
|
Assert.assertNotNull(diff);
|
||||||
|
|
||||||
Assert.assertEquals(diff.getName(), vcfFile.getName());
|
Assert.assertEquals(diff.getName(), vcfFile.getName());
|
||||||
|
|
@ -110,7 +110,7 @@ public class DiffableReaderUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(bamReader.canRead(bamFile));
|
Assert.assertTrue(bamReader.canRead(bamFile));
|
||||||
Assert.assertFalse(bamReader.canRead(vcfFile));
|
Assert.assertFalse(bamReader.canRead(vcfFile));
|
||||||
|
|
||||||
DiffElement diff = bamReader.readFromFile(bamFile);
|
DiffElement diff = bamReader.readFromFile(bamFile, -1);
|
||||||
Assert.assertNotNull(diff);
|
Assert.assertNotNull(diff);
|
||||||
|
|
||||||
Assert.assertEquals(diff.getName(), bamFile.getName());
|
Assert.assertEquals(diff.getName(), bamFile.getName());
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue