a utility walker for validating changes made to the underlying ROD system in the transistion to Tribble.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3258 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d9bf441391
commit
68bdac254b
|
|
@ -0,0 +1,124 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.validation;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.math.BigInteger;
|
||||||
|
import java.security.MessageDigest;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a walker for validating (in the style of validating pile-up) the ROD system.
|
||||||
|
*/
|
||||||
|
public class RodSystemValidationWalker extends RodWalker<Integer,Integer> {
|
||||||
|
|
||||||
|
// the divider to use in some of the text output
|
||||||
|
private static final String DIVIDER = "-->";
|
||||||
|
|
||||||
|
// used to calculate the MD5 of a file
|
||||||
|
MessageDigest digest = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* emit the md5 sums for each of the input ROD files (will save up a lot of time if and when the ROD files change
|
||||||
|
* underneath us).
|
||||||
|
*/
|
||||||
|
public void initialize() {
|
||||||
|
// setup the MD5-er
|
||||||
|
try {
|
||||||
|
digest = MessageDigest.getInstance("MD5");
|
||||||
|
} catch (NoSuchAlgorithmException e) {
|
||||||
|
throw new StingException("Unable to find MD5 checksumer");
|
||||||
|
}
|
||||||
|
out.println("Header:");
|
||||||
|
// enumerate the list of ROD's we've loaded
|
||||||
|
List<ReferenceOrderedDataSource> rodList = GenomeAnalysisEngine.instance.getRodDataSources();
|
||||||
|
for (ReferenceOrderedDataSource rod : rodList) {
|
||||||
|
out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getType());
|
||||||
|
out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getFile());
|
||||||
|
out.println(rod.getName() + DIVIDER + md5sum(rod.getReferenceOrderedData().getFile()));
|
||||||
|
}
|
||||||
|
out.println("Data:");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param tracker the ref meta data tracker to get RODs
|
||||||
|
* @param ref reference context
|
||||||
|
* @param context the reads
|
||||||
|
* @return an 1 for each site with a rod, 0 otherwise
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if (tracker != null && tracker.getAllRods().size() > 0) {
|
||||||
|
out.print(context.getLocation() + DIVIDER);
|
||||||
|
Collection<GATKFeature> features = tracker.getAllRods();
|
||||||
|
for (GATKFeature feat : features)
|
||||||
|
out.print(feat.getName() + DIVIDER);
|
||||||
|
out.println(";");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provide an initial value for reduce computations.
|
||||||
|
*
|
||||||
|
* @return Initial value of reduce.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||||
|
*
|
||||||
|
* @param value result of the map.
|
||||||
|
* @param sum accumulator for the reduce.
|
||||||
|
* @return accumulator with result of the map taken into account.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return value + sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
// shamelessly absconded and adapted from http://www.javalobby.org/java/forums/t84420.html
|
||||||
|
private String md5sum(File f) {
|
||||||
|
InputStream is;
|
||||||
|
try {
|
||||||
|
is = new FileInputStream(f);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new StingException("Unable to get a file input stream from " + f, e);
|
||||||
|
}
|
||||||
|
byte[] buffer = new byte[8192];
|
||||||
|
int read = 0;
|
||||||
|
try {
|
||||||
|
while ((read = is.read(buffer)) > 0) {
|
||||||
|
digest.update(buffer, 0, read);
|
||||||
|
}
|
||||||
|
byte[] md5sum = digest.digest();
|
||||||
|
BigInteger bigInt = new BigInteger(1, md5sum);
|
||||||
|
return bigInt.toString(16);
|
||||||
|
}
|
||||||
|
catch (IOException e) {
|
||||||
|
throw new RuntimeException("Unable to process file for MD5", e);
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
try {
|
||||||
|
is.close();
|
||||||
|
}
|
||||||
|
catch (IOException e) {
|
||||||
|
throw new RuntimeException("Unable to close input stream for MD5 calculation", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue