Computes the distribution of insert size per library (for now, one output file per library)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3334 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a51bd57566
commit
c111c15072
|
|
@ -0,0 +1,83 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* For each sequencing library, outputs the distribution of mate pair sizes
|
||||
*/
|
||||
public class MatePairLibrarySize extends ReadWalker<Integer, Integer> {
|
||||
@Argument(fullName="outdir", shortName="outdir", doc="Directory to output results")
|
||||
private File OUT_DIR;
|
||||
|
||||
private HashMap<String, HashMap<Integer, Integer>> matePairSize;
|
||||
|
||||
public void initialize() {
|
||||
matePairSize = new HashMap<String, HashMap<Integer, Integer>>();
|
||||
|
||||
for (SAMReadGroupRecord rg : this.getToolkit().getSAMFileHeader().getReadGroups()) {
|
||||
HashMap<Integer, Integer> mps = new HashMap<Integer, Integer>();
|
||||
|
||||
matePairSize.put(rg.getLibrary(), mps);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
return (read.getReadPairedFlag() && read.getFirstOfPairFlag());
|
||||
}
|
||||
|
||||
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
int insert = read.getInferredInsertSize();
|
||||
|
||||
Integer oldcount = matePairSize.get(read.getReadGroup().getLibrary()).get(insert);
|
||||
if (oldcount == null) { oldcount = 0; }
|
||||
|
||||
matePairSize.get(read.getReadGroup().getLibrary()).put(insert, oldcount + 1);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
String[] libraries = matePairSize.keySet().toArray(new String[1]);
|
||||
|
||||
for (String library : libraries) {
|
||||
try {
|
||||
Integer[] sizes = matePairSize.get(library).keySet().toArray(new Integer[1]);
|
||||
|
||||
if (sizes != null && sizes.length > 1) {
|
||||
PrintWriter pw = new PrintWriter(String.format("%s/%s.pairdist", OUT_DIR.getAbsolutePath(), library));
|
||||
Arrays.sort(sizes);
|
||||
|
||||
pw.printf("%s\t%s%n", "insert", "frequency");
|
||||
|
||||
for (int insert : sizes) {
|
||||
if (insert >= 0) {
|
||||
pw.printf("%d\t%d%n", insert, matePairSize.get(library).get(insert));
|
||||
}
|
||||
}
|
||||
|
||||
pw.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Unable to initialize output files.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue