New:
Minor changes to CoverageAndPowerWalker bootstrapping (faster selection of indices). Entirely new Artificial Pool Walker (ArtificialPoolWalkerMk2), which will likely replace ArtificialPoolWalker in the next commit. Adapted the method of sampling, and added a helper context class: ArtificialPoolContext, which carries much of the burden of calculation and data handling for the walker. The walker itself maps and reduces ArtificialPoolContexts. Cheers! git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1461 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
92ea947c33
commit
1da45cffb3
|
|
@ -29,6 +29,8 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
|||
String pathToSyzygyFile = null;
|
||||
@Argument(fullName = "ColumnOffset", shortName = "co", doc = "Offset of column containing the power in the pf", required = true)
|
||||
int colOffset = 0;
|
||||
@Argument(fullName = "linesToClear", shortName="clr", doc = "Clear so many lines from the read file before starting (default - just the header line)", required = false)
|
||||
int clrLines = 1;
|
||||
|
||||
BufferedReader syzyFileReader;
|
||||
final String pfFileDelimiter = " ";
|
||||
|
|
@ -40,7 +42,9 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
|||
super.initialize();
|
||||
try {
|
||||
syzyFileReader = new BufferedReader(new FileReader(pathToSyzygyFile));
|
||||
syzyFileReader.readLine();
|
||||
for(int clear = 0; clear < clrLines; clear++) {
|
||||
syzyFileReader.readLine();
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
String newErrMsg = "Syzygy input file " + pathToSyzygyFile + " could be incorrect. File not found.";
|
||||
throw new StingException(newErrMsg,e);
|
||||
|
|
@ -69,8 +73,8 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
|||
if(!syzyFileIsReady) {
|
||||
throw new StingException("Input file reader was not ready before an attempt to read from it.");
|
||||
} else if(!outOfLinesInSyzyFile) {
|
||||
double syzyPow = getSyzyPowFromFile();
|
||||
out.printf("%s: %d %d %f %f%n", context.getLocation(), context.getReads().size(),powpair.second,powpair.first,syzyPow);
|
||||
Pair<Double,String> syzyPow = getSyzyPowFromFile();
|
||||
out.printf("%s: %d %d %f %f (%s)%n", context.getLocation(), context.getReads().size(),powpair.second,powpair.first,syzyPow.first,syzyPow.second);
|
||||
} else {
|
||||
out.printf("%s: %d %d %f%n", context.getLocation(), context.getReads().size(),powpair.second,powpair.first);
|
||||
}
|
||||
|
|
@ -79,7 +83,7 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
|||
return context.getReads().size();
|
||||
}
|
||||
|
||||
public double getSyzyPowFromFile() {
|
||||
public Pair<Double,String> getSyzyPowFromFile() {
|
||||
String thisLine = null;
|
||||
try {
|
||||
thisLine = syzyFileReader.readLine();
|
||||
|
|
@ -87,15 +91,17 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
|||
String newErrMsg = "Ran out of lines in the syzyfile; further output of Syzygy power will be suppressed.";
|
||||
outOfLinesInSyzyFile=true;
|
||||
logger.warn(newErrMsg + " " + e.toString());
|
||||
return -1.1;
|
||||
return new Pair(-1.1, "Printing Stops Here");
|
||||
}
|
||||
|
||||
String chromPos = null;
|
||||
StringTokenizer lineTokenizer = new StringTokenizer(thisLine, pfFileDelimiter);
|
||||
try {
|
||||
for(int j = 0; j < colOffset; j++) {
|
||||
chromPos = lineTokenizer.nextToken();
|
||||
for(int j = 1; j < colOffset; j++) {
|
||||
lineTokenizer.nextToken();
|
||||
}
|
||||
return (Double.valueOf(lineTokenizer.nextToken())/100.0);
|
||||
return new Pair((Double.valueOf(lineTokenizer.nextToken())/100.0),chromPos);
|
||||
} catch (NoSuchElementException e) {
|
||||
String errMsg = "The given column offset for the pool, " + colOffset + " exceeded the number of entries in the file " + pathToSyzygyFile;
|
||||
throw new StingException(errMsg);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,146 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.playground.utils.ArtificialPoolContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Aug 26, 2009
|
||||
* Time: 11:28:26 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ArtificalPoolWalkerMk2 extends LocusWalker<ArtificialPoolContext, ArtificialPoolContext> {
|
||||
|
||||
@Argument(fullName = "AuxOutputFile", shortName = "af", doc = "Auxiliary file for genotyp & coverage output", required = true)
|
||||
String auxFilePath = null;
|
||||
@Argument(fullName = "OutputBamFile", shortName = "of", doc = "Output to this file rather than standard output", required = false)
|
||||
SAMFileWriter outputBamFile = null;
|
||||
|
||||
public void initialize() {
|
||||
}
|
||||
|
||||
public ArtificialPoolContext reduceInit() { // try to initialize the file writer
|
||||
ArtificialPoolContext apContext = new ArtificialPoolContext();
|
||||
apContext.setSingleSampleGenotyper(new SingleSampleGenotyper());
|
||||
apContext.setReadGroupSets(getToolkit().getMergedReadGroupsByReaders());
|
||||
apContext.setAuxWriter(initializeAuxFileWriter(apContext.getTotalNumberOfPeople()));
|
||||
apContext.setSAMFileWriter(outputBamFile);
|
||||
apContext.initializeSSG();
|
||||
|
||||
return apContext;
|
||||
}
|
||||
|
||||
public ArtificialPoolContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return new ArtificialPoolContext(tracker,ref,context);
|
||||
}
|
||||
|
||||
public ArtificialPoolContext reduce(ArtificialPoolContext mapCon, ArtificialPoolContext redCon){
|
||||
ArtificialPoolContext updatedContext = ArtificialPoolContext.mapReduceMerge(mapCon,redCon);
|
||||
List<SAMRecord>[] newReads = updatedContext.splitReadsByGroup(updatedContext.getNewReads());
|
||||
long[] newCvg = updateRunningCoverage(updatedContext.getRunningCoverage(), getCoverageByGroup(newReads));
|
||||
updatedContext.setRunningCoverage(newCvg);
|
||||
List<SAMRecord>[] sampledReads = ArtificialPoolContext.sampleReads(newReads,runningCoverageToDouble(newCvg));
|
||||
printToFiles(sampledReads,updatedContext);
|
||||
return updatedContext;
|
||||
}
|
||||
|
||||
// Helper methods follow
|
||||
|
||||
private PrintWriter initializeAuxFileWriter(int nFiles) {
|
||||
PrintWriter auxFileWriter;
|
||||
try {
|
||||
auxFileWriter = new PrintWriter(new FileOutputStream(auxFilePath));
|
||||
auxFileWriter.println(createAuxFileHeader(nFiles));
|
||||
} catch(FileNotFoundException e) {
|
||||
String errmsg = "The filepath you entered "+auxFilePath+" could not be opened. Please double-check that the input is correct.";
|
||||
throw new StingException(errmsg, e);
|
||||
} catch(IOException e) {
|
||||
String errmsg = "The file you entered "+auxFilePath+" could not be written to. Please check your permissions to write to this file.";
|
||||
throw new StingException(errmsg,e);
|
||||
}
|
||||
|
||||
return auxFileWriter;
|
||||
}
|
||||
|
||||
private String createAuxFileHeader(int nFiles) {
|
||||
String sp = " ";
|
||||
String st1 = "Chrom:Pos" + sp;
|
||||
String st2 = "";
|
||||
for(int j = 0; j < nFiles; j++) {
|
||||
st2 = st2 + "Pers " + j + " Gen" + sp; // short for "genotype of person j at this location"
|
||||
st2 = st2 + "Pers " + j + " Conf" + sp; // short for "confidence in genotype call of ..."
|
||||
st2 = st2 + "Pers " + j + " NewCvg" + sp; // short for "coverage of person j at this location"
|
||||
}
|
||||
String st3 = "TotalCvg";
|
||||
|
||||
return st1+st2+st3;
|
||||
}
|
||||
|
||||
private int[] getCoverageByGroup(List<SAMRecord>[] readsByGroup) {
|
||||
int[] coverage = new int[readsByGroup.length];
|
||||
for(int iterator = 0; iterator < readsByGroup.length; iterator ++) {
|
||||
coverage[iterator] = readsByGroup[iterator].size();
|
||||
}
|
||||
|
||||
return coverage;
|
||||
}
|
||||
|
||||
private long[] updateRunningCoverage(long[] cvgUpToNow, int[] newCvgByGroup) {
|
||||
long[] newCvg = new long[cvgUpToNow.length];
|
||||
for(int iter = 0; iter < cvgUpToNow.length; iter++) {
|
||||
newCvg[iter] = cvgUpToNow[iter] + newCvgByGroup[iter];
|
||||
}
|
||||
|
||||
return newCvg;
|
||||
}
|
||||
|
||||
private double[] runningCoverageToDouble(long[] cvg) {
|
||||
double[] avgProp = new double[cvg.length];
|
||||
long sum = 0;
|
||||
for(long elem : cvg) {
|
||||
sum += elem;
|
||||
}
|
||||
for(int iter = 0; iter < cvg.length; iter++) {
|
||||
avgProp[iter] = cvg[iter]/sum;
|
||||
}
|
||||
|
||||
return avgProp;
|
||||
}
|
||||
|
||||
private void printToFiles(List<SAMRecord>[] sampledNewReads, ArtificialPoolContext context) {
|
||||
SAMFileWriter samWrite = context.getSAMFileWriter();
|
||||
String sp = " ";
|
||||
PrintWriter auxWrite = context.getWriterToAuxiliaryFile();
|
||||
int readGroupInt = 0;
|
||||
for(List<SAMRecord> readGroup : sampledNewReads) {
|
||||
for(SAMRecord read : readGroup) {
|
||||
samWrite.addAlignment(read);
|
||||
}
|
||||
auxWrite.print(context.getAlignmentContext().getLocation().toString() + sp);
|
||||
auxWrite.print(context.genotypeAndConfidenceToString(readGroupInt,sp));
|
||||
readGroupInt++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -54,6 +54,7 @@ public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFile
|
|||
private LinkedList<Integer>[] living_reads;
|
||||
private SingleSampleGenotyper ssg;
|
||||
private int npeople;
|
||||
//TODO: LOCAL CLASS FOR ALL THIS
|
||||
//@param local_genotypes - holds the genotype (A A/ A C/ etc) for each individual. Updates at each locus.
|
||||
//@param auxWrite - the writer to the auxiliary file
|
||||
//@param readGroupSets : holds the readgroups (identifiers for individuals from each read)
|
||||
|
|
@ -86,8 +87,6 @@ public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFile
|
|||
|
||||
if(red_prop <= 0) {
|
||||
red_prop = 1.0/npeople;
|
||||
} else {
|
||||
// do nothing muhahaha
|
||||
}
|
||||
|
||||
// initialize the local genotype array
|
||||
|
|
@ -112,7 +111,7 @@ public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFile
|
|||
|
||||
updateLiving();
|
||||
// each time we move to the next locus, remove from the coverage count those reads that ended
|
||||
auxWrite.printf("%s:%s",context.getContig(),context.getPosition());
|
||||
auxWrite.printf("%s:%s",context.getContig(),context.getPosition()); // TODO: PUT IN REDUCE
|
||||
|
||||
return getNewReadsAndGenotypesByGroup(tracker, ref, context);
|
||||
}
|
||||
|
|
@ -134,7 +133,6 @@ public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFile
|
|||
List<SAMRecord>[] randomReadsByGroup = drawReadsRandomlyFromReadsByGroup(readsByReadGroup,sought_coverage);
|
||||
printToFileAndAuxFile(randomReadsByGroup,sought_coverage,outFile);
|
||||
return outFile;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -32,15 +32,10 @@ public class CoverageAndPowerWalker extends LocusWalker<Integer, Pair<Long, Long
|
|||
|
||||
@Argument(fullName="lodThreshold", shortName="lt", doc="Threshold for LOD score for calls")
|
||||
public double threshold = 3.0;
|
||||
|
||||
|
||||
|
||||
private static final int BOOTSTRAP_ITERATIONS = 300;
|
||||
|
||||
|
||||
@Override
|
||||
public void initialize()
|
||||
{
|
||||
public void initialize() {
|
||||
|
||||
if(num_individuals <= 0)
|
||||
throw new IllegalArgumentException("Positive nonzero parameter expected for poolSize");
|
||||
|
|
@ -117,9 +112,7 @@ public class CoverageAndPowerWalker extends LocusWalker<Integer, Pair<Long, Long
|
|||
double pow = 0;
|
||||
|
||||
if(depth - kaccept < kaccept) {// kaccept > depth/2 - calculate power as P[hits between k and depth]
|
||||
|
||||
for(int k = kaccept; k < depth; k++) {
|
||||
|
||||
pow += MathUtils.binomialProbabilityLog(k, depth, snp_prop);
|
||||
}
|
||||
|
||||
|
|
@ -153,14 +146,8 @@ public class CoverageAndPowerWalker extends LocusWalker<Integer, Pair<Long, Long
|
|||
if (depth <= 0) {
|
||||
result = new Pair(-1,-1);
|
||||
} else if (!use_bootstrap) { // object data from command line
|
||||
|
||||
result = powerTheoretical(depth, reads, offsets, single_snip_proportion);
|
||||
} else {
|
||||
|
||||
//
|
||||
// otherwise, bootstrapping occurs below
|
||||
//
|
||||
|
||||
} else { // otherwise, bootstrapping occurs below
|
||||
int hypothesis_rejections=0;
|
||||
|
||||
for(int boot = 0; boot < BOOTSTRAP_ITERATIONS; boot++)
|
||||
|
|
@ -196,15 +183,7 @@ public class CoverageAndPowerWalker extends LocusWalker<Integer, Pair<Long, Long
|
|||
|
||||
public int randomlySelectRead(int depth)
|
||||
{
|
||||
double qscore_selector = Math.random();
|
||||
int readspositionrandom;
|
||||
for(readspositionrandom = 1; readspositionrandom < ((double)depth * qscore_selector); readspositionrandom ++) {
|
||||
if(readspositionrandom > depth + 1) {
|
||||
throw new RuntimeException("qscore iterator exceeding possible thresholds");
|
||||
}
|
||||
}
|
||||
|
||||
return readspositionrandom - 1;
|
||||
return (int) Math.floor((double)depth * Math.random());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,286 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
import java.util.Set;
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Aug 26, 2009
|
||||
* Time: 11:37:42 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ArtificialPoolContext {
|
||||
private PrintWriter writerToAuxiliaryFile;
|
||||
private SAMFileWriter writerToSamFile;
|
||||
private SingleSampleGenotyper ssg;
|
||||
private List<Set<String>> readGroupSets;
|
||||
private long[] runningCoverage;
|
||||
private RefMetaDataTracker refTracker;
|
||||
private ReferenceContext refContext;
|
||||
private AlignmentContext aliContext;
|
||||
|
||||
public ArtificialPoolContext() {
|
||||
readGroupSets = null;
|
||||
writerToAuxiliaryFile = null;
|
||||
writerToSamFile = null;
|
||||
ssg = null;
|
||||
refTracker = null;
|
||||
aliContext = null;
|
||||
refContext = null;
|
||||
runningCoverage = null;
|
||||
}
|
||||
|
||||
public ArtificialPoolContext(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
refTracker = tracker;
|
||||
refContext = ref;
|
||||
aliContext = context;
|
||||
readGroupSets = null;
|
||||
writerToAuxiliaryFile = null;
|
||||
writerToSamFile=null;
|
||||
ssg = null;
|
||||
runningCoverage = null;
|
||||
}
|
||||
|
||||
public ArtificialPoolContext(PrintWriter pw, SAMFileWriter sw, SingleSampleGenotyper g, List<Set<String>> rgs, long [] runcvg, RefMetaDataTracker rt, ReferenceContext rc, AlignmentContext ac) {
|
||||
writerToAuxiliaryFile = pw;
|
||||
writerToSamFile = sw;
|
||||
ssg = g;
|
||||
readGroupSets = rgs;
|
||||
runningCoverage = runcvg;
|
||||
refTracker = rt;
|
||||
refContext = rc;
|
||||
aliContext = ac;
|
||||
}
|
||||
|
||||
public void setAuxWriter(PrintWriter writer) {
|
||||
writerToAuxiliaryFile = writer;
|
||||
}
|
||||
|
||||
public void setSingleSampleGenotyper(SingleSampleGenotyper typer) {
|
||||
ssg = typer;
|
||||
}
|
||||
|
||||
public void initializeSSG() {
|
||||
ssg.initialize();
|
||||
}
|
||||
|
||||
public void setReadGroupSets(List<Set<String>> rgSets) {
|
||||
readGroupSets = rgSets;
|
||||
}
|
||||
|
||||
public void setRefMetaDataTracker(RefMetaDataTracker tracker) {
|
||||
refTracker = tracker;
|
||||
}
|
||||
|
||||
public void setReferenceContext(ReferenceContext ref) {
|
||||
refContext = ref;
|
||||
}
|
||||
|
||||
public void setAlignmentContext(AlignmentContext context) {
|
||||
aliContext = context;
|
||||
}
|
||||
|
||||
public void setRunningCoverage(long[] estimate) {
|
||||
runningCoverage = estimate;
|
||||
}
|
||||
|
||||
public void setSAMFileWriter(SAMFileWriter writer) {
|
||||
writerToSamFile = writer;
|
||||
}
|
||||
|
||||
public int getTotalNumberOfPeople() {
|
||||
return readGroupSets.size();
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getRefMetaDataTracker() {
|
||||
return refTracker;
|
||||
}
|
||||
|
||||
public ReferenceContext getReferenceContext() {
|
||||
return refContext;
|
||||
}
|
||||
|
||||
public AlignmentContext getAlignmentContext() {
|
||||
return aliContext;
|
||||
}
|
||||
|
||||
public PrintWriter getWriterToAuxiliaryFile() {
|
||||
return writerToAuxiliaryFile;
|
||||
}
|
||||
|
||||
public SingleSampleGenotyper getSingleSampleGenotyper() {
|
||||
return ssg;
|
||||
}
|
||||
|
||||
public List<Set<String>> getReadGroupSets() {
|
||||
return readGroupSets;
|
||||
}
|
||||
|
||||
public long[] getRunningCoverage() {
|
||||
return runningCoverage;
|
||||
}
|
||||
|
||||
public SAMFileWriter getSAMFileWriter() {
|
||||
return writerToSamFile;
|
||||
}
|
||||
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads;
|
||||
if(aliContext == null) {
|
||||
reads=null;
|
||||
} else {
|
||||
reads = aliContext.getReads();
|
||||
}
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets;
|
||||
if(aliContext == null) {
|
||||
offsets = null;
|
||||
} else {
|
||||
offsets = aliContext.getOffsets();
|
||||
}
|
||||
|
||||
return offsets;
|
||||
}
|
||||
|
||||
public List<SAMRecord> getNewReads() {
|
||||
List<SAMRecord> newReads;
|
||||
|
||||
if(aliContext == null) {
|
||||
newReads = null;
|
||||
} else {
|
||||
newReads = new LinkedList<SAMRecord>();
|
||||
List<SAMRecord> allReads = aliContext.getReads();
|
||||
List<Integer> allOffsets = aliContext.getOffsets();
|
||||
|
||||
for(int iter = 0; iter < allReads.size(); iter++) {
|
||||
if(allOffsets.get(iter) == 0) {
|
||||
newReads.add(allReads.get(iter));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return newReads;
|
||||
}
|
||||
|
||||
public Pair<List<SAMRecord>[],List<Integer>[]> splitByGroup(List<SAMRecord> unsplitReads, List<Integer> unsplitOffsets) {
|
||||
List<SAMRecord>[] readsSplitByGroup;
|
||||
List<Integer> [] offsetsSplitByGroup;
|
||||
|
||||
if(unsplitReads != null && readGroupSets != null) {
|
||||
readsSplitByGroup = new ArrayList[this.getTotalNumberOfPeople()];
|
||||
if(unsplitOffsets != null) {
|
||||
offsetsSplitByGroup = new ArrayList[this.getTotalNumberOfPeople()];
|
||||
}
|
||||
else {
|
||||
offsetsSplitByGroup = null;
|
||||
}
|
||||
int listSize = unsplitReads.size();
|
||||
for(int element = 0; element < listSize; element++) {
|
||||
SAMRecord read = unsplitReads.get(element);
|
||||
for(int groupNumber = 0; groupNumber < this.getTotalNumberOfPeople(); groupNumber++) {
|
||||
if(readGroupSets.get(groupNumber).contains((String) read.getAttribute("RG"))) {
|
||||
readsSplitByGroup[groupNumber].add(read);
|
||||
if(offsetsSplitByGroup != null) {
|
||||
offsetsSplitByGroup[groupNumber].add(unsplitOffsets.get(element));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
readsSplitByGroup = null;
|
||||
offsetsSplitByGroup = null; // compiler complains without these lines
|
||||
}
|
||||
|
||||
return new Pair(readsSplitByGroup,offsetsSplitByGroup);
|
||||
}
|
||||
|
||||
public List<SAMRecord>[] splitReadsByGroup(List<SAMRecord> unsplitReads) {
|
||||
return (this.splitByGroup(unsplitReads,null)).first;
|
||||
}
|
||||
|
||||
|
||||
// Static methods follow
|
||||
|
||||
public static ArtificialPoolContext mapReduceMerge(ArtificialPoolContext mapContext, ArtificialPoolContext reduceContext) {
|
||||
return new ArtificialPoolContext(reduceContext.getWriterToAuxiliaryFile(),reduceContext.getSAMFileWriter(),
|
||||
reduceContext.getSingleSampleGenotyper(), reduceContext.getReadGroupSets(), reduceContext.getRunningCoverage(),
|
||||
mapContext.getRefMetaDataTracker(),mapContext.getReferenceContext(),mapContext.getAlignmentContext());
|
||||
}
|
||||
|
||||
public static Pair<List<SAMRecord>[],List<Integer>> sampleReadsAndOffsets(List<SAMRecord>[] reads, List<Integer>[] offsets, double[] propEstGlobal) {
|
||||
double[] samplingRate = calculateSamplingRateFromGlobalEstimate(propEstGlobal);
|
||||
List<SAMRecord>[] sampledReads = new ArrayList[reads.length];
|
||||
List<Integer>[] sampledOffsets;
|
||||
if(offsets != null){
|
||||
sampledOffsets = new ArrayList[offsets.length];
|
||||
} else {
|
||||
sampledOffsets = null;
|
||||
}
|
||||
|
||||
for(int group = 0; group < reads.length; group++) {
|
||||
for(int readNumber = 0; readNumber < reads[group].size(); readNumber++) {
|
||||
if(Math.random() < samplingRate[group]) {
|
||||
sampledReads[group].add(reads[group].get(readNumber));
|
||||
if(sampledOffsets != null) {
|
||||
sampledOffsets[group].add(offsets[group].get(readNumber));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair(sampledReads,sampledOffsets);
|
||||
}
|
||||
|
||||
public String genotypeAndConfidenceToString(int group, String spacer) {
|
||||
GenotypeCall call = this.getGenotypeCall(group);
|
||||
return (call.getGenotypes() + spacer + call.getConfidenceScore().toString());
|
||||
}
|
||||
|
||||
public GenotypeCall getGenotypeCall(int group) {
|
||||
AlignmentContext alicon = this.getAlignmentContext();
|
||||
Pair<List<SAMRecord>[],List<Integer>[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets());
|
||||
return ssg.map(this.getRefMetaDataTracker(),this.getReferenceContext(),
|
||||
new AlignmentContext(this.getAlignmentContext().getLocation(), byGroupSplitPair.first[group],byGroupSplitPair.second[group]));
|
||||
}
|
||||
|
||||
public static List<SAMRecord>[] sampleReads(List<SAMRecord>[] reads, double[] propEstGlobal) {
|
||||
return (sampleReadsAndOffsets(reads, null, propEstGlobal)).first;
|
||||
}
|
||||
|
||||
public static double[] calculateSamplingRateFromGlobalEstimate(double[] ratios) {
|
||||
double min = ratios[0];
|
||||
for(double ratio : ratios) {
|
||||
if(ratio < min) {
|
||||
min = ratio;
|
||||
}
|
||||
}
|
||||
double[] samplingRate = new double[ratios.length];
|
||||
// now divide by minimum
|
||||
for(int j = 0; j < ratios.length; j++) {
|
||||
samplingRate[j] = ratios[j]/min;
|
||||
}
|
||||
|
||||
return samplingRate;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue