git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1516 348d0f76-0448-11de-a6fe-93d51630548a

This commit is contained in:
chartl 2009-09-03 15:17:02 +00:00
parent 3ac5ac066f
commit 5130ca9b94
3 changed files with 250 additions and 325 deletions

View File

@ -1,356 +1,154 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* TERMS OF USE: WE ARE NOT LIABLE FOR ANYTHING
*/
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.playground.utils.ArtificialPoolContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import java.util.*;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Set;
import java.util.List;
import java.util.ListIterator;
import java.util.LinkedList;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
// Display the depth of coverage at a given locus and output the power
public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFileWriter> {
@Argument(fullName="suppressLocusPrinting",doc="Suppress printing",required=false)
public boolean suppressPrinting = false;
@Argument(fullName="reductionProportion", shortName="rp", doc="Proportion of total coverage reflected in pool file",required=false)
public double red_prop = -1;
@Argument(fullName="auxOutputFile", shortName="af", doc="Filepath for Auxiliary pool information (true genotypes, etc.)", required=true)
public String auxFilepath = null;
@Argument(fullName="outputBamFile",shortName="of",doc="Output to this file rather than to standard output")
SAMFileWriter outputBamFile = null;
private List<Set<String>> readGroupSets;
private PrintWriter auxWrite;
private Pair<String,Double>[] local_genotypes;
private LinkedList<Integer>[] living_reads;
private SingleSampleGenotyper ssg;
private int npeople;
//TODO: LOCAL CLASS FOR ALL THIS
//@param local_genotypes - holds the genotype (A A/ A C/ etc) for each individual. Updates at each locus.
//@param auxWrite - the writer to the auxiliary file
//@param readGroupSets : holds the readgroups (identifiers for individuals from each read)
//@param ssg - the instance of the single sample genotyper
//@param living_reads - holds on to the selected reads from each person until the location passes the read endpoint
// Updates at each locus.
//@param npeople - number of people represented in this pool (size of readGroupSets)
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Aug 26, 2009
* Time: 11:28:26 AM
* To change this template use File | Settings | File Templates.
*/
public class ArtificialPoolWalker extends LocusWalker<ArtificialPoolContext, ArtificialPoolContext> {
@Argument(fullName = "AuxOutputFile", shortName = "af", doc = "Auxiliary file for genotyp & coverage output", required = true)
String auxFilePath = null;
@Argument(fullName = "OutputBamFile", shortName = "of", doc = "Output to this file rather than standard output", required = false)
SAMFileWriter outputBamFile = null;
/**
 * LocusWalker lifecycle hook. Intentionally empty: all per-run setup
 * (genotyper, read groups, writers) happens in reduceInit() instead.
 */
public void initialize() {
// initialize the output writer
// NOTE(review): the comment above is stale — no writer is initialized here; see reduceInit().
}
/**
 * Builds the initial reduce state: an ArtificialPoolContext carrying the
 * single-sample genotyper, the per-reader read-group sets, and both the
 * auxiliary text writer and the output BAM writer.
 *
 * @return the fully-initialized pool context used as the reduce accumulator
 */
public ArtificialPoolContext reduceInit() {
    final ArtificialPoolContext state = new ArtificialPoolContext();
    state.setSingleSampleGenotyper(new SingleSampleGenotyper());
    state.setReadGroupSets(getToolkit().getMergedReadGroupsByReaders());
    // The aux-file header depends on the pool size, so the read groups must be set first.
    state.setAuxWriter(initializeAuxFileWriter(state.getTotalNumberOfPeople()));
    state.setSAMFileWriter(outputBamFile);
    state.initializeSSG();
    return state;
}
/**
 * Per-locus pass: no work is done here. The tracker, reference, and pileup
 * are simply packaged into a fresh context for reduce() to consume.
 */
public ArtificialPoolContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    final ArtificialPoolContext locusData = new ArtificialPoolContext(tracker, ref, context);
    return locusData;
}
/**
 * Folds one locus into the running state: copies every read at the locus into
 * the output BAM and writes a placeholder line to the auxiliary file.
 *
 * NOTE(review): the locus data is read from redCon, but map() packs the
 * tracker/ref/pileup into mapCon — this likely should be
 * mapCon.getAlignmentContext(); confirm the intended merge semantics
 * (the deleted mapReduceMerge sketch suggests a merge was planned).
 *
 * @param mapCon per-locus context produced by map()
 * @param redCon accumulated state from reduceInit()/previous reduce calls
 * @return the (unchanged) accumulated state
 */
public ArtificialPoolContext reduce(ArtificialPoolContext mapCon, ArtificialPoolContext redCon) {
    AlignmentContext context = redCon.getAlignmentContext();
    SAMFileWriter writer = redCon.getSAMFileWriter();
    for (SAMRecord read : context.getReads()) {
        writer.addAlignment(read);
    }
    // Placeholder output until the real genotype/coverage reporting is wired in.
    PrintWriter auxWrite = redCon.getWriterToAuxiliaryFile();
    auxWrite.print("This is a test.");
    return redCon;
}
// Helper methods follow
private PrintWriter initializeAuxFileWriter(int nFiles) {
PrintWriter auxFileWriter;
try {
auxWrite = new PrintWriter(new FileOutputStream(auxFilepath));
}
catch(FileNotFoundException e) {
String ermsg = "auxiliary file for Artificial Pool Walker was either a folder or could not be created";
throw new StingException(ermsg, e);
auxFileWriter = new PrintWriter(new FileOutputStream(auxFilePath));
auxFileWriter.println(createAuxFileHeader(nFiles));
} catch(FileNotFoundException e) {
String errmsg = "The filepath you entered "+auxFilePath+" could not be opened. Please double-check that the input is correct.";
throw new StingException(errmsg, e);
} catch(IOException e) {
String errmsg = "The file you entered "+auxFilePath+" could not be written to. Please check your permissions to write to this file.";
throw new StingException(errmsg,e);
}
// create the file header and write to file
auxWrite.printf("%s%n",createFileHeader());
// obtain the readgroup sets
readGroupSets = getToolkit().getMergedReadGroupsByReaders();
npeople = readGroupSets.size();
// if a reduction proportion was unspecified, set it to 1/num_files
if(red_prop <= 0) {
red_prop = 1.0/npeople;
}
// initialize the local genotype array
local_genotypes = new Pair[npeople];
// initilize the single sample genotyper
ssg = new SingleSampleGenotyper();
ssg.initialize();
// initialize the living reads array
living_reads = new LinkedList[npeople];
for(int iter=0; iter < readGroupSets.size(); iter++) {
living_reads[iter] = new LinkedList<Integer>();
}
return auxFileWriter;
}
/**
 * Per-locus pass: ages out reads that ended before this locus, prints the
 * locus coordinate to the auxiliary file, then returns the NEW reads at this
 * locus (offset 0) stratified by read group, updating per-person genotypes
 * as a side effect.
 */
public List<SAMRecord>[] map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
updateLiving();
// each time we move to the next locus, remove from the coverage count those reads that ended
auxWrite.printf("%s:%s",context.getContig(),context.getPosition()); // TODO: PUT IN REDUCE
return getNewReadsAndGenotypesByGroup(tracker, ref, context);
}
/**
 * Seeds the reduce with the output BAM writer supplied via --outputBamFile.
 */
public SAMFileWriter reduceInit() {
return outputBamFile;
}
/**
 * Down-samples the new reads at this locus to red_prop of their total
 * coverage, writes the sampled reads (plus genotype/coverage columns) to the
 * output BAM and auxiliary file, and passes the writer through.
 */
public SAMFileWriter reduce(List<SAMRecord>[] readsByReadGroup, SAMFileWriter outFile) {
// want to generate a sub sample of the new reads
int total_new_coverage = 0;
for(int pers = 0; pers < npeople; pers++) {
total_new_coverage += readsByReadGroup[pers].size();
}
// ceil: keep at least one read whenever there is any new coverage
int sought_coverage = (int) Math.ceil(red_prop * (double)total_new_coverage);
List<SAMRecord>[] randomReadsByGroup = drawReadsRandomlyFromReadsByGroup(readsByReadGroup,sought_coverage);
printToFileAndAuxFile(randomReadsByGroup,sought_coverage,outFile);
return outFile;
}
// ArtificalPoolWalker helper methods begin below
/**
 * Simulates numToDraw uniform draws across the pool members, then trims each
 * person's read list accordingly.
 */
private List<SAMRecord>[] drawReadsRandomlyFromReadsByGroup(List<SAMRecord>[] readsByReadGroup, int numToDraw) {
    return removeReadsToCoverage(readsByReadGroup, simulateRandomUniformDraws(numToDraw, readsByReadGroup));
}
/**
 * For each person, removes simulatedCoverage[person] reads chosen uniformly
 * at random (in place) and returns the mutated array.
 *
 * NOTE(review): this removes the drawn count and returns the REMAINDER —
 * confirm whether the sample or its complement is the intended result.
 */
private List<SAMRecord>[] removeReadsToCoverage(List<SAMRecord>[] readsByReadGroup, int[] simulatedCoverage) {
    for (int person = 0; person < npeople; person++) {
        int remaining = readsByReadGroup[person].size();
        for (int toRemove = simulatedCoverage[person]; toRemove > 0; toRemove--) {
            // Pick a victim index uniformly from the reads still present.
            readsByReadGroup[person].remove((int) Math.floor(remaining * Math.random()));
            remaining--;
        }
    }
    return readsByReadGroup;
}
/**
 * Throws numToDraw darts uniformly at the pool members to decide how many
 * reads to draw from each, then redistributes any draws that exceed a
 * person's available reads.
 *
 * @param numToDraw         total number of reads to draw across the pool
 * @param readsByReadGroup  available reads per person (used as capacity caps)
 * @return per-person draw counts summing to numToDraw (capped by availability)
 */
private int[] simulateRandomUniformDraws(int numToDraw, List<SAMRecord>[] readsByReadGroup) {
    // Java zero-initializes int arrays, so the former explicit clearing loop was redundant.
    int[] drawsByPerson = new int[npeople];
    for (int draw = 0; draw < numToDraw; draw++) {
        drawsByPerson[(int) Math.floor((double) npeople * Math.random())]++;
    }
    // null offLimits signals "first call" to the redistribution recursion.
    return redistributeSimulatedCoverage(drawsByPerson, readsByReadGroup, 0, 0, null);
}
/**
 * Recursively rebalances simulated draw counts so no person is assigned more
 * draws than they have reads. Three phases, selected by the arguments:
 *   1) offLimits == null : first call — allocate the off-limits mask, recurse;
 *   2) excess == 0       : cap any person over their read count, marking them
 *                          off-limits and accumulating the overflow; done when
 *                          no new overflow appears;
 *   3) excess > 0        : scatter the overflow uniformly over the people not
 *                          yet off-limits, then re-check (back to phase 2).
 * Mutates drawsByPerson and offLimits in place; terminates because each
 * overflow pass marks at least one new person off-limits.
 */
private int[] redistributeSimulatedCoverage(int[] drawsByPerson, List<SAMRecord>[] readsByReadGroup, int excess, int nOffLimits, boolean[] offLimits) {
int [] result;
if(offLimits == null) { // first time calling
offLimits = new boolean[npeople];
for(int i = 0; i < npeople; i++) {
offLimits[i] = false;
}
result = redistributeSimulatedCoverage(drawsByPerson,readsByReadGroup,excess,0, offLimits);
} else if (excess == 0) { // no overt excess, need to check individual persons
// get excess coverage
int newExcess = 0;
for(int pers = 0; pers < npeople; pers++) {
if(!offLimits[pers] && drawsByPerson[pers] > readsByReadGroup[pers].size()) {
// Person can't supply this many reads: cap them and carry the overflow.
newExcess += drawsByPerson[pers] - readsByReadGroup[pers].size();
drawsByPerson[pers] = readsByReadGroup[pers].size();
offLimits[pers] = true;
nOffLimits++;
} else {
// do nothing
}
}
if(newExcess == 0) { // we are done!
result = drawsByPerson;
} else { // there is now overt excess
result = redistributeSimulatedCoverage(drawsByPerson,readsByReadGroup,newExcess,nOffLimits,offLimits);
}
} else { // there are overt excess reads to distribute
int nRemaining = npeople - nOffLimits;
int[] drawsToAdd = new int[nRemaining];
for(int j = 0; j < nRemaining; j++) {
drawsToAdd[j] = 0;
}
// Scatter the excess uniformly across the people still accepting draws.
for(int draw = excess; draw > 0; draw--) {
drawsToAdd[(int) Math.floor((double)nRemaining * Math.random())]++;
}
// Map the compact drawsToAdd slots back onto the full person indices.
int notOffLimitsCounter = 0;
for(int pers = 0; pers < npeople; pers++) {
if(! offLimits[pers]) {
drawsByPerson[pers] += drawsToAdd[notOffLimitsCounter];
notOffLimitsCounter++;
} else {
// do nothing
}
}
result = redistributeSimulatedCoverage(drawsByPerson,readsByReadGroup,0,nOffLimits,offLimits);
}
return result;
}
/**
 * Returns a copy of indexLeft with the entry at indexToRemoveFrom removed.
 *
 * @param peopleWithReadsLeft current length of indexLeft
 * @param indexLeft           person indices still eligible for draws
 * @param indexToRemoveFrom   position (within indexLeft) to drop
 * @return a new array of length peopleWithReadsLeft - 1
 */
private int[] createNewPersonIndex(int peopleWithReadsLeft, int[] indexLeft, int indexToRemoveFrom) {
    int[] newPersonIndex = new int[peopleWithReadsLeft - 1];
    // FIX: the loop previously ran to i < peopleWithReadsLeft, writing one slot
    // past the end of newPersonIndex (ArrayIndexOutOfBoundsException) on every call.
    for (int i = 0; i < peopleWithReadsLeft - 1; i++) {
        if (i < indexToRemoveFrom) {
            newPersonIndex[i] = indexLeft[i];
        } else {
            newPersonIndex[i] = indexLeft[i + 1]; // shift the tail left over the removed slot
        }
    }
    return newPersonIndex;
}
/**
 * Single pass over the pileup that (1) buckets every read and offset by the
 * person it belongs to (via the RG attribute matched against readGroupSets),
 * (2) collects the reads that START at this locus (offset == 0), and
 * (3) runs the single-sample genotyper per person, storing each call and its
 * confidence into local_genotypes as a side effect.
 *
 * @return the new (offset-0) reads at this locus, one list per person
 *
 * NOTE(review): the iter_person bounds check fires only AFTER the index
 * exceeds npeople, so an unmatched read group would hit an
 * IndexOutOfBoundsException in readGroupSets.get() first. Also n_reads_total
 * is computed but never used.
 */
private List<SAMRecord>[] getNewReadsAndGenotypesByGroup(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
{
// at the same time we will:
// 1) split reads up by read group
// 2) get new reads
// 3) get genotypes for each individual
// then we will:
// return the new reads (a subsample), stratified by read group
List<SAMRecord> allReads = context.getReads();
int n_reads_total = allReads.size();
List<Integer> allOffsets = context.getOffsets();
ListIterator readIterator = allReads.listIterator();
ListIterator offsetIterator = allOffsets.listIterator();
LinkedList<SAMRecord>[] reads_by_group = new LinkedList[npeople];
LinkedList<Integer>[] offsets_by_group = new LinkedList[npeople];
LinkedList<SAMRecord>[] new_reads_by_group = new LinkedList[npeople];
for(int j = 0; j < npeople; j++) {
reads_by_group[j] = new LinkedList<SAMRecord>();
offsets_by_group[j] = new LinkedList<Integer>();
new_reads_by_group[j] = new LinkedList<SAMRecord>();
}
while(readIterator.hasNext()) {
int iter_person = 0;
SAMRecord read = (SAMRecord) readIterator.next();
int offset = (Integer) offsetIterator.next();
// Linear scan of the read-group sets to find this read's owner.
while(!readGroupSets.get(iter_person).contains((String) read.getAttribute("RG"))) {
iter_person++;
// for debug purposes only:
if(iter_person > npeople) {
throw new StingException("Read not found in any read group in ArtificialPoolWalker. Life sucks.");
}
}
// here, iter_person is the person from whom the read came so:
reads_by_group[iter_person].add(read);
offsets_by_group[iter_person].add(offset);
// check to see if the read is new
if(offset == 0) {
new_reads_by_group[iter_person].add(read);
// add this to the new reads set to be randomly selected and printed
}
}
// now we obtain the genotypes
for(int iter=0; iter<npeople; iter++) {
// Genotype each person on just their own slice of the pileup.
AlignmentContext subContext = new AlignmentContext(context.getLocation(), reads_by_group[iter], offsets_by_group[iter]);
GenotypeCall call = ssg.map(tracker, ref, subContext);
local_genotypes[iter].set(call.getGenotypes().get(0).getBases(),call.getGenotypes().get(0).getConfidenceScore().getScore());
}
return new_reads_by_group;
}
private String createFileHeader() {
private String createAuxFileHeader(int nFiles) {
String sp = " ";
String st1 = "Chrom:Pos" + sp;
String st2 = "";
for(int j = 0; j < npeople; j++) {
for(int j = 0; j < nFiles; j++) {
st2 = st2 + "Pers " + j + " Gen" + sp; // short for "genotype of person j at this location"
st2 = st2 + "Pers " + j + " Conf" + sp; // short for "confidence in genotype call of ..."
st2 = st2 + "Pers " + j + " Cvg" + sp; // short for "coverage of person j at this location"
st2 = st2 + "Pers " + j + " NewCvg" + sp; // short for "coverage of person j at this location"
}
String st3 = "TotalCvg";
return st1+st2+st3;
}
private void updateLiving()
{
// this function updates the living reads, removing them if the new location goes beyond the end
private int[] getCoverageByGroup(List<SAMRecord>[] readsByGroup) {
int[] coverage = new int[readsByGroup.length];
for(int iterator = 0; iterator < readsByGroup.length; iterator ++) {
coverage[iterator] = readsByGroup[iterator].size();
}
int readLengthLeft;
ListIterator updater;
return coverage;
}
for(int j = 0; j < npeople; j++) {
updater = living_reads[j].listIterator();
private long[] updateRunningCoverage(long[] cvgUpToNow, int[] newCvgByGroup) {
long[] newCvg = new long[cvgUpToNow.length];
for(int iter = 0; iter < cvgUpToNow.length; iter++) {
newCvg[iter] = cvgUpToNow[iter] + newCvgByGroup[iter];
}
while(updater.hasNext()) {
readLengthLeft = (Integer) updater.next();
if(readLengthLeft <= 1) {
updater.remove();
} else {
updater.set(readLengthLeft-1);
}
return newCvg;
}
} // end while(updater.hasNext())
} // end for(int j=0; j < npeople; j++)
}
private double[] runningCoverageToDouble(long[] cvg) {
double[] avgProp = new double[cvg.length];
long sum = 0;
for(long elem : cvg) {
sum += elem;
}
for(int iter = 0; iter < cvg.length; iter++) {
avgProp[iter] = cvg[iter]/sum;
}
private void printToFileAndAuxFile(List<SAMRecord>[] downSampledReadsByGroup, int cvg, SAMFileWriter outFile)
{
// NOTE: the chromosome and position were already printed in the auxFile during map.
for(int pers = 0; pers < npeople; pers++) {
List<SAMRecord> personReads = downSampledReadsByGroup[pers];
for(SAMRecord read : personReads) {
outFile.addAlignment(read);
living_reads[pers].add(read.getReadLength());
return avgProp;
}
private void printToFiles(List<SAMRecord>[] sampledNewReads, ArtificialPoolContext context) {
SAMFileWriter samWrite = context.getSAMFileWriter();
String sp = " ";
PrintWriter auxWrite = context.getWriterToAuxiliaryFile();
int readGroupInt = 0;
for(List<SAMRecord> readGroup : sampledNewReads) {
for(SAMRecord read : readGroup) {
samWrite.addAlignment(read);
}
auxWrite.print(context.getAlignmentContext().getLocation().toString() + sp);
auxWrite.print(context.genotypeAndConfidenceToString(readGroupInt,sp));
readGroupInt++;
}
String auxformstring = " %s %f %d";
// now print to the aux file
int totalcvg = 0;
for(int pers = 0; pers < npeople; pers++) {
auxWrite.printf(auxformstring, local_genotypes[pers].first, local_genotypes[pers].second,living_reads[pers].size());
totalcvg += living_reads[pers].size();
}
auxWrite.printf(" %d%n", totalcvg);
}
}

View File

@ -35,8 +35,11 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
@Argument(fullName="bootStrap", shortName="bs", doc="Use a bootstrap method", required=false)
public boolean useBootstrap = false;
@Argument(fullName="lodThreshold", shortName="lt", doc="Threshold for LOD score for calls")
public double threshold = 3.0;
@Argument(fullName="lodThreshold", shortName="lt", doc="Threshold for LOD score for calls", required=false)
public double lodThreshold = 3.0;
@Argument(fullName="minQScore", shortName="qm", doc="Threshold for the minimum quality score, filter out bases below",required=false)
public byte minQScore = 0;
private static final int BOOTSTRAP_ITERATIONS = 300;
@ -58,11 +61,18 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
}
public Pair<Integer,Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> readsByDirection = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
AlignmentContext filteredContext;
if (this.getMinQualityScore() <= 0) {
filteredContext = context;
} else {
Pair<List<SAMRecord>,List<Integer>> readsFilteredByQuality = filterByQuality(context.getReads(),context.getOffsets(), this.getMinQualityScore());
filteredContext = new AlignmentContext(context.getLocation(),readsFilteredByQuality.getFirst(),readsFilteredByQuality.getSecond());
}
Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> readsByDirection = PoolUtils.splitReadsByReadDirection(filteredContext.getReads(),filteredContext.getOffsets());
if ( ! suppress_printing) {
Pair<double[],byte[]> powers = calculatePower(readsByDirection, useBootstrap, context);
out.printf("%s: %d %d %d %d %d %d %f %f %f%n", context.getLocation(), readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
context.getReads().size(), powers.getSecond()[0], powers.getSecond()[1], powers.getSecond()[2],
Pair<double[],byte[]> powers = calculatePower(readsByDirection, useBootstrap, filteredContext);
out.printf("%s: %d %d %d %d %d %d %f %f %f%n", filteredContext.getLocation(), readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
filteredContext.getReads().size(), powers.getSecond()[0], powers.getSecond()[1], powers.getSecond()[2],
powers.getFirst()[0], powers.getFirst()[1], powers.getFirst()[2]);
}
return new Pair(readsByDirection.getFirst().getFirst().size(),readsByDirection.getFirst().getSecond().size());
@ -73,9 +83,9 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
public Pair<double[],byte[]> calculatePower(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, boolean bootStrap, AlignmentContext context) {
Pair<double[],byte[]> powerAndMedianQ;
if( bootStrap ) {
powerAndMedianQ = calculateDirectionalPowersByBootstrap(dirReads,threshold,context);
powerAndMedianQ = calculateDirectionalPowersByBootstrap(dirReads,lodThreshold,context);
} else {
powerAndMedianQ = calculateDirectionalPowersTheoretical(dirReads,threshold,context);
powerAndMedianQ = calculateDirectionalPowersTheoretical(dirReads,lodThreshold,context);
}
return powerAndMedianQ;
@ -162,7 +172,15 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
}
public String createHeaderString() {
return "Chrom:Pos CvgF CvgR CvgC QmedF QmedR QmedC PowF PowR PowC";
return "Chrom:Pos ForwardCoverage ReverseCoverage CombinedCoverage ForwardMedianQ ReverseMedianQ CombinedMedianQ PowerForward PowerReverse PowerCombined";
}
/**
 * @return the minimum base quality (the --minQScore argument); bases below
 *         this are filtered out of the pileup before power is computed.
 */
public byte getMinQualityScore() {
return minQScore;
}
/**
 * Drops (read, offset) pairs whose base quality at this locus is below qMin.
 * Thin delegation to PoolUtils.thresholdReadsByQuality.
 */
public Pair<List<SAMRecord>,List<Integer>> filterByQuality(List<SAMRecord> reads, List<Integer> offsets, byte qMin) {
return PoolUtils.thresholdReadsByQuality(reads,offsets,qMin);
}
// class methods

View File

@ -18,6 +18,12 @@ public class PoolUtils {
private PoolUtils () {}
public static final int BASE_A_OFFSET = 0;
public static final int BASE_C_OFFSET = 1;
public static final int BASE_G_OFFSET = 2;
public static final int BASE_T_OFFSET = 3;
public static final int BASE_INDEXED_ARRAY_SIZE = 4;
public static Pair<Pair<List<SAMRecord>, List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> splitReadsByReadDirection(List<SAMRecord> reads, List<Integer> offsets) {
ArrayList<SAMRecord> forwardReads;
ArrayList<SAMRecord> reverseReads;
@ -48,5 +54,108 @@ public class PoolUtils {
return new Pair(new Pair(forwardReads,reverseReads), new Pair(forwardOffsets,reverseOffsets));
}
/**
 * Buckets a pileup by the base each read contributes at the current locus.
 * Slot order is A, C, G, T (see BASE_*_OFFSET); non-ACGT calls are skipped.
 *
 * @param reads   pileup reads (may be null, in which case (null, null) is returned)
 * @param offsets per-read offset of the current locus within each read
 * @return pair of base-indexed arrays of reads and offsets
 */
public static Pair<List<SAMRecord>[], List<Integer>[]> splitReadsByBase(List<SAMRecord> reads, List<Integer> offsets) {
    ArrayList<SAMRecord>[] readsByBase;
    ArrayList<Integer>[] offsetsByBase;
    if ( reads == null ) {
        readsByBase = null;
        offsetsByBase = null;
    } else {
        readsByBase = new ArrayList[BASE_INDEXED_ARRAY_SIZE];
        offsetsByBase = new ArrayList[BASE_INDEXED_ARRAY_SIZE];
        // FIX: the array slots were never populated, so every add() below threw
        // a NullPointerException on the first read; allocate the lists up front.
        for (int base = 0; base < BASE_INDEXED_ARRAY_SIZE; base++) {
            readsByBase[base] = new ArrayList<SAMRecord>();
            offsetsByBase[base] = new ArrayList<Integer>();
        }
        for (int readNum = 0; readNum < reads.size(); readNum++) {
            // Reuse the shared base->slot mapping instead of a duplicated switch.
            int slot = getBaseOffset((char) reads.get(readNum).getReadBases()[offsets.get(readNum)]);
            if (slot >= 0) {
                readsByBase[slot].add(reads.get(readNum));
                offsetsByBase[slot].add(offsets.get(readNum));
            } // else skip — TODO: INDEL AWARENESS
        }
    }
    return new Pair(readsByBase, offsetsByBase);
}
/**
 * Filters a pileup down to the (read, offset) pairs whose base quality at the
 * current locus is at least qThresh.
 *
 * @param reads   pileup reads (null propagates as (null, null))
 * @param offsets per-read offset of the current locus within each read
 * @param qThresh minimum base quality; a value <= 0 returns the inputs unchanged
 * @return pair of the surviving reads and their offsets
 */
public static Pair<List<SAMRecord>, List<Integer>> thresholdReadsByQuality(List<SAMRecord> reads, List<Integer> offsets, byte qThresh) {
    List<SAMRecord> threshReads;
    List<Integer> threshOffsets;
    if (reads == null) {
        threshReads = null;
        threshOffsets = null;
    } else if (qThresh <= 0) {
        // Every quality passes a non-positive threshold, so skip the copy entirely.
        threshReads = reads;
        threshOffsets = offsets;
    } else {
        threshReads = new ArrayList<SAMRecord>();   // FIX: was a raw ArrayList
        threshOffsets = new ArrayList<Integer>();   // FIX: was a raw ArrayList
        for (int readNo = 0; readNo < reads.size(); readNo++) {
            if (reads.get(readNo).getBaseQualities()[offsets.get(readNo)] >= qThresh) {
                threshReads.add(reads.get(readNo));
                threshOffsets.add(offsets.get(readNo));
            } // else drop this read at this locus
        }
    }
    return new Pair(threshReads, threshOffsets);
}
/**
 * Maps a base call (either case) to its slot in the base-indexed arrays used
 * throughout PoolUtils; returns -1 for anything unrecognized.
 */
public static int getBaseOffset(char base) {
    //TODO: indel offsets
    if (base == 'A' || base == 'a') {
        return getBaseAOffset();
    }
    if (base == 'C' || base == 'c') {
        return getBaseCOffset();
    }
    if (base == 'G' || base == 'g') {
        return getBaseGOffset();
    }
    if (base == 'T' || base == 't') {
        return getBaseTOffset();
    }
    return -1;
}
// Accessors for the canonical base -> array-slot mapping (A, C, G, T -> 0..3).
// Slot for A calls.
public static int getBaseAOffset() {
return BASE_A_OFFSET;
}
// Slot for C calls.
public static int getBaseCOffset() {
return BASE_C_OFFSET;
}
// Slot for G calls.
public static int getBaseGOffset() {
return BASE_G_OFFSET;
}
// Slot for T calls.
public static int getBaseTOffset() {
return BASE_T_OFFSET;
}
/**
 * Collects the quality score of the base each read contributes at the current
 * locus (reads and offsets are parallel lists).
 */
public static List<Byte> getListOfBaseQualities(List<SAMRecord> reads,List<Integer> offsets) {
    //TODO: this is a terrible method name. Change it to something better.
    List<Byte> locusQualities = new ArrayList<Byte>(reads.size());
    int readIndex = 0;
    for (SAMRecord read : reads) {
        locusQualities.add(read.getBaseQualities()[offsets.get(readIndex)]);
        readIndex++;
    }
    return locusQualities;
}
}