From 93cedf42855df36983da1f3d08ffd5e010c31690 Mon Sep 17 00:00:00 2001 From: chartl Date: Thu, 3 Sep 2009 21:26:04 +0000 Subject: [PATCH] --------------- | Added items | --------------- @/varianteval/PoolAnalysis Interface to identify variant analyses that are pool-specific. @/varianteval/BasicPoolVariantAnalysis Nearly the same as BasicVariantAnalysis with the addition of a protected integer (numIndividualsInPool) which holds the pool size. One soulcrushing change is that "protected String filename" needed to become "protected String[] filename" since now multiple truth files may be looked at. It was tempting to make the change in BasicVariantAnalysis with some default methods that would maintain usability of the remainder of the VariantAnalysis objects, but I decided to hold off. We can always merge these together later. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1526 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/poolseq/AnalyzePowerWalker.java | 138 ++++++++++++++--- .../poolseq/ArtificalPoolWalkerMk2.java | 146 ------------------ .../varianteval/BasicPoolVariantAnalysis.java | 58 +++++++ .../walkers/varianteval/PoolAnalysis.java | 12 ++ 4 files changed, 186 insertions(+), 168 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/ArtificalPoolWalkerMk2.java create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/AnalyzePowerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/AnalyzePowerWalker.java index 191327e49..5f51c6b80 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/AnalyzePowerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/AnalyzePowerWalker.java @@ -40,14 +40,19 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{ BufferedReader syzyFileReader; final String pfFileDelimiter = " "; boolean outOfLinesInSyzyFile = false; + private double[][] syzyPowerTable; + private String[][] syzySanityCheckTable; @Override public void initialize() { super.initialize(); + syzyPowerTable = new double[8][1500000]; + syzySanityCheckTable = new String[8][1500000]; try { syzyFileReader = new BufferedReader(new FileReader(pathToSyzygyFile)); - System.out.println(syzyFileReader.readLine()); + logger.error("generatePowerTable called"); + generatePowerTable(syzyFileReader); } catch (FileNotFoundException e) { String newErrMsg = "Syzygy input file " + pathToSyzygyFile + " could be incorrect. File not found."; throw new StingException(newErrMsg,e); @@ -59,34 +64,36 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{ } @Override - public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) + public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext rawContext) { + AlignmentContext context; + if (super.getMinQualityScore() <= 0) { + context = rawContext; + } else { + Pair,List> readsFilteredByQuality = filterByQuality(rawContext.getReads(),rawContext.getOffsets(), super.getMinQualityScore()); + context = new AlignmentContext(rawContext.getLocation(),readsFilteredByQuality.getFirst(),readsFilteredByQuality.getSecond()); + } Pair, List>,Pair,List>> splitReads = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets()); if ( !super.suppress_printing ) { Pair powPair = super.calculatePower(splitReads,false,context); - boolean syzyFileIsReady=true; - try { - syzyFileIsReady = syzyFileReader.ready(); - } - catch(IOException e) { - throw new StingException("Input file reader was not ready before an attempt to read from it", e); - } - if(!syzyFileIsReady) { - throw new StingException("Input file reader was not ready before an attempt to read from it, but there was no IOException"); - } else if(!outOfLinesInSyzyFile) { - Pair syzyPow = getSyzyPowFromFile(); - out.printf("%s: %d %d %d %d %d %d %f %f %f %f |%s%n", context.getLocation(), splitReads.getFirst().getFirst().size(), splitReads.getFirst().getSecond().size(), - context.getReads().size(), powPair.getSecond()[0], powPair.getSecond()[1], powPair.getSecond()[2], - powPair.getFirst()[0], powPair.getFirst()[1], powPair.getFirst()[2], syzyPow.getFirst(), syzyPow.getSecond()); - } else { - out.printf("%s: $d %d %d %d %d %d %d %f %f %f%n", context.getLocation(), splitReads.getFirst().getFirst().size(), splitReads.getFirst().getSecond().size(), - context.getReads().size(), powPair.getSecond()[0], powPair.getSecond()[1], powPair.getSecond()[2], - powPair.getFirst()[0], powPair.getFirst()[1], powPair.getFirst()[2]); + + int tabIndexChrom = getTableChromIndex(context.getLocation().toString()); + int tabIndexLoc = getTablePosIndex(context.getLocation().toString()); + double syzyPow = 0; + String syzySanity = "NoLoc"; + if(tabIndexChrom >= 0 && tabIndexLoc >= 0) { + syzyPow += syzyPowerTable[tabIndexChrom][tabIndexLoc]; + syzySanity = "s " + syzySanityCheckTable[tabIndexChrom][tabIndexLoc]; + } + out.printf("%s|%s: %d %d %d %d %d %d %f %f %f %f %n", context.getLocation(), syzySanity, splitReads.getFirst().getFirst().size(), splitReads.getFirst().getSecond().size(), + context.getReads().size(), powPair.getSecond()[0], powPair.getSecond()[1], powPair.getSecond()[2], + powPair.getFirst()[0], powPair.getFirst()[1], powPair.getFirst()[2], syzyPow); + } return new Pair(splitReads.getFirst().getFirst().size(), splitReads.getFirst().getFirst().size()); @@ -103,13 +110,14 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{ return new Pair(-1.1, "Printing Stops Here"); } - String chromPos = null; + String chromPosTar = null; StringTokenizer lineTokenizer = new StringTokenizer(thisLine, pfFileDelimiter); try { - chromPos = lineTokenizer.nextToken(); + chromPosTar = lineTokenizer.nextToken(); for(int j = 1; j < colOffset; j++) { lineTokenizer.nextToken(); } + String chromPos = (new StringTokenizer(chromPosTar, "\t")).nextToken(); return new Pair((Double.valueOf(lineTokenizer.nextToken())/100.0),chromPos); } catch (NoSuchElementException e) { String errMsg = "The given column offset for the pool, " + colOffset + " exceeded the number of entries in the file " + pathToSyzygyFile; @@ -120,4 +128,90 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{ public String createHeaderString() { return (super.createHeaderString() + " PowSyz"); } + + //TODO: rewrite this so it's not a complete hack!!! + + public void generatePowerTable(BufferedReader syzFile) { + + System.out.println("generatePowerTable entered"); + int numreads = 0; + + try{ + while(syzFile.ready()) { + String line = syzFile.readLine(); + if(line == null || line.equals("")) { + break; + } + StringTokenizer tok = new StringTokenizer(line, " :\t"); + String chrmNoStrWithChr = tok.nextToken(); + String chrmNoStr = chrmNoStrWithChr.substring(3); + String locStr = tok.nextToken(); + for(int j = colOffset - 2; j > 0; j --) { + tok.nextToken(); + } + numreads++; + String syzyPowStr = tok.nextToken(); + if ( chrmNoStr == null || locStr == null || chrmNoStr.equals("") || locStr.equals("")) { + break; + } + int chrIndex = getTableChromIndex(chrmNoStr,locStr); + int posIndex = getTablePosIndex(chrmNoStr,locStr); + syzyPowerTable[chrIndex][posIndex] = Double.valueOf(syzyPowStr)/100.0; + syzySanityCheckTable[chrIndex][posIndex] = "chrm" + chrmNoStr +":" + locStr; + if( (numreads % 1000) == 0){ + System.out.println(numreads + " reads from Syzygy file"); + } + } + } catch (IOException e) { + // do nothing + System.out.println("IOException caught"); + } + System.out.println("Table generated."); + + } + + public int getTableChromIndex(String chromNo, String locNo) { + switch (Integer.valueOf(chromNo)) { + case 1: return 0; + case 2: return 1; + case 3: + if ( Integer.valueOf(locNo) < 63000000 ) + return 2; + else + return 3; + case 7: return 4; + case 10: return 5; + case 11: return 6; + case 12: return 7; + default: + System.out.println(chromNo + " " + locNo); + return -1; + } + } + + public int getTableChromIndex(String chromAndPos) { + StringTokenizer tok = new StringTokenizer(chromAndPos,":"); + return getTableChromIndex(tok.nextToken().substring(3),tok.nextToken()); + } + + public int getTablePosIndex(String chromAndPos) { + StringTokenizer tok = new StringTokenizer(chromAndPos,":"); + return getTablePosIndex(tok.nextToken().substring(3),tok.nextToken()); + } + + public int getTablePosIndex(String chromNo, String locNo) { + switch ( getTableChromIndex(chromNo, locNo) ) { + case 0: return Integer.valueOf(locNo) - 120065274; + case 1: return Integer.valueOf(locNo) - 43311321; + case 2: return Integer.valueOf(locNo) - 12157091; + case 3: return Integer.valueOf(locNo) - 64000000; + case 4: return Integer.valueOf(locNo) - 27838907; + case 5: return Integer.valueOf(locNo) - 12003199; + case 6: return Integer.valueOf(locNo) - 92342389; + case 7: return Integer.valueOf(locNo) - 69809219; + default: + System.out.println(chromNo + " " + locNo); + return -1; + } + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/ArtificalPoolWalkerMk2.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/ArtificalPoolWalkerMk2.java deleted file mode 100755 index cf6c67a09..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/ArtificalPoolWalkerMk2.java +++ /dev/null @@ -1,146 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.poolseq; - -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.playground.utils.ArtificialPoolContext; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.StingException; - -import java.io.FileOutputStream; -import java.io.PrintWriter; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Set; -import java.util.List; -import java.util.ListIterator; -import java.util.LinkedList; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Aug 26, 2009 - * Time: 11:28:26 AM - * To change this template use File | Settings | File Templates. - */ -public class ArtificalPoolWalkerMk2 extends LocusWalker { - - @Argument(fullName = "AuxOutputFile", shortName = "af", doc = "Auxiliary file for genotyp & coverage output", required = true) - String auxFilePath = null; - @Argument(fullName = "OutputBamFile", shortName = "of", doc = "Output to this file rather than standard output", required = false) - SAMFileWriter outputBamFile = null; - - public void initialize() { - } - - public ArtificialPoolContext reduceInit() { // try to initialize the file writer - ArtificialPoolContext apContext = new ArtificialPoolContext(); - apContext.setSingleSampleGenotyper(new SingleSampleGenotyper()); - apContext.setReadGroupSets(getToolkit().getMergedReadGroupsByReaders()); - apContext.setAuxWriter(initializeAuxFileWriter(apContext.getTotalNumberOfPeople())); - apContext.setSAMFileWriter(outputBamFile); - apContext.initializeSSG(); - - return apContext; - } - - public ArtificialPoolContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return new ArtificialPoolContext(tracker,ref,context); - } - - public ArtificialPoolContext reduce(ArtificialPoolContext mapCon, ArtificialPoolContext redCon){ - ArtificialPoolContext updatedContext = ArtificialPoolContext.mapReduceMerge(mapCon,redCon); - List[] newReads = updatedContext.splitReadsByGroup(updatedContext.getNewReads()); - long[] newCvg = updateRunningCoverage(updatedContext.getRunningCoverage(), getCoverageByGroup(newReads)); - updatedContext.setRunningCoverage(newCvg); - List[] sampledReads = ArtificialPoolContext.sampleReads(newReads,runningCoverageToDouble(newCvg)); - printToFiles(sampledReads,updatedContext); - return updatedContext; - } - - // Helper methods follow - - private PrintWriter initializeAuxFileWriter(int nFiles) { - PrintWriter auxFileWriter; - try { - auxFileWriter = new PrintWriter(new FileOutputStream(auxFilePath)); - auxFileWriter.println(createAuxFileHeader(nFiles)); - } catch(FileNotFoundException e) { - String errmsg = "The filepath you entered "+auxFilePath+" could not be opened. Please double-check that the input is correct."; - throw new StingException(errmsg, e); - } catch(IOException e) { - String errmsg = "The file you entered "+auxFilePath+" could not be written to. Please check your permissions to write to this file."; - throw new StingException(errmsg,e); - } - - return auxFileWriter; - } - - private String createAuxFileHeader(int nFiles) { - String sp = " "; - String st1 = "Chrom:Pos" + sp; - String st2 = ""; - for(int j = 0; j < nFiles; j++) { - st2 = st2 + "Pers " + j + " Gen" + sp; // short for "genotype of person j at this location" - st2 = st2 + "Pers " + j + " Conf" + sp; // short for "confidence in genotype call of ..." - st2 = st2 + "Pers " + j + " NewCvg" + sp; // short for "coverage of person j at this location" - } - String st3 = "TotalCvg"; - - return st1+st2+st3; - } - - private int[] getCoverageByGroup(List[] readsByGroup) { - int[] coverage = new int[readsByGroup.length]; - for(int iterator = 0; iterator < readsByGroup.length; iterator ++) { - coverage[iterator] = readsByGroup[iterator].size(); - } - - return coverage; - } - - private long[] updateRunningCoverage(long[] cvgUpToNow, int[] newCvgByGroup) { - long[] newCvg = new long[cvgUpToNow.length]; - for(int iter = 0; iter < cvgUpToNow.length; iter++) { - newCvg[iter] = cvgUpToNow[iter] + newCvgByGroup[iter]; - } - - return newCvg; - } - - private double[] runningCoverageToDouble(long[] cvg) { - double[] avgProp = new double[cvg.length]; - long sum = 0; - for(long elem : cvg) { - sum += elem; - } - for(int iter = 0; iter < cvg.length; iter++) { - avgProp[iter] = cvg[iter]/sum; - } - - return avgProp; - } - - private void printToFiles(List[] sampledNewReads, ArtificialPoolContext context) { - SAMFileWriter samWrite = context.getSAMFileWriter(); - String sp = " "; - PrintWriter auxWrite = context.getWriterToAuxiliaryFile(); - int readGroupInt = 0; - for(List readGroup : sampledNewReads) { - for(SAMRecord read : readGroup) { - samWrite.addAlignment(read); - } - auxWrite.print(context.getAlignmentContext().getLocation().toString() + sp); - auxWrite.print(context.genotypeAndConfidenceToString(readGroupInt,sp)); - readGroupInt++; - } - - } - - -} - diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java new file mode 100755 index 000000000..1fd77bd3d --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java @@ -0,0 +1,58 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + +import org.broadinstitute.sting.gatk.refdata.AllelicVariant; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; + +import java.io.PrintStream; +import java.util.List; +import java.util.ArrayList; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Sep 3, 2009 + * Time: 5:04:40 PM + * To change this template use File | Settings | File Templates. + */ +public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{ + protected int numIndividualsInPool; + protected String name; + protected String[] filenames; + protected PrintStream out, callOut; + private VariantEvalWalker master; + + public BasicPoolVariantAnalysis(String name, int nIndividuals) { + this.name = name; + this.numIndividualsInPool = nIndividuals; + } + + public String getName() { return this.name; } + + public int getNumberOfIndividualsInPool() { return this.numIndividualsInPool; } + + public PrintStream getSummaryPrintStream() { return this.out; } + + public PrintStream getCallPrintStream() { return this.callOut; } + + public VariantEvalWalker getMaster() { return this.master; } + + public List getParams() { return new ArrayList(); } + + public List done() { return new ArrayList(); } + + public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String[] filenames) { + this.master = master; + this.out = out; + this.callOut = callOut; + this.filenames = filenames; + } + + public void finalize(long nSites) { + // no need to finalize data in general + } + + public abstract String update(AllelicVariant variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context); + +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java new file mode 100755 index 000000000..71084672b --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java @@ -0,0 +1,12 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Sep 3, 2009 + * Time: 4:39:12 PM + * To change this template use File | Settings | File Templates. + */ +public interface PoolAnalysis { + +}