From 0c54aba92abea6282670fe1ca453f52ebdc285b5 Mon Sep 17 00:00:00 2001 From: chartl Date: Fri, 11 Sep 2009 15:01:50 +0000 Subject: [PATCH] Changes: @VariantEvalWalker - added a command line option to input a file path to a pooled call file for pooled genotype concordance checking. This string is to be passed to the PooledGenotypeConcordance object. @AllelicVariant - added a method isPooled() to distinguish pooled AllelicVariants from unpooled ones. @ all the rest - implemented isPooled(); for everything other than PooledEMSNProd it simply returns false, for PooledEMSNProd it returns true. Added: @PooledGenotypeConcordance - takes in a filepath to a pool file with the names of hapmap individuals for concordance checking with pooled calls and does said concordance checking over all pools. Commented out as all the methods are as yet unwritten. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1585 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/AllelicVariant.java | 5 + .../sting/gatk/refdata/PooledEMSNPROD.java | 1 + .../sting/gatk/refdata/RodGLF.java | 5 + .../sting/gatk/refdata/RodGeliText.java | 2 + .../gatk/refdata/RodGenotypeChipAsGFF.java | 1 + .../sting/gatk/refdata/SangerSNPROD.java | 1 + .../sting/gatk/refdata/SimpleIndelROD.java | 1 + .../sting/gatk/refdata/rodDbSNP.java | 2 + .../PooledGenotypeConcordance.java | 152 ++++++++++++++++++ .../varianteval/VariantEvalWalker.java | 3 + 10 files changed, 173 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java b/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java index eb59bac9e..78e34f136 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java @@ -144,4 +144,9 @@ public interface AllelicVariant extends ReferenceOrderedDatum { /** returns the length of the variant. For SNPs this is just 1. */ int length(); + + /** + * returns TRUE if the variant is one for pooled calls, FALSE if it is not + */ + boolean isPooled(); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java index d3f44faa0..36f4ebc1b 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java @@ -46,6 +46,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes { public int getPloidy() throws IllegalStateException { return 2; } public boolean isBiallelic() { return true; } public int length() { return 1; } + public boolean isPooled() { return true; } // SNPCallFromGenotypes interface public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java index a0d88f002..b54b6f46a 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java @@ -407,5 +407,10 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator[] namesByPool = new LinkedList[nPools]; + + for( int firstLineIterator = 0; firstLineIterator < nPools; firstLineIterator ++) { + namesByPool[firstLineIterator] = new LinkedList(); + namesByPool[firstLineIterator].add(tokFirstLine.nextToken()); + } + + while(continueReading(reader)) { + String line = readLine(reader); + StringTokenizer tokLine = new StringTokenizer(line); + int newNames = tokLine.countTokens(); + for(int lineIt = 0; lineIt < newNames; lineIt ++ ) { + namesByPool[lineIt].add(tokLine.nextToken()); + } + } + + convertListOfNamesToMatrix(namesByPool); + } + + private boolean continueReading(BufferedReader reader) { + boolean continueReading = false; + try { + continueReading = reader.ready(); + } catch(IOException e) { + continueReading = false; + } + return continueReading; + } + + private String readLine(BufferedReader reader) { + String line; + try { + line = reader.readLine(); + } catch( IOException e) { + String errMsg = "BufferedReader pointing to "+reader.toString()+" was declared ready but no line could be read from it."; + throw new StingException(errMsg,e); + } + return line; + } + + private void convertListOfNamesToMatrix(LinkedList[] names) { + // initialize matrix + for( int pool = 0; pool < nPools; pool ++) { + individualsByPool[pool] = new String[names[pool].size()]; + individualsByPool[pool] = names[pool].toArray(individualsByPool[pool]); + } + + } +} +*/ \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index e22aa896d..8b3bd115e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -58,6 +58,9 @@ public class VariantEvalWalker extends RefWalker { @Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false) public int numPeopleInPool = 1; + @Argument(fullName = "pathToHapmapPoolFile", shortName="HPF", doc="If using a variant file from a pooled caller on pools of hapmap individuals, this field provides a filepath to the pool construction file listing which hapmap individuals are in which pool", required=false) + public String pathToHapmapPoolFile = null; + String analysisFilenameBase = null; final String knownSNPDBName = "dbSNP";