@VariantEvalWalker - added a command line option to input a file path to a pooled call file for pooled genotype concordance checking. This string is to be passed to the PooledGenotypeConcordance object.

@AllelicVariant - added a method isPooled() to distinguish pooled AllelicVariants from unpooled ones.

@ all the rest - implemented isPooled(); for everything other than PooledEMSNProd it simply returns false, for PooledEMSNProd it returns true.

Added:

@PooledGenotypeConcordance - takes in a filepath to a pool file with the names of hapmap individuals for concordance checking with pooled calls
 and does said concordance checking over all pools. Commented out as all the methods are as yet unwritten.




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1585 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-09-11 15:01:50 +00:00
parent 70ec37661c
commit 0c54aba92a
10 changed files with 173 additions and 0 deletions

View File

@ -144,4 +144,9 @@ public interface AllelicVariant extends ReferenceOrderedDatum {
/** returns the length of the variant. For SNPs this is just 1.
*/
int length();
/**
* returns TRUE if the variant is one for pooled calls, FALSE if it is not
*/
boolean isPooled();
}

View File

@ -46,6 +46,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes {
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
public int length() { return 1; }
public boolean isPooled() { return true; }
// SNPCallFromGenotypes interface
public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); }

View File

@ -407,5 +407,10 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
}
return glf;
}
@Override
public boolean isPooled() {
return false;
}
}

View File

@ -171,6 +171,8 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
return true;
}
public boolean isPooled() { return false; }
public int length() { return 1; }
public char getReferenceBase() { return refBase; }

View File

@ -175,4 +175,5 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
public int length() { return 1; }
public boolean isPooled() { return false; }
}

View File

@ -32,6 +32,7 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
public int length() { return 1; }
public boolean isPooled() { return false; }
// SNPCallFromGenotypes interface
public int nIndividuals() { return -1; }

View File

@ -58,6 +58,7 @@ public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVaria
public double getHeterozygosity() { return 0.0; }
public double getMAF() { return 0.0; }
public int getPloidy() { return 2; }
public boolean isPooled() { return false; }
public int length() {
if ( is1KGFormat() )
return Integer.parseInt(this.get("2"));

View File

@ -239,4 +239,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
}
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
public boolean isPooled() { return false; }
}

View File

@ -0,0 +1,152 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.StringTokenizer;
import java.util.LinkedList;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Sep 9, 2009
* Time: 4:45:21 PM
* To change this template use File | Settings | File Templates.
*
public class PooledGenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
private String[][] individualsByPool = null;
private int nPools;
private static final int REF = 0;
private static final int VAR_SINGLE = 1;
private static final int VAR_MULTI = 2;
private static final int UNKNOWN = 3;
private static final int NO_CALL = 3; // synonym
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_SINGLE_VARIANT", "IS_MULTIPLE_VARIANT", "UNKNOWN"};
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_SINGLE_VARIANT", "CALLED_MULTIPLE_VARIANT", "NO_CALL"};
// todo -- consider the resolution: single vs multi enough, or should there be deeper resolution?
private int[][][] table;
private int[][] truthTotalsByPool;
private int[][] callTotalsByPool;
public PooledGenotypeConcordance(String pathToHapmapPoolFile) {
super("genotype_concordance");
BufferedReader fileReader = null;
try {
fileReader = new BufferedReader(new FileReader(pathToHapmapPoolFile));
} catch (IOException e) {
String errorMsg = "Could not open any file at " + pathToHapmapPoolFile + " Please check to see if the path is accurate.";
throw new StingException(errorMsg, e);
}
generateNameTableFromFile(fileReader);
truthTotalsByPool = new int[nPools][4];
callTotalsByPool = new int[nPools][4];
table = new int[nPools][4][4];
for(int pool = 0; pool < nPools; pool++) {
for(int call1 = 0; call1 < 4; call1++) {
for(int call2 = 0; call2 < 4; call2++)
{
table[pool][call1][call2] = 0;
}
callTotalsByPool[pool][call1] = 0;
truthTotalsByPool[pool][call1] = 0;
}
}
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context){
for(int pool = 0; pool < nPools; pool ++) {
incorporatePoolCall(eval, tracker, ref, context, pool);
}
return null;
}
public void incorporatePoolCall(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context, int pool) {
for( int nameOffset = 0; nameOffset < individualsByPool[pool].length; nameOffset++) {
inc(eval, tracker, ref, context, pool, nameOffset);
}
}
public void inc(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context, int pool, int nameOffset) {
AllelicVariant chip = (AllelicVariant) tracker.lookup(individualsByPool[pool][nameOffset],null);
}
//
// private methods for reading names into individualsByPool from an external file
//
private void generateNameTableFromFile(BufferedReader reader) {
// first line is a special case
String firstline;
if(continueReading(reader)) {
firstline = readLine(reader);
} else {
String errMsg = "First line of the HapmapPoolFile " + reader.toString() + " could not be read. Please check that the path and file properly formatted.";
throw new StingException(errMsg);
}
StringTokenizer tokFirstLine = new StringTokenizer(firstline);
nPools = tokFirstLine.countTokens();
LinkedList<String>[] namesByPool = new LinkedList[nPools];
for( int firstLineIterator = 0; firstLineIterator < nPools; firstLineIterator ++) {
namesByPool[firstLineIterator] = new LinkedList();
namesByPool[firstLineIterator].add(tokFirstLine.nextToken());
}
while(continueReading(reader)) {
String line = readLine(reader);
StringTokenizer tokLine = new StringTokenizer(line);
int newNames = tokLine.countTokens();
for(int lineIt = 0; lineIt < newNames; lineIt ++ ) {
namesByPool[lineIt].add(tokLine.nextToken());
}
}
convertListOfNamesToMatrix(namesByPool);
}
private boolean continueReading(BufferedReader reader) {
boolean continueReading = false;
try {
continueReading = reader.ready();
} catch(IOException e) {
continueReading = false;
}
return continueReading;
}
private String readLine(BufferedReader reader) {
String line;
try {
line = reader.readLine();
} catch( IOException e) {
String errMsg = "BufferedReader pointing to "+reader.toString()+" was declared ready but no line could be read from it.";
throw new StingException(errMsg,e);
}
return line;
}
private void convertListOfNamesToMatrix(LinkedList<String>[] names) {
// initialize matrix
for( int pool = 0; pool < nPools; pool ++) {
individualsByPool[pool] = new String[names[pool].size()];
individualsByPool[pool] = names[pool].toArray(individualsByPool[pool]);
}
}
}
*/

View File

@ -58,6 +58,9 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
@Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false)
public int numPeopleInPool = 1;
@Argument(fullName = "pathToHapmapPoolFile", shortName="HPF", doc="If using a variant file from a pooled caller on pools of hapmap individuals, this field provides a filepath to the pool construction file listing which hapmap individuals are in which pool", required=false)
public String pathToHapmapPoolFile = null;
String analysisFilenameBase = null;
final String knownSNPDBName = "dbSNP";