Changes:
@VariantEvalWalker - added a command line option to input a file path to a pooled call file for pooled genotype concordance checking. This string is to be passed to the PooledGenotypeConcordance object. @AllelicVariant - added a method isPooled() to distinguish pooled AllelicVariants from unpooled ones. @ all the rest - implemented isPooled(); for everything other than PooledEMSNProd it simply returns false, for PooledEMSNProd it returns true. Added: @PooledGenotypeConcordance - takes in a filepath to a pool file with the names of hapmap individuals for concordance checking with pooled calls and does said concordance checking over all pools. Commented out as all the methods are as yet unwritten. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1585 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
70ec37661c
commit
0c54aba92a
|
|
@ -144,4 +144,9 @@ public interface AllelicVariant extends ReferenceOrderedDatum {
|
|||
/** returns the length of the variant. For SNPs this is just 1.
|
||||
*/
|
||||
int length();
|
||||
|
||||
/**
|
||||
* returns TRUE if the variant is one for pooled calls, FALSE if it is not
|
||||
*/
|
||||
boolean isPooled();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
|||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
public boolean isPooled() { return true; }
|
||||
|
||||
// SNPCallFromGenotypes interface
|
||||
public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); }
|
||||
|
|
|
|||
|
|
@ -407,5 +407,10 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
}
|
||||
return glf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPooled() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -171,6 +171,8 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
return true;
|
||||
}
|
||||
|
||||
public boolean isPooled() { return false; }
|
||||
|
||||
public int length() { return 1; }
|
||||
|
||||
public char getReferenceBase() { return refBase; }
|
||||
|
|
|
|||
|
|
@ -175,4 +175,5 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
public boolean isPooled() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
|||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
public boolean isPooled() { return false; }
|
||||
|
||||
// SNPCallFromGenotypes interface
|
||||
public int nIndividuals() { return -1; }
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVaria
|
|||
public double getHeterozygosity() { return 0.0; }
|
||||
public double getMAF() { return 0.0; }
|
||||
public int getPloidy() { return 2; }
|
||||
public boolean isPooled() { return false; }
|
||||
public int length() {
|
||||
if ( is1KGFormat() )
|
||||
return Integer.parseInt(this.get("2"));
|
||||
|
|
|
|||
|
|
@ -239,4 +239,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
}
|
||||
|
||||
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
|
||||
|
||||
public boolean isPooled() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,152 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Sep 9, 2009
|
||||
* Time: 4:45:21 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*
|
||||
public class PooledGenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||
private String[][] individualsByPool = null;
|
||||
private int nPools;
|
||||
|
||||
private static final int REF = 0;
|
||||
private static final int VAR_SINGLE = 1;
|
||||
private static final int VAR_MULTI = 2;
|
||||
private static final int UNKNOWN = 3;
|
||||
private static final int NO_CALL = 3; // synonym
|
||||
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_SINGLE_VARIANT", "IS_MULTIPLE_VARIANT", "UNKNOWN"};
|
||||
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_SINGLE_VARIANT", "CALLED_MULTIPLE_VARIANT", "NO_CALL"};
|
||||
// todo -- consider the resolution: single vs multi enough, or should there be deeper resolution?
|
||||
|
||||
private int[][][] table;
|
||||
private int[][] truthTotalsByPool;
|
||||
private int[][] callTotalsByPool;
|
||||
|
||||
public PooledGenotypeConcordance(String pathToHapmapPoolFile) {
|
||||
super("genotype_concordance");
|
||||
BufferedReader fileReader = null;
|
||||
try {
|
||||
fileReader = new BufferedReader(new FileReader(pathToHapmapPoolFile));
|
||||
} catch (IOException e) {
|
||||
String errorMsg = "Could not open any file at " + pathToHapmapPoolFile + " Please check to see if the path is accurate.";
|
||||
throw new StingException(errorMsg, e);
|
||||
}
|
||||
|
||||
generateNameTableFromFile(fileReader);
|
||||
|
||||
truthTotalsByPool = new int[nPools][4];
|
||||
callTotalsByPool = new int[nPools][4];
|
||||
table = new int[nPools][4][4];
|
||||
for(int pool = 0; pool < nPools; pool++) {
|
||||
for(int call1 = 0; call1 < 4; call1++) {
|
||||
for(int call2 = 0; call2 < 4; call2++)
|
||||
{
|
||||
table[pool][call1][call2] = 0;
|
||||
}
|
||||
callTotalsByPool[pool][call1] = 0;
|
||||
truthTotalsByPool[pool][call1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context){
|
||||
for(int pool = 0; pool < nPools; pool ++) {
|
||||
incorporatePoolCall(eval, tracker, ref, context, pool);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public void incorporatePoolCall(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context, int pool) {
|
||||
for( int nameOffset = 0; nameOffset < individualsByPool[pool].length; nameOffset++) {
|
||||
inc(eval, tracker, ref, context, pool, nameOffset);
|
||||
}
|
||||
}
|
||||
|
||||
public void inc(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context, int pool, int nameOffset) {
|
||||
AllelicVariant chip = (AllelicVariant) tracker.lookup(individualsByPool[pool][nameOffset],null);
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// private methods for reading names into individualsByPool from an external file
|
||||
//
|
||||
|
||||
private void generateNameTableFromFile(BufferedReader reader) {
|
||||
// first line is a special case
|
||||
String firstline;
|
||||
if(continueReading(reader)) {
|
||||
firstline = readLine(reader);
|
||||
} else {
|
||||
String errMsg = "First line of the HapmapPoolFile " + reader.toString() + " could not be read. Please check that the path and file properly formatted.";
|
||||
throw new StingException(errMsg);
|
||||
}
|
||||
|
||||
StringTokenizer tokFirstLine = new StringTokenizer(firstline);
|
||||
nPools = tokFirstLine.countTokens();
|
||||
LinkedList<String>[] namesByPool = new LinkedList[nPools];
|
||||
|
||||
for( int firstLineIterator = 0; firstLineIterator < nPools; firstLineIterator ++) {
|
||||
namesByPool[firstLineIterator] = new LinkedList();
|
||||
namesByPool[firstLineIterator].add(tokFirstLine.nextToken());
|
||||
}
|
||||
|
||||
while(continueReading(reader)) {
|
||||
String line = readLine(reader);
|
||||
StringTokenizer tokLine = new StringTokenizer(line);
|
||||
int newNames = tokLine.countTokens();
|
||||
for(int lineIt = 0; lineIt < newNames; lineIt ++ ) {
|
||||
namesByPool[lineIt].add(tokLine.nextToken());
|
||||
}
|
||||
}
|
||||
|
||||
convertListOfNamesToMatrix(namesByPool);
|
||||
}
|
||||
|
||||
private boolean continueReading(BufferedReader reader) {
|
||||
boolean continueReading = false;
|
||||
try {
|
||||
continueReading = reader.ready();
|
||||
} catch(IOException e) {
|
||||
continueReading = false;
|
||||
}
|
||||
return continueReading;
|
||||
}
|
||||
|
||||
private String readLine(BufferedReader reader) {
|
||||
String line;
|
||||
try {
|
||||
line = reader.readLine();
|
||||
} catch( IOException e) {
|
||||
String errMsg = "BufferedReader pointing to "+reader.toString()+" was declared ready but no line could be read from it.";
|
||||
throw new StingException(errMsg,e);
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
private void convertListOfNamesToMatrix(LinkedList<String>[] names) {
|
||||
// initialize matrix
|
||||
for( int pool = 0; pool < nPools; pool ++) {
|
||||
individualsByPool[pool] = new String[names[pool].size()];
|
||||
individualsByPool[pool] = names[pool].toArray(individualsByPool[pool]);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
|
@ -58,6 +58,9 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
@Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false)
|
||||
public int numPeopleInPool = 1;
|
||||
|
||||
@Argument(fullName = "pathToHapmapPoolFile", shortName="HPF", doc="If using a variant file from a pooled caller on pools of hapmap individuals, this field provides a filepath to the pool construction file listing which hapmap individuals are in which pool", required=false)
|
||||
public String pathToHapmapPoolFile = null;
|
||||
|
||||
String analysisFilenameBase = null;
|
||||
|
||||
final String knownSNPDBName = "dbSNP";
|
||||
|
|
|
|||
Loading…
Reference in New Issue