Added ROD for parsing hapmap3 genotype files.
Tweak to TabularROD to allow HapMapGenotypeROD to work. Added HapMapGenotypeROD to list of RODs in ReferenceOrderedData.java. Modified MultiSampleCaller to return a single object with most of the relvant information. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1169 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e5e249d4ac
commit
4019cd2bd7
|
|
@ -0,0 +1,44 @@
|
||||||
|
package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
|
public class HapMapGenotypeROD extends TabularROD
|
||||||
|
{
|
||||||
|
public HapMapGenotypeROD(final String name)
|
||||||
|
{
|
||||||
|
super(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
public GenomeLoc getLocation()
|
||||||
|
{
|
||||||
|
//System.out.printf("chrom: %s; pos: %s\n", this.get("chrom"), this.get("pos"));
|
||||||
|
|
||||||
|
return GenomeLocParser.createGenomeLoc(this.get("chrom").replaceAll("chr", ""), Long.parseLong(this.get("pos")));
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] getSampleIDs()
|
||||||
|
{
|
||||||
|
ArrayList<String> header = this.getHeader();
|
||||||
|
String[] sample_ids = new String[header.size()-11];
|
||||||
|
for (int i = 11; i < header.size(); i++)
|
||||||
|
{
|
||||||
|
sample_ids[i-11] = header.get(i);
|
||||||
|
}
|
||||||
|
return sample_ids;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] getGenotypes()
|
||||||
|
{
|
||||||
|
ArrayList<String> header = this.getHeader();
|
||||||
|
String[] genotypes = new String[header.size()-11];
|
||||||
|
for (int i = 11; i < header.size(); i++)
|
||||||
|
{
|
||||||
|
genotypes[i-11] = this.get(header.get(i));
|
||||||
|
}
|
||||||
|
return genotypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -68,6 +68,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
||||||
addModule("PooledEM", PooledEMSNPROD.class);
|
addModule("PooledEM", PooledEMSNPROD.class);
|
||||||
addModule("1KGSNPs", KGenomesSNPROD.class);
|
addModule("1KGSNPs", KGenomesSNPROD.class);
|
||||||
addModule("SangerSNP", SangerSNPROD.class);
|
addModule("SangerSNP", SangerSNPROD.class);
|
||||||
|
addModule("HapMapGenotype", HapMapGenotypeROD.class);
|
||||||
addModule("Intervals", IntervalRod.class);
|
addModule("Intervals", IntervalRod.class);
|
||||||
addModule("Variants", rodVariants.class);
|
addModule("Variants", rodVariants.class);
|
||||||
}
|
}
|
||||||
|
|
@ -309,22 +310,22 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
||||||
// Parsing
|
// Parsing
|
||||||
//
|
//
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
|
private Constructor<ROD> parsing_constructor;
|
||||||
private ROD newROD( final String name, final Class<ROD> type ) {
|
private ROD newROD( final String name, final Class<ROD> type ) {
|
||||||
try {
|
try {
|
||||||
Constructor<ROD> c = type.getConstructor(String.class);
|
return (ROD)parsing_constructor.newInstance(name);
|
||||||
return (ROD)c.newInstance(name);
|
|
||||||
} catch ( java.lang.InstantiationException e ) {
|
} catch ( java.lang.InstantiationException e ) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
} catch ( java.lang.IllegalAccessException e ) {
|
} catch ( java.lang.IllegalAccessException e ) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
} catch ( java.lang.NoSuchMethodException e ) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
} catch ( InvocationTargetException e ) {
|
} catch ( InvocationTargetException e ) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Object initializeROD(final String name, final File file, final Class<ROD> type) {
|
private Object initializeROD(final String name, final File file, final Class<ROD> type) {
|
||||||
|
try { parsing_constructor = type.getConstructor(String.class); }
|
||||||
|
catch (java.lang.NoSuchMethodException e) { throw new RuntimeException(e); }
|
||||||
ROD rod = newROD(name, type);
|
ROD rod = newROD(name, type);
|
||||||
try {
|
try {
|
||||||
return rod.initialize(file);
|
return rod.initialize(file);
|
||||||
|
|
|
||||||
|
|
@ -51,7 +51,7 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
|
||||||
public static String DELIMITER_REGEX = DEFAULT_DELIMITER_REGEX;
|
public static String DELIMITER_REGEX = DEFAULT_DELIMITER_REGEX;
|
||||||
|
|
||||||
private static int MAX_LINES_TO_LOOK_FOR_HEADER = 1000;
|
private static int MAX_LINES_TO_LOOK_FOR_HEADER = 1000;
|
||||||
private static Pattern HEADER_PATTERN = Pattern.compile("^\\s*((HEADER)|(loc)).*");
|
private static Pattern HEADER_PATTERN = Pattern.compile("^\\s*((HEADER)|(loc)|(rs#)).*");
|
||||||
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||||
|
|
||||||
private static int parsedRecords = 0;
|
private static int parsedRecords = 0;
|
||||||
|
|
@ -91,7 +91,7 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
|
||||||
|
|
||||||
private static boolean headerIsGood(final ArrayList<String> header) {
|
private static boolean headerIsGood(final ArrayList<String> header) {
|
||||||
if ( header.size() == 0 ) return false;
|
if ( header.size() == 0 ) return false;
|
||||||
if ( ! header.get(0).equals("HEADER") ) return false;
|
//if ( ! header.get(0).equals("HEADER") ) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -304,4 +304,4 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ import java.io.*;
|
||||||
// Beta iterative multi-sample caller
|
// Beta iterative multi-sample caller
|
||||||
// j.maguire 6-11-2009
|
// j.maguire 6-11-2009
|
||||||
|
|
||||||
public class MultiSampleCaller extends LocusWalker<String,String>
|
public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSampleCallResult,String>
|
||||||
{
|
{
|
||||||
@Argument(required=false, shortName="fractional_counts", doc="should we use fractional counts?") public boolean FRACTIONAL_COUNTS = false;
|
@Argument(required=false, shortName="fractional_counts", doc="should we use fractional counts?") public boolean FRACTIONAL_COUNTS = false;
|
||||||
@Argument(required=false, shortName="max_iterations", doc="Maximum number of iterations for EM") public int MAX_ITERATIONS = 10;
|
@Argument(required=false, shortName="max_iterations", doc="Maximum number of iterations for EM") public int MAX_ITERATIONS = 10;
|
||||||
|
|
@ -36,6 +36,41 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
||||||
PrintStream individual_output_file;
|
PrintStream individual_output_file;
|
||||||
PrintStream discovery_output_file;
|
PrintStream discovery_output_file;
|
||||||
|
|
||||||
|
|
||||||
|
class MultiSampleCallResult
|
||||||
|
{
|
||||||
|
char ref;
|
||||||
|
char alt;
|
||||||
|
EM_Result em_result;
|
||||||
|
double lod;
|
||||||
|
double strand_score;
|
||||||
|
double pD;
|
||||||
|
double pNull;
|
||||||
|
String in_dbsnp;
|
||||||
|
int n_ref;
|
||||||
|
int n_het;
|
||||||
|
int n_hom;
|
||||||
|
int EM_N;
|
||||||
|
double alt_freq;
|
||||||
|
public MultiSampleCallResult(char ref, char alt, EM_Result em_result, double lod, double strand_score, double pD, double pNull, String in_dbsnp, int n_ref, int n_het, int n_hom, int EM_N, double alt_freq)
|
||||||
|
{
|
||||||
|
this.ref = ref;
|
||||||
|
this.alt = alt;
|
||||||
|
this.em_result = em_result;
|
||||||
|
this.lod = lod;
|
||||||
|
this.strand_score = strand_score;
|
||||||
|
this.pD = pD;
|
||||||
|
this.pNull = pNull;
|
||||||
|
this.in_dbsnp = in_dbsnp;
|
||||||
|
this.n_ref = n_ref;
|
||||||
|
this.n_het = n_het;
|
||||||
|
this.n_hom = n_hom;
|
||||||
|
this.EM_N = EM_N;
|
||||||
|
this.alt_freq = alt_freq;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/////////
|
/////////
|
||||||
// Walker Interface Functions
|
// Walker Interface Functions
|
||||||
public void initialize()
|
public void initialize()
|
||||||
|
|
@ -76,15 +111,13 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String in_dbsnp = "novel";
|
|
||||||
|
|
||||||
public String map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
public MultiSampleCallResult map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||||
{
|
{
|
||||||
if (ref == 'N') { return null; }
|
if (ref == 'N') { return null; }
|
||||||
this.ref = ref;
|
this.ref = ref;
|
||||||
if (tracker.lookup("DBSNP", null) != null) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; }
|
MultiSampleCallResult result = this.MultiSampleCall(tracker, ref, context, sample_names);
|
||||||
this.MultiSampleCall(context, sample_names);
|
return result;
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(String sum)
|
public void onTraversalDone(String sum)
|
||||||
|
|
@ -98,7 +131,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String reduce(String record, String sum)
|
public String reduce(MultiSampleCallResult record, String sum)
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -273,11 +306,15 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
||||||
|
|
||||||
class EM_Result
|
class EM_Result
|
||||||
{
|
{
|
||||||
|
String[] sample_names;
|
||||||
GenotypeLikelihoods[] genotype_likelihoods;
|
GenotypeLikelihoods[] genotype_likelihoods;
|
||||||
double[] allele_likelihoods;
|
double[] allele_likelihoods;
|
||||||
int EM_N;
|
int EM_N;
|
||||||
public EM_Result(GenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
|
|
||||||
|
public EM_Result(List<String> sample_names, GenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
|
||||||
{
|
{
|
||||||
|
this.sample_names = new String[1];
|
||||||
|
this.sample_names = sample_names.toArray(this.sample_names);
|
||||||
this.genotype_likelihoods = genotype_likelihoods;
|
this.genotype_likelihoods = genotype_likelihoods;
|
||||||
this.allele_likelihoods = allele_likelihoods;
|
this.allele_likelihoods = allele_likelihoods;
|
||||||
|
|
||||||
|
|
@ -317,7 +354,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new EM_Result(G, allele_likelihoods);
|
return new EM_Result(sample_names, G, allele_likelihoods);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hacky global variables for debugging.
|
// Hacky global variables for debugging.
|
||||||
|
|
@ -429,8 +466,11 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
||||||
}
|
}
|
||||||
|
|
||||||
// This should actually return a GLF Record
|
// This should actually return a GLF Record
|
||||||
String MultiSampleCall(LocusContext context, List<String> sample_names)
|
MultiSampleCallResult MultiSampleCall(RefMetaDataTracker tracker, char ref, LocusContext context, List<String> sample_names)
|
||||||
{
|
{
|
||||||
|
String in_dbsnp;
|
||||||
|
if (tracker.lookup("DBSNP", null) != null) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; }
|
||||||
|
|
||||||
LocusContext[] contexts = filterLocusContextBySample(context, sample_names, 0);
|
LocusContext[] contexts = filterLocusContextBySample(context, sample_names, 0);
|
||||||
double lod = LOD(contexts);
|
double lod = LOD(contexts);
|
||||||
double strand_score = StrandScore(context);
|
double strand_score = StrandScore(context);
|
||||||
|
|
@ -472,7 +512,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
||||||
individual_output_file.printf("\n");
|
individual_output_file.printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return new MultiSampleCallResult(ref, alt, em_result, lod, strand_score, pD, pNull, in_dbsnp, n_ref, n_het, n_hom, em_result.EM_N, alt_freq);
|
||||||
}
|
}
|
||||||
|
|
||||||
// END Calling Functions
|
// END Calling Functions
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue