Added ROD for parsing hapmap3 genotype files.

Tweak to TabularROD to allow HapMapGenotypeROD to work.
Added HapMapGenotypeROD to list of RODs in ReferenceOrderedData.java.
Modified MultiSampleCaller to return a single object with most of the relvant information.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1169 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
jmaguire 2009-07-05 16:28:24 +00:00
parent e5e249d4ac
commit 4019cd2bd7
4 changed files with 103 additions and 18 deletions

View File

@ -0,0 +1,44 @@
package org.broadinstitute.sting.gatk.refdata;
import java.util.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
public class HapMapGenotypeROD extends TabularROD
{
public HapMapGenotypeROD(final String name)
{
super(name);
}
public GenomeLoc getLocation()
{
//System.out.printf("chrom: %s; pos: %s\n", this.get("chrom"), this.get("pos"));
return GenomeLocParser.createGenomeLoc(this.get("chrom").replaceAll("chr", ""), Long.parseLong(this.get("pos")));
}
public String[] getSampleIDs()
{
ArrayList<String> header = this.getHeader();
String[] sample_ids = new String[header.size()-11];
for (int i = 11; i < header.size(); i++)
{
sample_ids[i-11] = header.get(i);
}
return sample_ids;
}
public String[] getGenotypes()
{
ArrayList<String> header = this.getHeader();
String[] genotypes = new String[header.size()-11];
for (int i = 11; i < header.size(); i++)
{
genotypes[i-11] = this.get(header.get(i));
}
return genotypes;
}
}

View File

@ -68,6 +68,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
addModule("PooledEM", PooledEMSNPROD.class);
addModule("1KGSNPs", KGenomesSNPROD.class);
addModule("SangerSNP", SangerSNPROD.class);
addModule("HapMapGenotype", HapMapGenotypeROD.class);
addModule("Intervals", IntervalRod.class);
addModule("Variants", rodVariants.class);
}
@ -309,22 +310,22 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
// Parsing
//
// ----------------------------------------------------------------------
private Constructor<ROD> parsing_constructor;
private ROD newROD( final String name, final Class<ROD> type ) {
try {
Constructor<ROD> c = type.getConstructor(String.class);
return (ROD)c.newInstance(name);
return (ROD)parsing_constructor.newInstance(name);
} catch ( java.lang.InstantiationException e ) {
throw new RuntimeException(e);
} catch ( java.lang.IllegalAccessException e ) {
throw new RuntimeException(e);
} catch ( java.lang.NoSuchMethodException e ) {
throw new RuntimeException(e);
} catch ( InvocationTargetException e ) {
throw new RuntimeException(e);
}
}
private Object initializeROD(final String name, final File file, final Class<ROD> type) {
try { parsing_constructor = type.getConstructor(String.class); }
catch (java.lang.NoSuchMethodException e) { throw new RuntimeException(e); }
ROD rod = newROD(name, type);
try {
return rod.initialize(file);

View File

@ -51,7 +51,7 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
public static String DELIMITER_REGEX = DEFAULT_DELIMITER_REGEX;
private static int MAX_LINES_TO_LOOK_FOR_HEADER = 1000;
private static Pattern HEADER_PATTERN = Pattern.compile("^\\s*((HEADER)|(loc)).*");
private static Pattern HEADER_PATTERN = Pattern.compile("^\\s*((HEADER)|(loc)|(rs#)).*");
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static int parsedRecords = 0;
@ -91,7 +91,7 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
private static boolean headerIsGood(final ArrayList<String> header) {
if ( header.size() == 0 ) return false;
if ( ! header.get(0).equals("HEADER") ) return false;
//if ( ! header.get(0).equals("HEADER") ) return false;
return true;
}
@ -304,4 +304,4 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
return true;
}
}
}

View File

@ -21,7 +21,7 @@ import java.io.*;
// Beta iterative multi-sample caller
// j.maguire 6-11-2009
public class MultiSampleCaller extends LocusWalker<String,String>
public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSampleCallResult,String>
{
@Argument(required=false, shortName="fractional_counts", doc="should we use fractional counts?") public boolean FRACTIONAL_COUNTS = false;
@Argument(required=false, shortName="max_iterations", doc="Maximum number of iterations for EM") public int MAX_ITERATIONS = 10;
@ -36,6 +36,41 @@ public class MultiSampleCaller extends LocusWalker<String,String>
PrintStream individual_output_file;
PrintStream discovery_output_file;
class MultiSampleCallResult
{
char ref;
char alt;
EM_Result em_result;
double lod;
double strand_score;
double pD;
double pNull;
String in_dbsnp;
int n_ref;
int n_het;
int n_hom;
int EM_N;
double alt_freq;
public MultiSampleCallResult(char ref, char alt, EM_Result em_result, double lod, double strand_score, double pD, double pNull, String in_dbsnp, int n_ref, int n_het, int n_hom, int EM_N, double alt_freq)
{
this.ref = ref;
this.alt = alt;
this.em_result = em_result;
this.lod = lod;
this.strand_score = strand_score;
this.pD = pD;
this.pNull = pNull;
this.in_dbsnp = in_dbsnp;
this.n_ref = n_ref;
this.n_het = n_het;
this.n_hom = n_hom;
this.EM_N = EM_N;
this.alt_freq = alt_freq;
}
}
/////////
// Walker Interface Functions
public void initialize()
@ -76,15 +111,13 @@ public class MultiSampleCaller extends LocusWalker<String,String>
}
}
public String in_dbsnp = "novel";
public String map(RefMetaDataTracker tracker, char ref, LocusContext context)
public MultiSampleCallResult map(RefMetaDataTracker tracker, char ref, LocusContext context)
{
if (ref == 'N') { return null; }
this.ref = ref;
if (tracker.lookup("DBSNP", null) != null) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; }
this.MultiSampleCall(context, sample_names);
return null;
MultiSampleCallResult result = this.MultiSampleCall(tracker, ref, context, sample_names);
return result;
}
public void onTraversalDone(String sum)
@ -98,7 +131,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
return null;
}
public String reduce(String record, String sum)
public String reduce(MultiSampleCallResult record, String sum)
{
return null;
}
@ -273,11 +306,15 @@ public class MultiSampleCaller extends LocusWalker<String,String>
class EM_Result
{
String[] sample_names;
GenotypeLikelihoods[] genotype_likelihoods;
double[] allele_likelihoods;
int EM_N;
public EM_Result(GenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
public EM_Result(List<String> sample_names, GenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
{
this.sample_names = new String[1];
this.sample_names = sample_names.toArray(this.sample_names);
this.genotype_likelihoods = genotype_likelihoods;
this.allele_likelihoods = allele_likelihoods;
@ -317,7 +354,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
}
}
return new EM_Result(G, allele_likelihoods);
return new EM_Result(sample_names, G, allele_likelihoods);
}
// Hacky global variables for debugging.
@ -429,8 +466,11 @@ public class MultiSampleCaller extends LocusWalker<String,String>
}
// This should actually return a GLF Record
String MultiSampleCall(LocusContext context, List<String> sample_names)
MultiSampleCallResult MultiSampleCall(RefMetaDataTracker tracker, char ref, LocusContext context, List<String> sample_names)
{
String in_dbsnp;
if (tracker.lookup("DBSNP", null) != null) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; }
LocusContext[] contexts = filterLocusContextBySample(context, sample_names, 0);
double lod = LOD(contexts);
double strand_score = StrandScore(context);
@ -472,7 +512,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
individual_output_file.printf("\n");
}
return null;
return new MultiSampleCallResult(ref, alt, em_result, lod, strand_score, pD, pNull, in_dbsnp, n_ref, n_het, n_hom, em_result.EM_N, alt_freq);
}
// END Calling Functions