Added ROD for parsing hapmap3 genotype files.
Tweak to TabularROD to allow HapMapGenotypeROD to work. Added HapMapGenotypeROD to list of RODs in ReferenceOrderedData.java. Modified MultiSampleCaller to return a single object with most of the relvant information. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1169 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e5e249d4ac
commit
4019cd2bd7
|
|
@ -0,0 +1,44 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
public class HapMapGenotypeROD extends TabularROD
|
||||
{
|
||||
public HapMapGenotypeROD(final String name)
|
||||
{
|
||||
super(name);
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation()
|
||||
{
|
||||
//System.out.printf("chrom: %s; pos: %s\n", this.get("chrom"), this.get("pos"));
|
||||
|
||||
return GenomeLocParser.createGenomeLoc(this.get("chrom").replaceAll("chr", ""), Long.parseLong(this.get("pos")));
|
||||
}
|
||||
|
||||
public String[] getSampleIDs()
|
||||
{
|
||||
ArrayList<String> header = this.getHeader();
|
||||
String[] sample_ids = new String[header.size()-11];
|
||||
for (int i = 11; i < header.size(); i++)
|
||||
{
|
||||
sample_ids[i-11] = header.get(i);
|
||||
}
|
||||
return sample_ids;
|
||||
}
|
||||
|
||||
public String[] getGenotypes()
|
||||
{
|
||||
ArrayList<String> header = this.getHeader();
|
||||
String[] genotypes = new String[header.size()-11];
|
||||
for (int i = 11; i < header.size(); i++)
|
||||
{
|
||||
genotypes[i-11] = this.get(header.get(i));
|
||||
}
|
||||
return genotypes;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -68,6 +68,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
addModule("PooledEM", PooledEMSNPROD.class);
|
||||
addModule("1KGSNPs", KGenomesSNPROD.class);
|
||||
addModule("SangerSNP", SangerSNPROD.class);
|
||||
addModule("HapMapGenotype", HapMapGenotypeROD.class);
|
||||
addModule("Intervals", IntervalRod.class);
|
||||
addModule("Variants", rodVariants.class);
|
||||
}
|
||||
|
|
@ -309,22 +310,22 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
// Parsing
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
private Constructor<ROD> parsing_constructor;
|
||||
private ROD newROD( final String name, final Class<ROD> type ) {
|
||||
try {
|
||||
Constructor<ROD> c = type.getConstructor(String.class);
|
||||
return (ROD)c.newInstance(name);
|
||||
return (ROD)parsing_constructor.newInstance(name);
|
||||
} catch ( java.lang.InstantiationException e ) {
|
||||
throw new RuntimeException(e);
|
||||
} catch ( java.lang.IllegalAccessException e ) {
|
||||
throw new RuntimeException(e);
|
||||
} catch ( java.lang.NoSuchMethodException e ) {
|
||||
throw new RuntimeException(e);
|
||||
} catch ( InvocationTargetException e ) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private Object initializeROD(final String name, final File file, final Class<ROD> type) {
|
||||
try { parsing_constructor = type.getConstructor(String.class); }
|
||||
catch (java.lang.NoSuchMethodException e) { throw new RuntimeException(e); }
|
||||
ROD rod = newROD(name, type);
|
||||
try {
|
||||
return rod.initialize(file);
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
|
|||
public static String DELIMITER_REGEX = DEFAULT_DELIMITER_REGEX;
|
||||
|
||||
private static int MAX_LINES_TO_LOOK_FOR_HEADER = 1000;
|
||||
private static Pattern HEADER_PATTERN = Pattern.compile("^\\s*((HEADER)|(loc)).*");
|
||||
private static Pattern HEADER_PATTERN = Pattern.compile("^\\s*((HEADER)|(loc)|(rs#)).*");
|
||||
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
|
||||
private static int parsedRecords = 0;
|
||||
|
|
@ -91,7 +91,7 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
|
|||
|
||||
private static boolean headerIsGood(final ArrayList<String> header) {
|
||||
if ( header.size() == 0 ) return false;
|
||||
if ( ! header.get(0).equals("HEADER") ) return false;
|
||||
//if ( ! header.get(0).equals("HEADER") ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -304,4 +304,4 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
|
|||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ import java.io.*;
|
|||
// Beta iterative multi-sample caller
|
||||
// j.maguire 6-11-2009
|
||||
|
||||
public class MultiSampleCaller extends LocusWalker<String,String>
|
||||
public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSampleCallResult,String>
|
||||
{
|
||||
@Argument(required=false, shortName="fractional_counts", doc="should we use fractional counts?") public boolean FRACTIONAL_COUNTS = false;
|
||||
@Argument(required=false, shortName="max_iterations", doc="Maximum number of iterations for EM") public int MAX_ITERATIONS = 10;
|
||||
|
|
@ -36,6 +36,41 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
|||
PrintStream individual_output_file;
|
||||
PrintStream discovery_output_file;
|
||||
|
||||
|
||||
class MultiSampleCallResult
|
||||
{
|
||||
char ref;
|
||||
char alt;
|
||||
EM_Result em_result;
|
||||
double lod;
|
||||
double strand_score;
|
||||
double pD;
|
||||
double pNull;
|
||||
String in_dbsnp;
|
||||
int n_ref;
|
||||
int n_het;
|
||||
int n_hom;
|
||||
int EM_N;
|
||||
double alt_freq;
|
||||
public MultiSampleCallResult(char ref, char alt, EM_Result em_result, double lod, double strand_score, double pD, double pNull, String in_dbsnp, int n_ref, int n_het, int n_hom, int EM_N, double alt_freq)
|
||||
{
|
||||
this.ref = ref;
|
||||
this.alt = alt;
|
||||
this.em_result = em_result;
|
||||
this.lod = lod;
|
||||
this.strand_score = strand_score;
|
||||
this.pD = pD;
|
||||
this.pNull = pNull;
|
||||
this.in_dbsnp = in_dbsnp;
|
||||
this.n_ref = n_ref;
|
||||
this.n_het = n_het;
|
||||
this.n_hom = n_hom;
|
||||
this.EM_N = EM_N;
|
||||
this.alt_freq = alt_freq;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/////////
|
||||
// Walker Interface Functions
|
||||
public void initialize()
|
||||
|
|
@ -76,15 +111,13 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
|||
}
|
||||
}
|
||||
|
||||
public String in_dbsnp = "novel";
|
||||
|
||||
public String map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||
public MultiSampleCallResult map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||
{
|
||||
if (ref == 'N') { return null; }
|
||||
this.ref = ref;
|
||||
if (tracker.lookup("DBSNP", null) != null) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; }
|
||||
this.MultiSampleCall(context, sample_names);
|
||||
return null;
|
||||
MultiSampleCallResult result = this.MultiSampleCall(tracker, ref, context, sample_names);
|
||||
return result;
|
||||
}
|
||||
|
||||
public void onTraversalDone(String sum)
|
||||
|
|
@ -98,7 +131,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
|||
return null;
|
||||
}
|
||||
|
||||
public String reduce(String record, String sum)
|
||||
public String reduce(MultiSampleCallResult record, String sum)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
|
@ -273,11 +306,15 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
|||
|
||||
class EM_Result
|
||||
{
|
||||
String[] sample_names;
|
||||
GenotypeLikelihoods[] genotype_likelihoods;
|
||||
double[] allele_likelihoods;
|
||||
int EM_N;
|
||||
public EM_Result(GenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
|
||||
|
||||
public EM_Result(List<String> sample_names, GenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
|
||||
{
|
||||
this.sample_names = new String[1];
|
||||
this.sample_names = sample_names.toArray(this.sample_names);
|
||||
this.genotype_likelihoods = genotype_likelihoods;
|
||||
this.allele_likelihoods = allele_likelihoods;
|
||||
|
||||
|
|
@ -317,7 +354,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
|||
}
|
||||
}
|
||||
|
||||
return new EM_Result(G, allele_likelihoods);
|
||||
return new EM_Result(sample_names, G, allele_likelihoods);
|
||||
}
|
||||
|
||||
// Hacky global variables for debugging.
|
||||
|
|
@ -429,8 +466,11 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
|||
}
|
||||
|
||||
// This should actually return a GLF Record
|
||||
String MultiSampleCall(LocusContext context, List<String> sample_names)
|
||||
MultiSampleCallResult MultiSampleCall(RefMetaDataTracker tracker, char ref, LocusContext context, List<String> sample_names)
|
||||
{
|
||||
String in_dbsnp;
|
||||
if (tracker.lookup("DBSNP", null) != null) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; }
|
||||
|
||||
LocusContext[] contexts = filterLocusContextBySample(context, sample_names, 0);
|
||||
double lod = LOD(contexts);
|
||||
double strand_score = StrandScore(context);
|
||||
|
|
@ -472,7 +512,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
|
|||
individual_output_file.printf("\n");
|
||||
}
|
||||
|
||||
return null;
|
||||
return new MultiSampleCallResult(ref, alt, em_result, lod, strand_score, pD, pNull, in_dbsnp, n_ref, n_het, n_hom, em_result.EM_N, alt_freq);
|
||||
}
|
||||
|
||||
// END Calling Functions
|
||||
|
|
|
|||
Loading…
Reference in New Issue