- Updated --hapmap switch to --hapmap-chip to reflect the data being chip data for an individual rather than population allele frequency data in Hapmap

- Corrected some bugs to get metrics logging working
- Added a switch --force_1base_probs to ignore 4-base probalities if they exist


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@287 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-04-03 17:32:31 +00:00
parent edc44807af
commit bef475778f
3 changed files with 21 additions and 14 deletions

View File

@ -41,6 +41,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
public String Analysis_Name = null;
public String DBSNP_FILE = null;
public String HAPMAP_FILE = null;
public String HAPMAP_CHIP_FILE = null;
public Boolean ENABLED_THREADED_IO = false;
public Boolean UNSAFE = false;
public String MAX_ON_FLY_SORTS = null;
@ -104,6 +105,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
m_parser.addRequiredArg("analysis_type", "T", "Type of analysis to run", "Analysis_Name");
m_parser.addOptionalArg("DBSNP", "D", "DBSNP file", "DBSNP_FILE");
m_parser.addOptionalArg("hapmap", "H", "Hapmap file", "HAPMAP_FILE");
m_parser.addOptionalArg("hapmap_chip", "hc", "Hapmap chip file", "HAPMAP_CHIP_FILE");
m_parser.addOptionalFlag("threaded_IO", "P", "If set, enables threaded I/O operations", "ENABLED_THREADED_IO");
m_parser.addOptionalFlag("unsafe", "U", "If set, enables unsafe operations, nothing will be checked at runtime.", "UNSAFE");
m_parser.addOptionalArg("sort_on_the_fly", "sort", "Maximum number of reads to sort on the fly", "MAX_ON_FLY_SORTS");
@ -176,6 +178,10 @@ public class GenomeAnalysisTK extends CommandLineProgram {
//dbsnp.testMe();
rods.add(hapmap); // { gff, dbsnp };
}
if ( HAPMAP_CHIP_FILE != null ) {
ReferenceOrderedData<rodGFF> hapmapChip = new ReferenceOrderedData<rodGFF>(new File(HAPMAP_CHIP_FILE), rodGFF.class );
rods.add(hapmapChip);
}
}
initializeOutputStreams();

View File

@ -18,11 +18,12 @@ import java.io.PrintStream;
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>// implements AllelicVariant
{
@Argument public int N;
@Argument(doc="Number of chromosomes in data") public int N;
@Argument(required=false,defaultValue="0") public int DOWNSAMPLE;
@Argument public String GFF_OUTPUT_FILE;
@Argument(doc="File to output GFF formatted allele frequency calls") public String GFF_OUTPUT_FILE;
@Argument(shortName="met", doc="Turns on logging of metrics on the fly with AlleleFrequency calculation") public boolean LOG_METRICS;
@Argument(required=false, defaultValue="", doc="Name of file where metrics will output") public String METRICS_OUTPUT_FILE;
@Argument(required=false, defaultValue="metrics.out", doc="Name of file where metrics will output") public String METRICS_OUTPUT_FILE;
@Argument(required=false, doc="Ignores 4-base probabilities and only uses the quality of the best/called base") public static boolean FORCE_1BASE_PROBS;
protected static Logger logger = Logger.getLogger(AlleleFrequencyWalker.class);
@ -118,7 +119,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return alleleFreq;
}
static public String getBases (LocusContext context)
static public String getBases (LocusContext context)
{
// Convert bases to CharArray
int numReads = context.getReads().size(); //numReads();
@ -135,7 +136,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return new String(bases);
}
static public double[][] getQuals (LocusContext context)
static public double[][] getQuals (LocusContext context)
{
int numReads = context.getReads().size(); //numReads();
double[][] quals = new double[numReads][4];
@ -153,7 +154,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
// For other 3 bases, check if 2-base probs exist
assert (reads.size() > 0);
Object SQ_field = reads.get(0).getAttribute("SQ");
if (SQ_field == null) {
if (SQ_field == null || FORCE_1BASE_PROBS) {
// Set all nonref qual scores to their share of the remaining probality not "used" by the reference base's qual
double nonref_quals = (1.0 - quals[i][callednum]) / 3;
for (int b=0; b<4; b++)
@ -398,7 +399,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
confident_ref_interval_length = 0;
last_position_considered = -1;
inside_confident_ref_interval = false;
if (LOG_METRICS) metrics = new AlleleMetrics("SNTH");//METRICS_OUTPUT_FILE);
if (LOG_METRICS) metrics = new AlleleMetrics(METRICS_OUTPUT_FILE);
return "";
}
@ -508,6 +509,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
inside_confident_ref_interval = false;
}
if (LOG_METRICS) metrics.printMetrics();
try
{
@ -522,7 +524,6 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
System.exit(-1);
}
if (LOG_METRICS) metrics.printMetrics();
}
static void print_base_qual_matrix(double [][]quals) {

View File

@ -102,9 +102,9 @@ public class AlleleMetrics {
}
// Hapmap debug info
out.format("HAPMAP %.2f %.2f %.2f ", hapmap_q, alleleFreq.qstar, alleleFreq.lodVsRef);
String called_genotype = alleleFreq.asString();
out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
//out.format("HAPMAP DEBUG %.2f %.2f %.2f ", hapmap_q, alleleFreq.qstar, alleleFreq.lodVsRef);
//String called_genotype = alleleFreq.asString();
//out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
@ -114,7 +114,7 @@ public class AlleleMetrics {
}else{
hapmap_genotype_incorrect++;
//System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
System.out.printf(" INCORRECT GENOTYPE");
//out.printf(" INCORRECT GENOTYPE");
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
}
}
@ -127,13 +127,13 @@ public class AlleleMetrics {
boolean called_var = alleleFreq.qstar != 0.0;
if (hapmap_q != -1 && hapmap_var != called_var) {
hapmap_refvar_incorrect++;
System.out.printf(" INCORRECT REFVAR CALL");
//out.printf(" INCORRECT REFVAR CALL");
}else{
hapmap_refvar_correct++;
}
}
out.print("\n");
//out.print("\n");
}
}