- Updated --hapmap switch to --hapmap-chip to reflect the data being chip data for an individual rather than population allele frequency data in Hapmap

- Corrected some bugs to get metrics logging working
- Added a switch --force_1base_probs to ignore 4-base probalities if they exist


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@287 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-04-03 17:32:31 +00:00
parent edc44807af
commit bef475778f
3 changed files with 21 additions and 14 deletions

View File

@ -41,6 +41,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
public String Analysis_Name = null; public String Analysis_Name = null;
public String DBSNP_FILE = null; public String DBSNP_FILE = null;
public String HAPMAP_FILE = null; public String HAPMAP_FILE = null;
public String HAPMAP_CHIP_FILE = null;
public Boolean ENABLED_THREADED_IO = false; public Boolean ENABLED_THREADED_IO = false;
public Boolean UNSAFE = false; public Boolean UNSAFE = false;
public String MAX_ON_FLY_SORTS = null; public String MAX_ON_FLY_SORTS = null;
@ -104,6 +105,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
m_parser.addRequiredArg("analysis_type", "T", "Type of analysis to run", "Analysis_Name"); m_parser.addRequiredArg("analysis_type", "T", "Type of analysis to run", "Analysis_Name");
m_parser.addOptionalArg("DBSNP", "D", "DBSNP file", "DBSNP_FILE"); m_parser.addOptionalArg("DBSNP", "D", "DBSNP file", "DBSNP_FILE");
m_parser.addOptionalArg("hapmap", "H", "Hapmap file", "HAPMAP_FILE"); m_parser.addOptionalArg("hapmap", "H", "Hapmap file", "HAPMAP_FILE");
m_parser.addOptionalArg("hapmap_chip", "hc", "Hapmap chip file", "HAPMAP_CHIP_FILE");
m_parser.addOptionalFlag("threaded_IO", "P", "If set, enables threaded I/O operations", "ENABLED_THREADED_IO"); m_parser.addOptionalFlag("threaded_IO", "P", "If set, enables threaded I/O operations", "ENABLED_THREADED_IO");
m_parser.addOptionalFlag("unsafe", "U", "If set, enables unsafe operations, nothing will be checked at runtime.", "UNSAFE"); m_parser.addOptionalFlag("unsafe", "U", "If set, enables unsafe operations, nothing will be checked at runtime.", "UNSAFE");
m_parser.addOptionalArg("sort_on_the_fly", "sort", "Maximum number of reads to sort on the fly", "MAX_ON_FLY_SORTS"); m_parser.addOptionalArg("sort_on_the_fly", "sort", "Maximum number of reads to sort on the fly", "MAX_ON_FLY_SORTS");
@ -176,6 +178,10 @@ public class GenomeAnalysisTK extends CommandLineProgram {
//dbsnp.testMe(); //dbsnp.testMe();
rods.add(hapmap); // { gff, dbsnp }; rods.add(hapmap); // { gff, dbsnp };
} }
if ( HAPMAP_CHIP_FILE != null ) {
ReferenceOrderedData<rodGFF> hapmapChip = new ReferenceOrderedData<rodGFF>(new File(HAPMAP_CHIP_FILE), rodGFF.class );
rods.add(hapmapChip);
}
} }
initializeOutputStreams(); initializeOutputStreams();

View File

@ -18,11 +18,12 @@ import java.io.PrintStream;
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>// implements AllelicVariant public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>// implements AllelicVariant
{ {
@Argument public int N; @Argument(doc="Number of chromosomes in data") public int N;
@Argument(required=false,defaultValue="0") public int DOWNSAMPLE; @Argument(required=false,defaultValue="0") public int DOWNSAMPLE;
@Argument public String GFF_OUTPUT_FILE; @Argument(doc="File to output GFF formatted allele frequency calls") public String GFF_OUTPUT_FILE;
@Argument(shortName="met", doc="Turns on logging of metrics on the fly with AlleleFrequency calculation") public boolean LOG_METRICS; @Argument(shortName="met", doc="Turns on logging of metrics on the fly with AlleleFrequency calculation") public boolean LOG_METRICS;
@Argument(required=false, defaultValue="", doc="Name of file where metrics will output") public String METRICS_OUTPUT_FILE; @Argument(required=false, defaultValue="metrics.out", doc="Name of file where metrics will output") public String METRICS_OUTPUT_FILE;
@Argument(required=false, doc="Ignores 4-base probabilities and only uses the quality of the best/called base") public static boolean FORCE_1BASE_PROBS;
protected static Logger logger = Logger.getLogger(AlleleFrequencyWalker.class); protected static Logger logger = Logger.getLogger(AlleleFrequencyWalker.class);
@ -118,7 +119,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return alleleFreq; return alleleFreq;
} }
static public String getBases (LocusContext context) static public String getBases (LocusContext context)
{ {
// Convert bases to CharArray // Convert bases to CharArray
int numReads = context.getReads().size(); //numReads(); int numReads = context.getReads().size(); //numReads();
@ -135,7 +136,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return new String(bases); return new String(bases);
} }
static public double[][] getQuals (LocusContext context) static public double[][] getQuals (LocusContext context)
{ {
int numReads = context.getReads().size(); //numReads(); int numReads = context.getReads().size(); //numReads();
double[][] quals = new double[numReads][4]; double[][] quals = new double[numReads][4];
@ -153,7 +154,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
// For other 3 bases, check if 2-base probs exist // For other 3 bases, check if 2-base probs exist
assert (reads.size() > 0); assert (reads.size() > 0);
Object SQ_field = reads.get(0).getAttribute("SQ"); Object SQ_field = reads.get(0).getAttribute("SQ");
if (SQ_field == null) { if (SQ_field == null || FORCE_1BASE_PROBS) {
// Set all nonref qual scores to their share of the remaining probality not "used" by the reference base's qual // Set all nonref qual scores to their share of the remaining probality not "used" by the reference base's qual
double nonref_quals = (1.0 - quals[i][callednum]) / 3; double nonref_quals = (1.0 - quals[i][callednum]) / 3;
for (int b=0; b<4; b++) for (int b=0; b<4; b++)
@ -398,7 +399,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
confident_ref_interval_length = 0; confident_ref_interval_length = 0;
last_position_considered = -1; last_position_considered = -1;
inside_confident_ref_interval = false; inside_confident_ref_interval = false;
if (LOG_METRICS) metrics = new AlleleMetrics("SNTH");//METRICS_OUTPUT_FILE); if (LOG_METRICS) metrics = new AlleleMetrics(METRICS_OUTPUT_FILE);
return ""; return "";
} }
@ -508,6 +509,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
inside_confident_ref_interval = false; inside_confident_ref_interval = false;
} }
if (LOG_METRICS) metrics.printMetrics();
try try
{ {
@ -522,7 +524,6 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
System.exit(-1); System.exit(-1);
} }
if (LOG_METRICS) metrics.printMetrics();
} }
static void print_base_qual_matrix(double [][]quals) { static void print_base_qual_matrix(double [][]quals) {

View File

@ -102,9 +102,9 @@ public class AlleleMetrics {
} }
// Hapmap debug info // Hapmap debug info
out.format("HAPMAP %.2f %.2f %.2f ", hapmap_q, alleleFreq.qstar, alleleFreq.lodVsRef); //out.format("HAPMAP DEBUG %.2f %.2f %.2f ", hapmap_q, alleleFreq.qstar, alleleFreq.lodVsRef);
String called_genotype = alleleFreq.asString(); //String called_genotype = alleleFreq.asString();
out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt); //out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
if (alleleFreq.lodVsNextBest >= LOD_cutoff) { if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
@ -114,7 +114,7 @@ public class AlleleMetrics {
}else{ }else{
hapmap_genotype_incorrect++; hapmap_genotype_incorrect++;
//System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context)); //System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
System.out.printf(" INCORRECT GENOTYPE"); //out.printf(" INCORRECT GENOTYPE");
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context)); //AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
} }
} }
@ -127,13 +127,13 @@ public class AlleleMetrics {
boolean called_var = alleleFreq.qstar != 0.0; boolean called_var = alleleFreq.qstar != 0.0;
if (hapmap_q != -1 && hapmap_var != called_var) { if (hapmap_q != -1 && hapmap_var != called_var) {
hapmap_refvar_incorrect++; hapmap_refvar_incorrect++;
System.out.printf(" INCORRECT REFVAR CALL"); //out.printf(" INCORRECT REFVAR CALL");
}else{ }else{
hapmap_refvar_correct++; hapmap_refvar_correct++;
} }
} }
out.print("\n"); //out.print("\n");
} }
} }