gatk-3.8/archive/java/src/org/broadinstitute/sting/vcftools/VCFTool.java

/*
 * Copyright (c) 2010 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.playground.tools.vcf;
import org.broad.tribble.vcf.VCFGenotypeEncoding;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;

import org.broadinstitute.sting.utils.genotype.vcf.*;

import org.broadinstitute.sting.utils.GenomeLocParser;


import java.io.*;
import java.util.*;
import java.util.zip.*;

import net.sf.picard.util.Interval;
import net.sf.picard.reference.ReferenceSequenceFileWalker;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;


// First draft of a program for working with VCF files in various ways.


/**
 * @author jmaguire
 */


class VCFValidate extends CommandLineProgram
{
		@Argument(fullName = "vcf", shortName = "vcf", doc = "file to open", required = true) public String filename;
		@Argument(fullName = "auto_correct", shortName = "auto_correct", doc = "auto-correct the VCF file if it's off-spec", required = false) public Boolean autocorrect = false;
		@Argument(fullName = "print", shortName = "print", doc = "print the vcf records to output", required = false) public Boolean print = false;
		@Argument(fullName = "profile", shortName = "profile", doc = "print performance information", required = false) public Boolean profile = false;
		@Argument(fullName = "out", shortName = "out", doc = "if --print, write to this file (default is /dev/stdout)", required = false) public String out = "/dev/stdout";

		@Override
		protected int execute()
		{
			System.out.println("Validating " + filename + "...");

			VCFReader reader = null;

			if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
			else { reader = new VCFReader(new File(filename)); }

			VCFHeader header = reader.getHeader();

			VCFWriter writer = null;
			if (print)
			{
				writer = new VCFWriter(new File(out));
				writer.writeHeader(header);
			}

			Date start_time = new Date();
			int n_records_processed = 0;
			while(reader.hasNext())
			{
				VCFRecord record = reader.next();
				if (print) { writer.addRecord(record); }

				if ((profile) && (n_records_processed % 10000 == 0))
				{
					Date current_time = new Date();
					long elapsed = current_time.getTime() - start_time.getTime();
					System.out.printf("RUNTIME: %d records processed in %f seconds; %f seconds per record.\n",
						n_records_processed,
						(double)elapsed/1000.0,
						((double)elapsed/1000.0)/(double)n_records_processed);
				}
				n_records_processed += 1;
			}

			if (print) { writer.close(); }

			if (autocorrect) { System.out.println(filename + " is VALID (after auto-correction)."); }
			else { System.out.println(filename + " is VALID."); }

			return 0;
		}
}

class VCFStats extends CommandLineProgram
{
		@Argument(fullName = "input", shortName = "input", doc = "file to read", required = true) public String in_filename;
		@Argument(fullName = "auto_correct", shortName = "auto_correct", doc = "auto-correct the VCF file if it's off-spec", required = false) public Boolean autocorrect = false;
		@Argument(fullName = "locus", shortName = "locus", doc = "file listing loci to extract", required = true) public String locus_string;


		@Override
		protected int execute()
		{
			VCFReader reader = null;

			String[] tokens = locus_string.split("\\:|\\-");
			String chr   = tokens[0];
			String start = tokens[1];
			String stop  = tokens[2];
			Interval locus = new Interval(chr, Integer.parseInt(start), Integer.parseInt(stop));

			if (autocorrect)
                reader = new VCFReader(new File(in_filename),new VCFHomogenizer());
			else
                reader = new VCFReader(new File(in_filename));

			VCFHeader header = reader.getHeader();


			//////////////
			// Stats collectors
			int transitions = 0;
			int transversions = 0;
			int dbsnp = 0;
			int total_snps = 0;
			int[] AC_histogram    = new int[1000]; int highest_AC = 0;
			int[] DP_histogram    = new int[1000000]; int highest_DP = 0;

			int[] AC_transitions = new int[1000];
			int[] DP_transitions = new int[1000];

			int depth_sum = 0;

			boolean before = true;

			while(reader.hasNext())
			{
				VCFRecord record = null;
				try
				{
					record = reader.next();
				}
				catch (Exception e)
				{
					System.err.printf("WARNING: %s\n", e.toString());
					continue;
				}
				Interval this_locus = VCFTool.getIntervalFromRecord(record);
				if (locus.intersects(this_locus))
				{
					before = false;

					Map<String,String> info = record.getInfoValues();

					int AC = 0;
					int DP = 0;
					int DB = 0;

					if (info.containsKey("AC")) { AC = Integer.parseInt(info.get("AC")); }
					if (info.containsKey("DP")) { DP = Integer.parseInt(info.get("DP")); }
					if (info.containsKey("DB")) { DB = Integer.parseInt(info.get("DB")); }

					depth_sum += DP;

					dbsnp += DB; // 1 if in dbsnp, 0 otherwise

					AC_histogram[AC] += 1;
					if (AC > highest_AC) { highest_AC = AC; }

					DP_histogram[DP] += 1;
					if (DP > highest_DP) { highest_DP = DP; }

					if (VCFTool.isTransition(record)) { transitions += 1; AC_transitions[AC] += 1; DP_transitions[DP] += 1; }
					else { transversions += 1; }

					total_snps += 1;
					//System.out.printf("%s\n", record.toStringEncoding(header));
				}
				else if ((before == false) && (this_locus.compareTo(locus) > 0)) { break; }
			}

			double mean_depth = (double)depth_sum / (double)total_snps;
			double snp_rate = 1.0 / ((double)total_snps / (double)locus.length());

			int DP_running_sum = 0;
			int DP_1percent_low = -1;
			int DP_5percent_low = -1;
			for (int DP = 1; DP <= highest_DP; DP++)
			{
				if ((DP_1percent_low == -1) && (DP_running_sum >= 0.01*(double)total_snps)) { DP_1percent_low = DP; }
				if ((DP_5percent_low == -1) && (DP_running_sum >= 0.05*(double)total_snps)) { DP_5percent_low = DP; }
				DP_running_sum += DP_histogram[DP];
			}

			DP_running_sum = 0;
			int DP_1percent_high = -1;
			int DP_5percent_high = -1;
			for (int DP = highest_DP; DP >= 0; DP--)
			{
				if ((DP_1percent_high == -1) && (DP_running_sum >= 0.01*(double)total_snps)) { DP_1percent_high = DP; }
				if ((DP_5percent_high == -1) && (DP_running_sum >= 0.05*(double)total_snps)) { DP_5percent_high = DP; }
				DP_running_sum += DP_histogram[DP];
			}


			System.out.printf("Locus           : %s\n", locus.toString());
			System.out.printf("Total SNPs      : %d\n", total_snps);
			System.out.printf("SNP Rate        : 1/%f\n", snp_rate);
			System.out.printf("Ts/Tv           : %.02f\n", (double)transitions / (double)transversions);
			System.out.printf("%%dbsnp          : %.02f\n", 100.0 * (double)dbsnp / (double)total_snps);
			System.out.printf("Average Depth   : %f\n", mean_depth);
			System.out.printf("1%% Depth bounds : %d %d\n", DP_1percent_low, DP_1percent_high);
			System.out.printf("5%% Depth bounds : %d %d\n", DP_5percent_low, DP_5percent_high);
			System.out.printf("\n");

			System.out.printf("table\tAAF\tCount\tTs/Tv\n");
			for (int AC = 1; AC <= highest_AC; AC++)
			{
				System.out.printf("AAF\t%d\t%d\t%f\n", AC, AC_histogram[AC], (double)AC_transitions[AC]/(double)(AC_histogram[AC]-AC_transitions[AC]));
			}
			System.out.printf("\n");


			System.out.printf("DEPTH\ttable\tDepth\tCount\tTs/Tv\n");
			for (int DP = 1; DP <= highest_DP; DP++)
			{
				System.out.printf("%d\t%d\t%f\n", DP, DP_histogram[DP], (double)DP_transitions[DP]/(double)(DP_histogram[DP]-DP_transitions[DP]));
			}
			System.out.printf("\n");

			return 0;
		}

}

class CheckRefFields extends CommandLineProgram
{
		@Argument(fullName = "vcf", shortName = "vcf", doc = "file to open", required = true) public String filename;
		@Argument(fullName = "auto_correct", shortName = "auto_correct", doc = "auto-correct the VCF file if it's off-spec", required = false) public Boolean autocorrect = false;
		@Argument(fullName = "fasta", shortName = "fasta", doc = "reference FASTA", required = true) public String fasta_filename;

		@Override
		protected int execute()
		{
			System.out.println("Checking " + filename + "...");

			VCFReader reader = null;

			if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
			else { reader = new VCFReader(new File(filename)); }

			ReferenceSequenceFileWalker ref = new ReferenceSequenceFileWalker(new File(fasta_filename));
			String ref_seq_name = "";
			byte[] ref_seq = null;
			SAMSequenceDictionary ref_dict = ref.getSequenceDictionary();

			VCFHeader header = reader.getHeader();

			Date start_time = new Date();
			int n_records_processed = 0;
			while(reader.hasNext())
			{
				VCFRecord record = reader.next();

				String chr = record.getChr();
				if (! chr.equals(ref_seq_name))
				{
					System.out.println("Loading " + chr);
					ref_seq = ref.get(ref_dict.getSequence(chr).getSequenceIndex()).getBases();
					ref_seq_name = chr;
				}

				long offset   = record.getStart();
				char vcf_ref_base = record.getReference().charAt(0);
				char fasta_ref_base = (char)ref_seq[(int)offset-1];

				List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
				char vcf_alt_base = alleles.get(0).getBases().charAt(0);

				//System.out.println(chr + " " + offset + " " + fasta_ref_base + " " + vcf_ref_base + " " + vcf_alt_base);

				String ans = null;
				if (vcf_ref_base != fasta_ref_base)
				{
					System.out.println("Error! Ref field does not match fasta. Fasta says " + fasta_ref_base);
					System.out.println(record.toStringEncoding(header));
				}
			}

			System.out.println("All reference fields correct.");
			return 0;
		}
}


class FixRefFields extends CommandLineProgram
{
		@Argument(fullName = "vcf", shortName = "vcf", doc = "file to open", required = true) public String filename;
		@Argument(fullName = "auto_correct", shortName = "auto_correct", doc = "auto-correct the VCF file if it's off-spec", required = false) public Boolean autocorrect = false;
		@Argument(fullName = "fasta", shortName = "fasta", doc = "reference FASTA", required = true) public String fasta_filename;
		@Argument(fullName = "output", shortName = "output", doc = "output file", required = true) public String output_filename;

		@Override
		protected int execute()
		{
			System.out.println("Fixing " + filename + "...");

			VCFReader reader = null;

			if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
			else { reader = new VCFReader(new File(filename)); }

			ReferenceSequenceFileWalker ref = new ReferenceSequenceFileWalker(new File(fasta_filename));
			String ref_seq_name = "";
			byte[] ref_seq = null;
			SAMSequenceDictionary ref_dict = ref.getSequenceDictionary();


			VCFHeader header = reader.getHeader();

			PrintStream output;
			try
			{
				VCFWriter writer = new VCFWriter(new File(output_filename));
                writer.writeHeader(header);
				writer.close();
				output = new PrintStream(new FileOutputStream(output_filename, true));
			}
			catch (Exception e)
			{
				throw new RuntimeException(e);
			}

			Date start_time = new Date();
			int n_records_processed = 0;
			while(reader.hasNext())
			{
				VCFRecord record = reader.next();

				String chr = record.getChr();
				if (! chr.equals(ref_seq_name))
				{
					System.out.println("Loading " + chr);
					ref_seq = ref.get(ref_dict.getSequence(chr).getSequenceIndex()).getBases();
					ref_seq_name = chr;
				}

				long offset   = record.getStart();
				char vcf_ref_base = record.getReference().charAt(0);
				char fasta_ref_base = (char)ref_seq[(int)offset-1];

				List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
				char vcf_alt_base = alleles.get(0).getBases().charAt(0);

				//System.out.println(chr + " " + offset + " " + fasta_ref_base + " " + vcf_ref_base + " " + vcf_alt_base);

				String ans = null;
				if ((vcf_ref_base != fasta_ref_base) && ((vcf_alt_base == fasta_ref_base) || (vcf_alt_base == '.')))
				{
					// swap!
					String s = record.toStringEncoding(header);
					String[] tokens = s.split("\\s+");
					tokens[3] = Character.toString(fasta_ref_base);
					tokens[4] = Character.toString(vcf_ref_base);
					for (int i = 9; i < tokens.length; i++)
					{
						tokens[i] = tokens[i].replaceAll("0", "A");
						tokens[i] = tokens[i].replaceAll("1", "B");
						tokens[i] = tokens[i].replaceAll("B", "0");
						tokens[i] = tokens[i].replaceAll("A", "1");
					}

					ans = "";
					for (int i = 0; i < tokens.length; i++)
					{
						ans = ans + tokens[i] + "\t";
					}
					ans.replaceAll("\\s+$", "");

					//System.out.println("from: " + s);
					//System.out.println("to: " + ans);
				}
				else
				{
					ans = record.toStringEncoding(header);
				}

				output.println(ans);
			}

			output.flush();
			output.close();

			System.out.println("Done.");
			return 0;
		}
}

class VCFGrep extends CommandLineProgram
{
		@Argument(fullName = "input", shortName = "input", doc = "file to read", required = true) public String in_filename;
		@Argument(fullName = "output", shortName = "output", doc = "file to write", required = true) public String out_filename;
		@Argument(fullName = "auto_correct", shortName = "auto_correct", doc = "auto-correct the VCF file if it's off-spec", required = false) public Boolean autocorrect = false;
		@Argument(fullName = "loci", shortName = "loci", doc = "file listing loci to extract", required = true) public String loci_filename;

		@Override
		protected int execute()
		{
			HashSet<String> loci = new HashSet<String>();
			try
			{
				Scanner loci_reader;

				if (loci_filename.endsWith(".gz")) { loci_reader = new Scanner(new GZIPInputStream(new FileInputStream(loci_filename))); }
				else { loci_reader = new Scanner(new File(loci_filename)); }

				while(loci_reader.hasNextLine())
				{
					String line = loci_reader.nextLine();
					line = line.replaceAll("\\s+", "");
					loci.add(line);
				}
			}
			catch (Exception e)
			{
				throw new RuntimeException(e);
			}

			try
			{
				PrintStream output = new PrintStream(new File(out_filename));

				Scanner reader;
				if (in_filename.endsWith(".gz")) { reader = new Scanner(new GZIPInputStream(new FileInputStream(in_filename))); }
				else { reader = new Scanner(new File(in_filename)); }
				while(reader.hasNextLine())
				{
					String line = reader.nextLine();

					if (line.matches("^\\#.*$")) { output.print(line + "\n"); continue; }

					String[] tokens = line.split("\\s+");
					String locus = tokens[0] + ":" + tokens[1];
					if (loci.contains(locus)) { output.print(line + "\n"); continue; }
				}
			}
			catch (Exception e)
			{
				throw new RuntimeException(e);
			}

			return 0;
		}

}

class VCFGrep_old extends CommandLineProgram
{
		@Argument(fullName = "input", shortName = "input", doc = "file to read", required = true) public String in_filename;
		@Argument(fullName = "output", shortName = "output", doc = "file to write", required = true) public String out_filename;
		@Argument(fullName = "auto_correct", shortName = "auto_correct", doc = "auto-correct the VCF file if it's off-spec", required = false) public Boolean autocorrect = false;
		@Argument(fullName = "loci", shortName = "loci", doc = "file listing loci to extract", required = true) public String loci_filename;

		@Override
		protected int execute()
		{
			VCFReader reader = null;
			VCFWriter writer = null;

			HashSet<Interval> loci = new HashSet<Interval>();
			try
			{
				Scanner loci_reader = new Scanner(new File(loci_filename));
				while(loci_reader.hasNextLine())
				{
					String line = loci_reader.nextLine();
					String[] tokens = line.split("\\:");

					String chr = tokens[0];
					String off = tokens[1];
					loci.add(new Interval(chr, Integer.parseInt(off), Integer.parseInt(off)));
				}
			}
			catch (Exception e)
			{
				throw new RuntimeException(e);
			}

			if (autocorrect) { reader = new VCFReader(new File(in_filename),new VCFHomogenizer()); }
			else { reader = new VCFReader(new File(in_filename)); }


            writer = new VCFWriter(new File(out_filename));
            writer.writeHeader(reader.getHeader());

			while(reader.hasNext())
			{
				VCFRecord record = reader.next();
				Interval locus = VCFTool.getIntervalFromRecord(record);
				if (loci.contains(locus)) { writer.addRecord(record); }
			}
			writer.close();

			return 0;
		}

}

class PrintGQ extends CommandLineProgram
{
		@Argument(fullName = "vcf", shortName = "vcf", doc = "file to open", required = true) public String filename;

		@Override
		protected int execute()
		{
			VCFReader reader;
			VCFReader reader2;

				reader = new VCFReader(new File(filename),new VCFHomogenizer());

			VCFHeader header = reader.getHeader();
			VCFRecord record = reader.next();

			while(true)
			{
				if (record == null) { break; }

				Interval interval = VCFTool.getIntervalFromRecord(record);

					if (record.isFiltered())
					{
						record = reader.next();
					}

					char ref = record.getReference().charAt(0);

					String[] sample_names = record.getSampleNames();

					List<VCFGenotypeRecord> genotypes = record.getVCFGenotypeRecords();

					for (int i = 0; i < sample_names.length; i++)
					{
						VCFGenotypeRecord rec = genotypes.get(i);

						String gq = rec.getFields().get("GQ");

						List<VCFGenotypeEncoding> alleles = rec.getAlleles();

						String g = "";

						for (int j = 0; j < alleles.size(); j++) { g += alleles.get(j).getBases(); }
						char[] c = g.toCharArray();

						Arrays.sort(c);

						g = new String(c);

						System.out.println(g + " " + gq);
					}

					record = reader.next();
			}

			return 0;
		}
}

class VCFSimpleStats extends CommandLineProgram
{
		@Argument(fullName = "vcf1", shortName = "vcf1", doc = "file to open", required = true) public String filename1;
		@Argument(fullName = "out", shortName = "out", doc = "file to write results to", required = true) public String output_filename;
		@Argument(fullName = "auto_correct", shortName = "auto_correct", doc = "auto-correct the VCF file if it's off-spec", required = false) public Boolean autocorrect = false;
		@Argument(fullName = "verbose", shortName = "verbose", doc = "print extremely detailed stats", required = false) public Boolean verbose = false;
		@Argument(fullName = "min_call_rate", shortName = "min_call_rate", doc = "what fraction of samples must have a call", required = false) public double min_call_rate = 0.9;

		@Override
		protected int execute()
		{
			//System.out.println("Loading " + filename + "...");

			PrintStream output = null;
			try
			{
				output = new PrintStream(new FileOutputStream(output_filename));
			}
			catch (Exception e)
			{
				throw new RuntimeException(e);
			}

			VCFReader reader1;

			if (autocorrect)
			{
				reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
			}
			else
			{
				reader1 = new VCFReader(new File(filename1));
			}

			VCFHeader header1 = reader1.getHeader();

			VCFRecord record1 = reader1.next();

			int TP = 0;
			int FP = 0;
			int TN = 0;
			int FN = 0;
			int total = 0;
			int dropped = 0;

			int ts = 0;
			int tv = 0;

			while(true)
			{
				if (record1 == null) { break; }

				Interval interval1 = VCFTool.getIntervalFromRecord(record1);

					// (unless it is "filtered")
					if (record1.isFiltered())
					{
						record1 = reader1.next();
					}

					char ref = record1.getReference().charAt(0);


					String[] sample_names1 = record1.getSampleNames();

					List<VCFGenotypeRecord> genotypes1 = record1.getVCFGenotypeRecords();

					long n_ref_1 = 0;
					long n_alt_1 = 0;
					long n_total_1 = 0;
					long n_calls_1 = 0;
					long n_dropped_1 = 0;

					for (int i = 0; i < sample_names1.length; i++)
					{
						VCFGenotypeRecord rec1 = genotypes1.get(i);

						//if (rec2 == null) { continue; }

						Long gq1;

						if (rec1.getFields().get("GQ") != null)
						{
							Double gq1_double = Double.parseDouble(rec1.getFields().get("GQ"));
							gq1 = gq1_double.longValue();
						}
						else
						{
							gq1 = 0L;
						}

						List<VCFGenotypeEncoding> alleles1 = rec1.getAlleles();

						String g1 = "";

						for (int j = 0; j < alleles1.size(); j++) { g1 += alleles1.get(j).getBases(); }

						char[] c1 = g1.toCharArray();

						Arrays.sort(c1);

						g1 = new String(c1);

						n_total_1 += 1;

						if (g1.equals(".."))
						{
							n_dropped_1 += 1;
							continue;
						}

						n_calls_1 += 1;

						if (g1.charAt(0) == ref) { n_ref_1 += 1; } else { n_alt_1 += 1; }
						if (g1.charAt(1) == ref) { n_ref_1 += 1; } else { n_alt_1 += 1; }
					}

					if (((double)n_calls_1 / (double)n_total_1) >= min_call_rate)
					{
						if (n_alt_1 == 0) { FP += 1; }
						if (n_alt_1 >  0) { TP += 1; }
						total += 1;

						if (VCFTool.isTransition(record1)) { ts += 1; }
						else { tv += 1; }
					}
					else
					{
						dropped += 1;
					}

					if ((verbose) && (((double)n_calls_1 / (double)n_total_1) >= min_call_rate))
					{
						//output.printf("SNP "
						//				+ interval1.toString()
						//				+ " " + n_total_1 + " " + n_calls_1 + " " +  (double)n_calls_1/(double)n_total_1 + " " + n_ref_1 + " " + n_alt_1 + "\n");
						if (n_alt_1 == 0) { output.printf("FP: %s\n", interval1.toString()); }
						if (n_alt_1 != 0) { output.printf("TP: %s\n", interval1.toString()); }
					}

					record1 = reader1.next();
			}


			// Now output the statistics.

			output.printf("TP FP dropped ts tv ts/tv\n%d(%f) %d(%f) %d %d %d %f\n",
							TP, (double)TP/(double)total,
							FP, (double)FP/(double)total,
							dropped,
							ts, tv,
							(double)ts/(double)tv);

			output.flush();
			output.close();


			return 0;
		}
}


class VCFConcordance extends CommandLineProgram
{
		@Argument(fullName = "vcf1", shortName = "vcf1", doc = "file to open", required = true) public String filename1;
		@Argument(fullName = "vcf2", shortName = "vcf2", doc = "file to open", required = true) public String filename2;
		@Argument(fullName = "out", shortName = "out", doc = "file to write results to", required = true) public String output_filename;
		@Argument(fullName = "auto_correct", shortName = "auto_correct", doc = "auto-correct the VCF file if it's off-spec", required = false) public Boolean autocorrect = false;
		@Argument(fullName = "verbose", shortName = "verbose", doc = "print extremely detailed stats", required = false) public Boolean verbose = false;
		@Argument(fullName = "list_genotypes", shortName = "list_genotypes", doc = "print each person's genotype for debugging", required = false) public Boolean list_genotypes = false;
		@Argument(fullName = "qual_threshold", shortName = "qual_threshold", doc = "minimum genotype quality to consider", required = false) public long qual_threshold = 1;
		@Argument(fullName = "samples", shortName = "samples", doc = "optional list of individuals to score", required = false) public String samples_filename = null;
		@Argument(fullName = "r2_bin_size", shortName = "r2_bin_size", doc = "size of an r2 bin for calculating error rates", required = false) public double r2_bin_size = 0.01;


		@Override
		protected int execute()
		{
			//System.out.println("Loading " + filename + "...");

			/////////////////////////////////
			// All the various concordance counters

			HashMap<String,GenotypeConcordance> individual = new HashMap<String,GenotypeConcordance>();
			HashMap<Long,GenotypeConcordance>   AAF        = new HashMap<Long,GenotypeConcordance>();
			HashMap<Long,GenotypeConcordance>   Qual       = new HashMap<Long,GenotypeConcordance>();
			HashMap<Long,GenotypeConcordance>   R2         = new HashMap<Long,GenotypeConcordance>();

			int shared_ts    = 0;
			int shared_tv    = 0;
			int shared_dbsnp = 0;
			int shared_total = 0;

			int unique1_ts    = 0;
			int unique1_tv    = 0;
			int unique1_dbsnp = 0;
			int unique1_total = 0;

			int unique2_ts    = 0;
			int unique2_tv    = 0;
			int unique2_dbsnp = 0;
			int unique2_total = 0;

			//
			/////////////////////////////////

			HashSet<String> sample_mask = new HashSet<String>();
			if (samples_filename != null)
			{
				Scanner samples_reader = null;
				try
				{
					samples_reader = new Scanner(new File(samples_filename));
				}
				catch (Exception e)
				{
					throw new RuntimeException(e);
				}
				while(samples_reader.hasNextLine())
				{
					String line = samples_reader.nextLine();
					line.replaceAll("^\\s+|\\s+$", "");
					sample_mask.add(line);
				}
			}


			PrintStream output = null;
			try
			{
				output = new PrintStream(new FileOutputStream(output_filename));
			}
			catch (Exception e)
			{
				throw new RuntimeException(e);
			}

			VCFReader reader1;
			VCFReader reader2;

			if (autocorrect)
			{
				reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
				reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
			}
			else
			{
				reader1 = new VCFReader(new File(filename1));
				reader2 = new VCFReader(new File(filename2));
			}

			VCFHeader header1 = reader1.getHeader();
			VCFHeader header2 = reader2.getHeader();

			VCFRecord record1 = reader1.next();
			VCFRecord record2 = reader2.next();

			int number_sites_unique_to_file1 = 0;
			int number_sites_unique_to_file2 = 0;
			int number_sites_shared          = 0;

			while(true)
			{
				if (record1 == null) { break; }
				if (record2 == null) { break; }


				Interval interval1 = VCFTool.getIntervalFromRecord(record1);
				Interval interval2 = VCFTool.getIntervalFromRecord(record2);

				//int comparison = interval1.compareTo(interval2);
				int comparison = VCFTool.compareIntervals(interval1, interval2);

				//System.out.println("DBG: " + interval1 + " " + interval2 + " " + comparison);

				if (comparison == 0)
				{
					// records match! compute concordance.

					// (unless one of them is "filtered")
					if (record1.isFiltered() || record2.isFiltered())
					{
						record1 = reader1.next();
						record2 = reader2.next();
						continue;
					}


					char ref = record1.getReference().charAt(0);

					String[] sample_names1 = record1.getSampleNames();
					String[] sample_names2 = record2.getSampleNames();


					Map<String,String> info1 = record1.getInfoValues();
					Map<String,String> info2 = record2.getInfoValues();
					double r2_1 = 0;
					double r2_2 = 0;
					if (info1.containsKey("R2")) { r2_1 = Double.parseDouble(info1.get("R2")); }
					if (info2.containsKey("R2")) { r2_2 = Double.parseDouble(info2.get("R2")); }


					number_sites_shared += 1;
					if (VCFTool.isTransition(record1)) { shared_ts += 1; }
					else { shared_tv += 1; }
					if ((info1.get("DB") != null) && (Integer.parseInt(info1.get("DB")) == 1)) { shared_dbsnp += 1; }
					shared_total += 1;


					List<VCFGenotypeRecord> genotypes1 = record1.getVCFGenotypeRecords();
					List<VCFGenotypeRecord> genotypes2 = record2.getVCFGenotypeRecords();

					Map<String, VCFGenotypeRecord> map2 = new HashMap<String, VCFGenotypeRecord>();
					for (int i = 0; i < genotypes2.size(); i++)
					{
						map2.put(genotypes2.get(i).getSampleName(), genotypes2.get(i));
					}

					GenotypeConcordance SNP = new GenotypeConcordance(interval1.toString());

					long n_ref = 0;
					long n_alt = 0;

					for (int i = 0; i < sample_names1.length; i++)
					{
						if ((samples_filename != null) &&
							(! sample_mask.contains(sample_names1[i])))
						{
							continue;
						}


						VCFGenotypeRecord rec1 = genotypes1.get(i);
						VCFGenotypeRecord rec2 = map2.get(sample_names1[i]);

						if (rec2 == null) { continue; }

						Long gq1;
						if (rec1.getFields().get("GQ") != null)
						{
							Double gq1_double = Double.parseDouble(rec1.getFields().get("GQ"));
							gq1 = gq1_double.longValue();
						}
						else
						{
							gq1 = 0L;
						}

						Long gq2;
						if (rec2.getFields().get("GQ") != null)
						{
							Double gq2_double = Double.parseDouble(rec2.getFields().get("GQ"));
							gq2 = gq2_double.longValue();
						}
						else
						{
							gq2 = 0L;
						}

						List<VCFGenotypeEncoding> alleles1 = rec1.getAlleles();
						List<VCFGenotypeEncoding> alleles2 = rec2.getAlleles();

						String g1 = "";
						String g2 = "";

						for (int j = 0; j < alleles1.size(); j++) { g1 += alleles1.get(j).getBases(); }
						for (int j = 0; j < alleles2.size(); j++) { g2 += alleles2.get(j).getBases(); }

						char[] c1 = g1.toCharArray();
						char[] c2 = g2.toCharArray();

						Arrays.sort(c1);
						Arrays.sort(c2);

						g1 = new String(c1);
						g2 = new String(c2);

						if (list_genotypes)
						{
							String flag = "";
							if (! g1.equals(g2)) { flag = "X"; }
							output.printf("GENOTYPES "
												+ interval1.toString()
												+ " " + sample_names1[i]
												+ " " + g1
												+ " " + g2
												+ " " + gq1
												+ " " + gq2
												+ " " + flag +  "\n");
						}

						if ((g1.equals("..")) ||
							(g2.equals("..")))
						{
							continue;
						}

						if (g1.charAt(0) == ref) { n_ref += 1; } else { n_alt += 1; }
						if (g1.charAt(1) == ref) { n_ref += 1; } else { n_alt += 1; }

						if (! individual.containsKey(sample_names1[i])) { individual.put(sample_names1[i], new GenotypeConcordance(sample_names1[i])); }
						if (! Qual.containsKey(gq1)) { Qual.put(gq1, new GenotypeConcordance(Long.toString(gq1))); }

						individual.get(sample_names1[i]).add(ref, g1, g2);
						Qual.get(gq1).add(ref, g1, g2);
						SNP.add(ref, g1, g2);

					}

					if (verbose)
					{
						//output.printf("SNP " + SNP.toString());
						output.printf("SNP " + SNP.toLine());
					}

					if (! AAF.containsKey(n_alt)) { AAF.put(n_alt, new GenotypeConcordance(Long.toString(n_alt))); }
					AAF.get(n_alt).add(SNP);

					long r2_index = (long)(r2_1 / r2_bin_size);
					if (! R2.containsKey(r2_index)) { R2.put(r2_index, new GenotypeConcordance(Double.toString(r2_1))); }
					R2.get(r2_index).add(SNP);

					//System.out.printf("DBG: %f %f\n", r2_1, r2_2);
					//System.out.printf("DBG: %f %d %s\n", r2_1, r2_index, SNP.toString());

					record1 = reader1.next();
					record2 = reader2.next();
				}
				else if (comparison > 0)
				{
					if (record2.isFiltered()) { record2 = reader2.next(); continue; }

					// interval1 is later than interval2.
					Map<String,String> info2 = record2.getInfoValues();
					number_sites_unique_to_file2 += 1;
					if (VCFTool.isTransition(record2)) { unique2_ts += 1; }
					else { unique2_tv += 1; }
					if ((info2.get("DB") != null) && (Integer.parseInt(info2.get("DB")) == 1)) { unique2_dbsnp += 1; }
					unique2_total += 1;

					//if (verbose) { output.printf("DBG: skipping %s\n", record2.toStringEncoding(header2)); }

					record2 = reader2.next();
				}
				else if (comparison < 0)
				{
					if (record1.isFiltered()) { record1 = reader1.next(); continue; }

					// interval2 is later than interval1.
					Map<String,String> info1 = record1.getInfoValues();
					number_sites_unique_to_file1 += 1;
					if (VCFTool.isTransition(record1)) { unique1_ts += 1; }
					else { unique1_tv += 1; }
					if ((info1.get("DB") != null) && (Integer.parseInt(info1.get("DB")) == 1)) { unique1_dbsnp += 1; }
					unique1_total += 1;

					//if (verbose) { output.printf("DBG: skipping %s\n", record1.toStringEncoding(header1)); }

					record1 = reader1.next();
				}
			}


			// Now output the statistics.
				if (verbose)
				{
					output.printf("\n");
					Object[] individuals = individual.keySet().toArray();
					for (int i = 0; i < individuals.length; i++)
					{
						String ind = (String)individuals[i];
						output.print("INDIVIDUAL " + individual.get(ind).toString());
					}

					output.printf("\n");
					Object[] AAFs = AAF.keySet().toArray();
					for (int i = 0; i < AAFs.length; i++)
					{
						Long aaf = (Long)AAFs[i];
						output.print("AAF " + AAF.get(aaf).toString());
					}

					output.printf("\n");
					Object[] quals = Qual.keySet().toArray();
					for (int i = 0; i < quals.length; i++)
					{
						Long qual = (Long)quals[i];
						output.print("QUAL " + Qual.get(qual).toString());
					}
					output.printf("\n");

					output.printf("\n");
					Object[] R2s = R2.keySet().toArray();
					for (int i = 0; i < AAFs.length; i++)
					{
						Long r2 = (Long)R2s[i];
						output.print("R2 " + R2.get(r2).toString());
					}
				}

				output.printf("Number of sites shared          : %d %f %f\n", number_sites_shared,
																				(double)shared_ts/(double)shared_tv,
																				(double)shared_dbsnp/(double)(shared_ts+shared_tv));

				output.printf("Number of sites unique to %s: %d %f %f\n", filename1, number_sites_unique_to_file1,
																				(double)unique1_ts/(double)unique1_tv,
																				(double)unique1_dbsnp/(double)(unique1_ts+unique1_tv));

				output.printf("Number of sites unique to %s: %d %f %f\n", filename2, number_sites_unique_to_file2,
																				(double)unique2_ts/(double)unique2_tv,
																				(double)unique2_dbsnp/(double)(unique2_ts+unique2_tv));

				output.printf("\n");
				Object[] individuals = individual.keySet().toArray();
				for (int i = 0; i < individuals.length; i++)
				{
					String ind = (String)individuals[i];
					output.printf("INDIVIDUAL %s %f %d %d\n", ind, individual.get(ind).errorRate(), individual.get(ind).total(), individual.get(ind).totalNonHomRef());
				}

				output.printf("\n");
				Object[] AAFs = AAF.keySet().toArray();
				for (int i = 0; i < AAFs.length; i++)
				{
					Long aaf = (Long)AAFs[i];
					output.printf("AAF %d %f %d %d %f\n", aaf, AAF.get(aaf).errorRate(), AAF.get(aaf).total(), AAF.get(aaf).totalNonHomRef(), AAF.get(aaf).hetErrorRate());
				}

				output.printf("\n");
				Object[] quals = Qual.keySet().toArray();
				for (int i = 0; i < quals.length; i++)
				{
					Long qual = (Long)quals[i];
					output.printf("QUAL %d %f %d %d\n", qual, Qual.get(qual).errorRate(), Qual.get(qual).total(), Qual.get(qual).totalNonHomRef());
				}

				output.printf("\n");
				Object[] R2s = R2.keySet().toArray();
				for (int i = 0; i < R2s.length; i++)
				{
					Long r2 = (Long)R2s[i];
					output.printf("R2 %f %f %d %d\n", (double)r2 * r2_bin_size, R2.get(r2).errorRate(), R2.get(r2).total(), R2.get(r2).totalNonHomRef());
				}

			output.flush();
			output.close();


			return 0;
		}
}

public class VCFTool
{
		public static void main(String args[])
		{
			// silence log4j messages.
			//appender = new FileAppender(layout, clp.toFile, false);
			//logger.addAppender(appender);

			SetupSequenceDictionary();

			String mode = args[0];
			String[] realArgs = Arrays.copyOfRange(args, 1, args.length);

			if (mode.equals("validate"))
			{
				VCFValidate cm = new VCFValidate();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("grep"))
			{
				VCFGrep cm = new VCFGrep();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("concordance"))
			{
				VCFConcordance cm = new VCFConcordance();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("simple_stats"))
			{
				VCFSimpleStats cm = new VCFSimpleStats();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("printGQ"))
			{
				PrintGQ cm = new PrintGQ();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("fix_ref_fields"))
			{
				FixRefFields cm = new FixRefFields();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("check_ref_fields"))
			{
				CheckRefFields cm = new CheckRefFields();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("stats"))
			{
				VCFStats cm = new VCFStats();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("sequenom"))
			{
				VCFSequenomAnalysis cm = new VCFSequenomAnalysis();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("sequenom2"))
			{
				VCFSequenomAnalysis2 cm = new VCFSequenomAnalysis2();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("call_rates"))
			{
				VCFCallRates cm = new VCFCallRates();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("optimize"))
			{
				VCFOptimize cm = new VCFOptimize();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("apply_cuts"))
			{
				VCFApplyCuts cm = new VCFApplyCuts();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			if (mode.equals("merge"))
			{
				VCFMerge cm = new VCFMerge();
				CommandLineProgram.start(cm,realArgs);
				System.exit(0);
			}

			System.out.printf("ERROR: mode %s not defined.\n", mode);
			System.exit(-1);

		}


		/////////////////////////
		// Some helpful utilities.

		// Total hack to set up a sequence dictionary for 1kG hg18/build36 without needing to load a fasta.
		public static SAMSequenceDictionary dict;
		public static void SetupSequenceDictionary()
		{
			dict = new SAMSequenceDictionary();
			for (int i = 1; i <= 22; i++)
			{
				dict.addSequence(new SAMSequenceRecord(String.format("%d", i)));
			}
			dict.addSequence(new SAMSequenceRecord("X"));
			dict.addSequence(new SAMSequenceRecord("Y"));
			dict.addSequence(new SAMSequenceRecord("M"));
			GenomeLocParser.setupRefContigOrdering(dict);
		}

		public static Interval getIntervalFromRecord(VCFRecord record)
		{
			String chr = record.getChr();
			long   off = record.getStart();
			return new Interval(chr, (int)off, (int)off);
		}

		public static char getAlt(VCFRecord record)
		{
			List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
			char alt = alleles.get(0).getBases().charAt(0);
			return alt;
		}

		public static boolean isTransition(VCFRecord record)
		{
			char ref = record.getReference().charAt(0);
			List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
			char alt = alleles.get(0).getBases().charAt(0);

			if (((ref == 'A') && (alt == 'G')) ||
				((ref == 'G') && (alt == 'A')) ||
				((ref == 'C') && (alt == 'T')) ||
				((ref == 'T') && (alt == 'C')))
			{
				return true;
			}
			else
			{
				return false;
			}
		}


		public static int Compute_n_total(VCFRecord record)
		{
			return VCFTool.Compute_n_total(record, (String[])null);
		}

		public static int Compute_n_total(VCFRecord record, String[] sample_names)
		{
			HashSet<String> set = null;
			if (sample_names != null)
			{
				set = new HashSet<String>();
				for (int i = 0; i < sample_names.length; i++) { set.add(sample_names[i]); }
			}
			return VCFTool.Compute_n_total(record, set);
		}

		public static int Compute_n_total(VCFRecord record, Set<String> sample_mask)
		{
			String[] sample_names = record.getSampleNames();
			List<VCFGenotypeRecord> genotypes = record.getVCFGenotypeRecords();
			int n_ref = 0;
			int n_alt = 0;
			for (int i = 0; i < sample_names.length; i++)
			{
				if ((sample_mask != null) && (! sample_mask.contains(sample_names[i])))
				{
					continue;
				}

				VCFGenotypeRecord rec = genotypes.get(i);
				List<VCFGenotypeEncoding> alleles = rec.getAlleles();
				String g = "";
				for (int j = 0; j < alleles.size(); j++) { g += alleles.get(j).getBases(); }
				char[] c = g.toCharArray();
				Arrays.sort(c);
				g = new String(c);
				if (g.equals("..")) { continue; }
				if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
				if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
			}
			return n_alt + n_ref;
		}

		public static int Compute_n_alt(VCFRecord record)
		{
			return VCFTool.Compute_n_alt(record, (String[])null);
		}

		public static int Compute_n_alt(VCFRecord record, String[] sample_names)
		{
			HashSet<String> set = null;
			if (sample_names != null)
			{
				set = new HashSet<String>();
				for (int i = 0; i < sample_names.length; i++) { set.add(sample_names[i]); }
			}
			return VCFTool.Compute_n_alt(record, set);
		}

		public static int Compute_n_alt(VCFRecord record, Set<String> sample_mask)
		{
			String[] sample_names = record.getSampleNames();
			List<VCFGenotypeRecord> genotypes = record.getVCFGenotypeRecords();
			int n_ref = 0;
			int n_alt = 0;
			for (int i = 0; i < sample_names.length; i++)
			{
				// Skip samples we should skip.
				if ((sample_mask != null) && (! sample_mask.contains(sample_names[i])))
				{
					continue;
				}

				VCFGenotypeRecord rec = genotypes.get(i);
				List<VCFGenotypeEncoding> alleles = rec.getAlleles();
				String g = "";
				for (int j = 0; j < alleles.size(); j++) { g += alleles.get(j).getBases(); }
				char[] c = g.toCharArray();
				Arrays.sort(c);
				g = new String(c);
				if (g.equals("..")) { continue; }
				if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
				if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
			}
			return n_alt;
		}


		public static int Compute_n_het(VCFRecord record)
		{
			return VCFTool.Compute_n_het(record, (String[])null);
		}

		public static int Compute_n_het(VCFRecord record, String[] sample_names)
		{
			HashSet<String> set = null;
			if (sample_names != null)
			{
				set = new HashSet<String>();
				for (int i = 0; i < sample_names.length; i++) { set.add(sample_names[i]); }
			}
			return VCFTool.Compute_n_het(record, set);
		}

		public static int Compute_n_het(VCFRecord record, Set<String> sample_mask)
		{
			String[] sample_names = record.getSampleNames();
			List<VCFGenotypeRecord> genotypes = record.getVCFGenotypeRecords();
			int n_het = 0;
			for (int i = 0; i < sample_names.length; i++)
			{
				// Skip samples we should skip.
				if ((sample_mask != null) && (! sample_mask.contains(sample_names[i])))
				{
					continue;
				}

				int n_ref = 0;
				int n_alt = 0;

				VCFGenotypeRecord rec = genotypes.get(i);
				List<VCFGenotypeEncoding> alleles = rec.getAlleles();
				String g = "";
				for (int j = 0; j < alleles.size(); j++) { g += alleles.get(j).getBases(); }
				char[] c = g.toCharArray();
				Arrays.sort(c);
				g = new String(c);
				if (g.equals("..")) { continue; }
				if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
				if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
				if (n_alt == 1) { n_het += 1; }
			}
			return n_het;
		}

		public static double Compute_failure_rate(VCFRecord record)
		{
			String[] sample_names = record.getSampleNames();
			List<VCFGenotypeRecord> genotypes = record.getVCFGenotypeRecords();
			double failure_rate = 0.0;
			for (int i = 0; i < sample_names.length; i++)
			{
				VCFGenotypeRecord rec = genotypes.get(i);
				List<VCFGenotypeEncoding> alleles = rec.getAlleles();
				String g = "";
				for (int j = 0; j < alleles.size(); j++) { g += alleles.get(j).getBases(); }
				char[] c = g.toCharArray();
				Arrays.sort(c);
				g = new String(c);
				if (g.equals("..")) { failure_rate += 1; continue; }
			}
			return failure_rate / (double)sample_names.length;
		}

		public static double Compute_HWE(VCFRecord record)
		{
			return VCFTool.Compute_HWE(record, (String[])null);
		}

		public static double Compute_HWE(VCFRecord record, String[] sample_names)
		{
			HashSet<String> set = null;
			if (sample_names != null)
			{
				set = new HashSet<String>();
				for (int i = 0; i < sample_names.length; i++) { set.add(sample_names[i]); }
			}
			return VCFTool.Compute_HWE(record, set);
		}

		public static double Compute_HWE(VCFRecord record, Set<String> sample_mask)
		{
			int ref = 0;
			int het = 0;
			int hom = 0;
			int N   = 0;

			String[] sample_names = record.getSampleNames();
			List<VCFGenotypeRecord> genotypes = record.getVCFGenotypeRecords();
			for (int i = 0; i < sample_names.length; i++)
			{
				// Skip samples we should skip.
				if ((sample_mask != null) && (! sample_mask.contains(sample_names[i])))
				{
					continue;
				}

				int n_ref = 0;
				int n_alt = 0;

				VCFGenotypeRecord rec = genotypes.get(i);
				List<VCFGenotypeEncoding> alleles = rec.getAlleles();
				String g = "";
				for (int j = 0; j < alleles.size(); j++) { g += alleles.get(j).getBases(); }
				char[] c = g.toCharArray();
				Arrays.sort(c);
				g = new String(c);
				if (g.equals("..")) { continue; }
				if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
				if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }

				if (n_ref == 2)                    { ref += 1; }
				else if (n_ref == 1 && n_alt == 1) { het += 1; }
				else if (n_alt == 2)               { hom += 1; }

				N += 1;
			}

			double p = (2.0 * ref + het) / (2.0 * (ref + het + hom));
			double q = 1.0 - p;

			//System.out.printf("DBG: p=%f q=%f ref=%d het=%d hom=%d\n", p, q, ref, het, hom);

			double expected_ref = p * p * N;
			double expected_het = 2.0 * p * q * N;
			double expected_hom = q * q * N;

			double chi_squared = (Math.pow(ref - expected_ref,2)/expected_ref) + (Math.pow(het - expected_het,2)/expected_het) + (Math.pow(hom - expected_hom,2)/expected_hom);

			return chi_squared;
		}

		// This function assumes a 1-degree of freedom chi-squared.
		public static double P_from_Chi(double chi)
		{
			double gamma = 1.772454;
			double a = Math.pow(2,0.5) * gamma;
			double b = Math.pow(chi, 0.5-1.0) * Math.exp((-1.0 * chi)/2.0);
			double ans = (1.0/a) * b;
			return ans;
		}

		public static int compareIntervals(Interval a, Interval b)
		{
			int chr_a;
			int chr_b;

			if (a.getSequence().equals("X")) { chr_a = 23; }
			else if (a.getSequence().equals("Y")) { chr_a = 24; }
			else if (a.getSequence().equals("M")) { chr_a = 25; }
			else { chr_a = Integer.parseInt(a.getSequence()); }

			if (b.getSequence().equals("X")) { chr_b = 23; }
			else if (b.getSequence().equals("Y")) { chr_b = 24; }
			else if (b.getSequence().equals("M")) { chr_b = 25; }
			else { chr_b = Integer.parseInt(b.getSequence()); }

			int start_a = a.getStart();
			int start_b = b.getStart();

			int end_a = a.getEnd();
			int end_b = b.getEnd();

			if (chr_a < chr_b) { return -1; }
			else if (chr_a > chr_b) { return 1; }
			else if (start_a < start_b) { return -1; }
			else if (start_a > start_b) { return 1; }
			else if (end_a < end_b) { return -1; }
			else if (end_a > end_b) { return 1; }
			else { return 0; }
		}

}