178 lines
5.7 KiB
Java
178 lines
5.7 KiB
Java
|
|
/*
 * Copyright (c) 2010 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
|
||
|
|
|
||
|
|
package org.broadinstitute.sting.playground.tools.vcf;
|
||
|
|
import org.broad.tribble.vcf.VCFHeader;
|
||
|
|
import org.broad.tribble.vcf.VCFRecord;
|
||
|
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||
|
|
import org.broadinstitute.sting.commandline.Argument;
|
||
|
|
|
||
|
|
import java.io.*;
|
||
|
|
|
||
|
|
import net.sf.picard.util.Interval;
|
||
|
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||
|
|
|
||
|
|
|
||
|
|
class VCFSequenomAnalysis extends CommandLineProgram
|
||
|
|
{
|
||
|
|
@Argument(fullName = "sequenom", shortName = "sequenom", doc = "file to open", required = true) public String filename1;
|
||
|
|
@Argument(fullName = "sequencing", shortName = "sequencing", doc = "file to open", required = true) public String filename2;
|
||
|
|
@Argument(fullName = "out", shortName = "out", doc = "file to write results to", required = false) public String output_filename = "/dev/stdout";
|
||
|
|
@Argument(fullName = "auto_correct", shortName = "auto_correct", doc = "auto-correct the VCF file if it's off-spec", required = false) public Boolean autocorrect = true;
|
||
|
|
@Argument(fullName = "verbose", shortName = "verbose", doc = "print extremely detailed stats", required = false) public Boolean verbose = false;
|
||
|
|
@Argument(fullName = "qual_threshold", shortName = "qual_threshold", doc = "minimum genotype quality to consider", required = false) public long qual_threshold = 1;
|
||
|
|
|
||
|
|
|
||
|
|
@Override
|
||
|
|
protected int execute()
|
||
|
|
{
|
||
|
|
//System.out.println("Loading " + filename + "...");
|
||
|
|
|
||
|
|
PrintStream output = null;
|
||
|
|
try
|
||
|
|
{
|
||
|
|
output = new PrintStream(new FileOutputStream(output_filename));
|
||
|
|
}
|
||
|
|
catch (Exception e)
|
||
|
|
{
|
||
|
|
throw new RuntimeException(e);
|
||
|
|
}
|
||
|
|
|
||
|
|
output.printf("interval flag ref alt missing_base n_total_sequenom failure_rate_sequenom n_alt_sequencing HWE_sequencing_chi HWE_sequenom_chi HWE_sequencing_p HWE_sequenom_p\n");
|
||
|
|
|
||
|
|
VCFReader reader1;
|
||
|
|
VCFReader reader2;
|
||
|
|
|
||
|
|
if (autocorrect)
|
||
|
|
{
|
||
|
|
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
|
||
|
|
reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
reader1 = new VCFReader(new File(filename1));
|
||
|
|
reader2 = new VCFReader(new File(filename2));
|
||
|
|
}
|
||
|
|
|
||
|
|
VCFHeader header1 = reader1.getHeader();
|
||
|
|
VCFHeader header2 = reader2.getHeader();
|
||
|
|
|
||
|
|
VCFRecord record1 = reader1.next();
|
||
|
|
VCFRecord record2 = reader2.next();
|
||
|
|
|
||
|
|
|
||
|
|
while(true)
|
||
|
|
{
|
||
|
|
if (record1 == null) { break; }
|
||
|
|
if (record2 == null) { break; }
|
||
|
|
|
||
|
|
String[] sample_names = record2.getSampleNames();
|
||
|
|
|
||
|
|
Interval interval1 = VCFTool.getIntervalFromRecord(record1);
|
||
|
|
Interval interval2 = VCFTool.getIntervalFromRecord(record2);
|
||
|
|
|
||
|
|
int comparison = interval1.compareTo(interval2);
|
||
|
|
|
||
|
|
if (comparison == 0)
|
||
|
|
{
|
||
|
|
// records match! compute concordance.
|
||
|
|
|
||
|
|
// (unless one of them is "filtered")
|
||
|
|
if (record1.isFiltered() || record2.isFiltered())
|
||
|
|
{
|
||
|
|
record1 = reader1.next();
|
||
|
|
record2 = reader2.next();
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
char ref = record1.getReference().charAt(0);
|
||
|
|
char alt = VCFTool.getAlt(record2);
|
||
|
|
|
||
|
|
int n_total_sequenom = VCFTool.Compute_n_total(record1);
|
||
|
|
double failure_rate_sequenom = VCFTool.Compute_failure_rate(record1);
|
||
|
|
|
||
|
|
int n_alt_sequenom = VCFTool.Compute_n_alt(record1);
|
||
|
|
int n_alt_sequencing = VCFTool.Compute_n_alt(record2);
|
||
|
|
|
||
|
|
double HWE_sequenom = VCFTool.Compute_HWE(record1, sample_names);
|
||
|
|
double HWE_sequencing = VCFTool.Compute_HWE(record2);
|
||
|
|
|
||
|
|
boolean isPolymorphic_sequenom = (n_alt_sequenom > 0) ? true : false;
|
||
|
|
boolean isPolymorphic_sequencing = (n_alt_sequencing > 0) ? true : false;
|
||
|
|
|
||
|
|
String flag = null;
|
||
|
|
char missing_base = '.';
|
||
|
|
|
||
|
|
if (isPolymorphic_sequenom)
|
||
|
|
{
|
||
|
|
flag = "TP";
|
||
|
|
if (n_alt_sequenom == n_total_sequenom) { missing_base = ref; }
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
flag = "FP";
|
||
|
|
missing_base = alt;
|
||
|
|
}
|
||
|
|
|
||
|
|
output.printf("%s %s %c %c %c %d %f %d %f %f %f %f\n",
|
||
|
|
interval1,
|
||
|
|
flag,
|
||
|
|
ref,
|
||
|
|
alt,
|
||
|
|
missing_base,
|
||
|
|
n_total_sequenom,
|
||
|
|
failure_rate_sequenom,
|
||
|
|
n_alt_sequencing,
|
||
|
|
HWE_sequencing,
|
||
|
|
HWE_sequenom,
|
||
|
|
VCFTool.P_from_Chi(HWE_sequencing),
|
||
|
|
VCFTool.P_from_Chi(HWE_sequenom));
|
||
|
|
|
||
|
|
record1 = reader1.next();
|
||
|
|
record2 = reader2.next();
|
||
|
|
}
|
||
|
|
else if (comparison > 0)
|
||
|
|
{
|
||
|
|
// interval1 is later than interval2.
|
||
|
|
//System.err.printf("Skipping (2): %s\n", VCFTool.getIntervalFromRecord(record2));
|
||
|
|
record2 = reader2.next();
|
||
|
|
}
|
||
|
|
else if (comparison < 0)
|
||
|
|
{
|
||
|
|
// interval2 is later than interval1.
|
||
|
|
//System.err.printf("Skipping (1): %s\n", VCFTool.getIntervalFromRecord(record1));
|
||
|
|
record1 = reader1.next();
|
||
|
|
}
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
output.flush();
|
||
|
|
output.close();
|
||
|
|
|
||
|
|
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
}
|