Example code for a bug in the VCF implementation. See JIRA entry at http://jira.broadinstitute.org:8008/browse/GSA-225
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2050 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
768f865035
commit
97ed945797
|
|
@ -0,0 +1,85 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.io.BufferedReader;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class VCFMixedUp extends RefWalker<Integer, Integer> {
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
for (ReferenceOrderedDatum rod : tracker.getAllRods()) {
|
||||
if (rod instanceof RodVCF) {
|
||||
for (String rodfile : this.getToolkit().getArguments().RODBindings) {
|
||||
String[] rodpieces = rodfile.split(",");
|
||||
|
||||
if (rodpieces[2].contains(".vcf")) {
|
||||
try {
|
||||
FileReader reader = new FileReader(rodpieces[2]);
|
||||
BufferedReader breader = new BufferedReader(reader);
|
||||
|
||||
ArrayList<String> header = new ArrayList<String>();
|
||||
ArrayList<String> variant = new ArrayList<String>();
|
||||
|
||||
String line;
|
||||
while ((line = breader.readLine()) != null) {
|
||||
String[] pieces = line.split("\\s+");
|
||||
|
||||
if (line.contains("##")) {
|
||||
} else if (line.contains("#CHROM")) {
|
||||
for (int i = 0; i < pieces.length; i++) {
|
||||
header.add(i, pieces[i]);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < pieces.length; i++) {
|
||||
variant.add(i, pieces[i]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < pieces.length; i++) {
|
||||
if (i < 9 || header.get(i).contains("NA12892") || header.get(i).contains("NA10851")) {
|
||||
out.printf("%s => %s\n", header.get(i), variant.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
RodVCF vcfrod = (RodVCF) rod;
|
||||
VCFRecord record = vcfrod.mCurrentRecord;
|
||||
|
||||
out.println();
|
||||
|
||||
for (VCFGenotypeRecord gr : record.getVCFGenotypeRecords()) {
|
||||
if (gr.getSampleName().equalsIgnoreCase("NA12892") || gr.getSampleName().equalsIgnoreCase("NA10851")) {
|
||||
out.printf("%s: %s:%s:%s:%s\n", gr.getSampleName(),
|
||||
gr.toGenotypeString(record.getAlternateAlleles()),
|
||||
gr.getFields().get("GQ"),
|
||||
gr.getFields().get("DP"),
|
||||
gr.getFields().get("HQ"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
public class VCFSubsetWalker extends RefWalker<ArrayList<VCFRecord>, VCFWriter> {
|
||||
@Argument(fullName="sample", shortName="SN", doc="Sample to include (or nothing to specify all samples)", required=false)
|
||||
private HashSet<String> SAMPLES;
|
||||
|
||||
@Argument(fullName="vcfsubset", shortName="O", doc="File to write VCF subset to", required=false)
|
||||
private File VPATH;
|
||||
|
||||
@Argument(fullName="includeNonVariants", shortName="INV", doc="Include non-variant loci", required=false)
|
||||
private boolean INCLUDE_NON_VARIANTS = false;
|
||||
|
||||
@Argument(fullName="includeFiltered", shortName="IF", doc="Include filtered loci", required=false)
|
||||
private boolean INCLUDE_FILTERED = false;
|
||||
|
||||
private VCFHeader vheader = null;
|
||||
private VCFWriter vwriter = null;
|
||||
|
||||
public void initializeWriter() {
|
||||
Map<String, String> metaData = new HashMap<String, String>();
|
||||
Set<String> additionalColumns = new HashSet<String>();
|
||||
|
||||
metaData.put("format", "VCRv3.2");
|
||||
metaData.put("source", "VariantsToVCF");
|
||||
metaData.put("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath());
|
||||
|
||||
additionalColumns.add("FORMAT");
|
||||
additionalColumns.addAll(SAMPLES);
|
||||
|
||||
vheader = new VCFHeader(metaData, additionalColumns);
|
||||
if (VPATH != null) {
|
||||
vwriter = new VCFWriter(vheader, VPATH);
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<VCFRecord> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
ArrayList<VCFRecord> records = new ArrayList<VCFRecord>();
|
||||
|
||||
for (ReferenceOrderedDatum rod : tracker.getAllRods()) {
|
||||
if (rod instanceof RodVCF) {
|
||||
RodVCF vcfrod = (RodVCF) rod;
|
||||
VCFRecord record = vcfrod.mCurrentRecord;
|
||||
|
||||
if (SAMPLES == null) {
|
||||
SAMPLES = new HashSet<String>();
|
||||
SAMPLES.addAll(vcfrod.getHeader().getGenotypeSamples());
|
||||
}
|
||||
|
||||
if (VPATH != null && vwriter == null) {
|
||||
initializeWriter();
|
||||
}
|
||||
|
||||
//out.println(record.toStringRepresentation(vcfrod.getHeader()));
|
||||
|
||||
records.add(record);
|
||||
}
|
||||
}
|
||||
|
||||
return records;
|
||||
}
|
||||
|
||||
public VCFWriter reduceInit() {
|
||||
return vwriter;
|
||||
}
|
||||
|
||||
private VCFRecord subsetRecord(VCFRecord record) {
|
||||
ArrayList<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
|
||||
for (VCFGenotypeRecord gr : record.getVCFGenotypeRecords()) {
|
||||
//if (gr.getSampleName().equalsIgnoreCase(SAMPLE)) {
|
||||
if (SAMPLES.contains(gr.getSampleName())) {
|
||||
//out.println(gr.getSampleName() + " " + gr.toGenotypeString(record.getAlternateAlleles()));
|
||||
genotypeRecords.add(gr);
|
||||
}
|
||||
}
|
||||
|
||||
VCFRecord subset = new VCFRecord(record.getReferenceBase(),
|
||||
record.getChromosome(),
|
||||
(int) record.getPosition(),
|
||||
record.getID(),
|
||||
record.getAlternateAlleles(),
|
||||
record.getQual(),
|
||||
record.getFilterString(),
|
||||
record.getInfoValues(),
|
||||
record.getGenotypeFormatString(),
|
||||
genotypeRecords);
|
||||
|
||||
return subset;
|
||||
}
|
||||
|
||||
public VCFWriter reduce(ArrayList<VCFRecord> records, VCFWriter writer) {
|
||||
for (VCFRecord record : records) {
|
||||
VCFRecord subset = subsetRecord(record);
|
||||
|
||||
boolean isVariant = false;
|
||||
for (VCFGenotypeEncoding ge : subset.getVCFGenotypeRecords().get(0).getAlleles()) {
|
||||
if (record.getReferenceBase() != ge.getBases().charAt(0)) {
|
||||
isVariant = true;
|
||||
}
|
||||
}
|
||||
|
||||
//if (isVariant && !subset.isFiltered()) {
|
||||
if ((isVariant || INCLUDE_NON_VARIANTS) && (!subset.isFiltered() || INCLUDE_FILTERED)) {
|
||||
if (writer != null) {
|
||||
writer.addRecord(subset);
|
||||
} else {
|
||||
out.println(subset.toStringRepresentation(vheader));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return writer;
|
||||
}
|
||||
|
||||
public void onTraversalDone(VCFWriter writer) {
|
||||
if (writer != null) {
|
||||
writer.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue