From 8cb16a1d45d3dc2ed6d817f0710e2bfcad56b49a Mon Sep 17 00:00:00 2001 From: delangel Date: Wed, 16 Jun 2010 02:13:08 +0000 Subject: [PATCH] a) Cleanup, remove -input argument from BeagleOutputToVCFWalker since it's not needed. b) Added back old Beagle ROD to maintain backward compatibility (does anyone even use this???) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3563 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/BeagleROD.java | 112 ++++++++++++++++++ .../gatk/walkers/BeagleOutputToVCFWalker.java | 3 - 2 files changed, 112 insertions(+), 3 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/refdata/BeagleROD.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/BeagleROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/BeagleROD.java new file mode 100755 index 000000000..e1bd47f00 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/BeagleROD.java @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.text.XReadLines; + +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.io.IOException; +import java.io.File; +import java.io.FileNotFoundException; + +public class BeagleROD extends BasicReferenceOrderedDatum { + GenomeLoc loc; + List sampleNames = null; + Map> sampleGenotypes = new HashMap>(); + + public BeagleROD(String name) { + super(name); + } + + public String toString() { return "BeagleRod"; } + + public String delimiterRegex() { + return " "; + } + + public GenomeLoc getLocation() { + return loc; + } + + public List getSampleNames() { + return sampleNames; + } + + public Map> getGenotypes() { + return sampleGenotypes; + } + + public Object initialize(final File source) throws FileNotFoundException { + String firstLine = new XReadLines(source).next(); + String[] parts = firstLine.split(" "); + if ( parts[0].equals("I") ) { + // I id NA12891 NA12891 NA12892 NA12892 + sampleNames = Arrays.asList(parts).subList(2, parts.length); + return sampleNames; + } else { + throw new IllegalStateException("Beagle file " + source + " doesn't have required header line I"); + } + } + + private static Pattern MARKER_PATTERN = Pattern.compile("c(.+)_p([0-9]+)"); + + public static GenomeLoc parseMarkerName(String markerName) { + Matcher m = MARKER_PATTERN.matcher(markerName); + if ( m.matches() ) { + String contig = m.group(1); + long start = Long.valueOf(m.group(2)); + return GenomeLocParser.createGenomeLoc(contig, start, start); + } else { + throw new IllegalArgumentException("Malformatted family structure string: " + markerName + " required format is mom+dad=child"); + } + } + + public boolean parseLine(final Object header, final String[] parts) throws IOException { + //System.out.printf("Parsing beagle parts=%s header=%s%n", parts, header); + List sampleNames = (List)header; + + if ( parts.length == 0 || ! parts[0].equals("M") ) + return false; + else { + loc = parseMarkerName(parts[1]); + + for ( int i = 2; i < parts.length; i++ ) { + String sampleName = sampleNames.get(i-2); + if ( ! sampleGenotypes.containsKey(sampleName) ) { + sampleGenotypes.put(sampleName, new ArrayList()); + } + + sampleGenotypes.get(sampleName).add(parts[i]); + } + + return true; + } + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java index 1d6ccfe7d..cf5a8f61d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java @@ -61,9 +61,6 @@ public class BeagleOutputToVCFWalker extends RodWalker { private VCFWriter vcfWriter; - @Argument(fullName="input_prefix", shortName="input", doc="The prefix added to input Beagle files gprobs, r2, ...", required=true) - private String INPUT_PREFIX = "beagle"; - @Argument(fullName="output_file", shortName="output", doc="VCF file to which output should be written", required=true) private String OUTPUT_FILE = null;