From 0d4ea30d6d6e5db9c3e6db08215a2a80d4977958 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sun, 18 Mar 2012 22:31:54 -0400 Subject: [PATCH 02/12] Updating the BQSR Gatherer to the new file format This is important for quick turnaround in the analysis cycle of the new covariates. Also added a dummy unit test that doesn't really test anything (disabled), but helps in debugging. --- .../sting/gatk/walkers/bqsr/BQSRGatherer.java | 124 ++++++++++++++++++ .../bqsr/RecalibrationArgumentCollection.java | 3 +- .../walkers/bqsr/BQSRGathererUnitTest.java | 29 ++++ 3 files changed, 154 insertions(+), 2 deletions(-) create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java new file mode 100755 index 000000000..3712f0cc5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2011 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.commandline.Gatherer; +import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatumOptimized; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * User: carneiro + * Date: 3/29/11 + */ + + +public class BQSRGatherer extends Gatherer { + + ///////////////////////////// + // Private Member Variables + ///////////////////////////// + private static final String EOF_MARKER = "EOF"; + + private HashMap dataMap = new HashMap(); + + + private void addCSVData (String line) { + String[] covariates = line.split(","); + String key = ""; + RecalDatumOptimized values; + + for (int i = 0; i < covariates.length-3; i++) + key += covariates[i] + ","; + + if (covariates.length < 3) + throw new ReviewedStingException("Line only has 1 covariate : " + line); + + values = new RecalDatumOptimized(Long.parseLong(covariates[covariates.length - 3]), Long.parseLong(covariates[covariates.length - 2])); + + RecalDatumOptimized currentValues = dataMap.get(key); + if (currentValues == null) + dataMap.put(key, values); + else + currentValues.increment(values); + + } + + @Override + public void 
gather(List inputs, File output) { + PrintStream o; + try { + o = new PrintStream(output); + } catch ( FileNotFoundException e) { + throw new UserException("File to be output by CountCovariates Gather function was not found"); + } + + boolean sawEOF = false; + boolean printedHeader = false; + + // Read input files + for ( File RECAL_FILE : inputs) { + try { + for ( String line : new XReadLines(RECAL_FILE) ) { + if ( EOF_MARKER.equals(line) ) { + sawEOF = true; // sanity check + break; + } + + else if(line.startsWith("#")) { + if (!printedHeader) + o.println(line); + } + + else // Found a line of data + addCSVData(line); // Parse the line and add the data to the HashMap + } + + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e); + } + + if ( !sawEOF ) { + final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted!"; + throw new UserException.MalformedFile(RECAL_FILE, errorMessage); + } + printedHeader = true; + } + + // Write output file from dataMap + for(Map.Entry entry : dataMap.entrySet()) + o.println(entry.getKey() + entry.getValue().outputToCSV()); + o.println("EOF"); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index ab173e4fb..40f28f644 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer; import java.io.PrintStream; import java.util.Collections; @@ -59,7 +58,7 
@@ public class RecalibrationArgumentCollection { * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, * and the raw empirical quality score calculated by phred-scaling the mismatch rate. */ - @Gather(CountCovariatesGatherer.class) + @Gather(BQSRGatherer.class) @Output protected PrintStream RECAL_FILE; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java new file mode 100644 index 000000000..f1df6f9a7 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -0,0 +1,29 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.testng.annotations.Test; + +import java.io.File; +import java.util.LinkedList; +import java.util.List; + +/** + * @author Mauricio Carneiro + * @since 3/7/12 + */ +public class BQSRGathererUnitTest { + RecalibrationArgumentCollection RAC; + + private static File recal1 = new File("public/testdata/exampleCSV.csv"); + private static File recal2 = new File("public/testdata/exampleCSV.2.csv"); + + @Test(enabled = false) + public void testCombineTwoFiles() { + BQSRGatherer gatherer = new BQSRGatherer(); + List recalFiles = new LinkedList (); + File output = new File("foo.csv"); + + recalFiles.add(recal1); + recalFiles.add(recal2); + gatherer.gather(recalFiles, output); + } +} From 7afb3338112364b25e367525191db9400d37eb56 Mon Sep 17 00:00:00 2001 From: Roger Zurawicki Date: Sun, 18 Mar 2012 01:05:49 -0400 Subject: [PATCH 03/12] GATK Report code cleanup - Updated the documentation on the code - Made the table.write() method private and updated necessary files. 
- Added a constructor to GATKReport that takes GATKReportTables - Optimized my code Signed-off-by: Mauricio Carneiro --- .../sting/gatk/report/GATKReport.java | 17 ++- .../sting/gatk/report/GATKReportColumn.java | 2 +- .../gatk/report/GATKReportColumnFormat.java | 6 +- .../sting/gatk/report/GATKReportColumns.java | 15 ++- .../sting/gatk/report/GATKReportDataType.java | 4 +- .../sting/gatk/report/GATKReportGatherer.java | 31 +++-- .../sting/gatk/report/GATKReportTable.java | 108 ++---------------- .../sting/gatk/report/GATKReportVersion.java | 4 +- .../gatk/walkers/diffengine/DiffEngine.java | 5 +- .../sting/gatk/report/GATKReportUnitTest.java | 12 +- 10 files changed, 73 insertions(+), 131 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index bee6dd69e..ff0c39f41 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -41,10 +41,10 @@ import java.util.TreeMap; public class GATKReport { public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport."; public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_0; - public static final String SEPARATOR = ":"; + private static final String SEPARATOR = ":"; private GATKReportVersion version = LATEST_REPORT_VERSION; - private TreeMap tables = new TreeMap(); + private final TreeMap tables = new TreeMap(); /** * Create a new, empty GATKReport. @@ -70,6 +70,15 @@ public class GATKReport { loadReport(file); } + /** + * Create a new GATK report from GATK report tables + * @param tables Any number of tables that you want to add to the report + */ + public GATKReport(GATKReportTable...
tables) { + for( GATKReportTable table: tables) + addTable(table); + } + /** * Load a GATKReport file from disk * @@ -202,10 +211,6 @@ public class GATKReport { return version; } - public void setVersion(GATKReportVersion version) { - this.version = version; - } - /** * Returns whether or not the two reports have the same format, from columns, to tables, to reports, and everything * in between. This does not check if the data inside is the same. This is the check to see if the two reports are diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 7e64c8082..9a7c4ced0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -199,7 +199,7 @@ public class GATKReportColumn extends TreeMap { defaultValue.equals(that.defaultValue) ); } - protected boolean equals(GATKReportColumn that) { + boolean equals(GATKReportColumn that) { if ( !this.keySet().equals(that.keySet()) ) { return false; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java index 6d19a83aa..79ae9b8bd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -29,8 +29,8 @@ package org.broadinstitute.sting.gatk.report; */ public class GATKReportColumnFormat { public static enum Alignment { LEFT, RIGHT } - public int width; - public Alignment alignment; + private final int width; + private final 
Alignment alignment; public GATKReportColumnFormat(int width, Alignment alignment) { this.width = width; diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java index ca1de49f9..bb6e3a4f1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java @@ -24,13 +24,15 @@ package org.broadinstitute.sting.gatk.report; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + import java.util.*; /** * Tracks a linked list of GATKReportColumn in order by name. */ public class GATKReportColumns extends LinkedHashMap implements Iterable { - private List columnNames = new ArrayList(); + private final List columnNames = new ArrayList(); /** * Returns the column by index @@ -43,9 +45,12 @@ public class GATKReportColumns extends LinkedHashMap i } @Override - public GATKReportColumn remove(Object key) { - columnNames.remove(key); - return super.remove(key); + public GATKReportColumn remove(Object columnName) { + if ( !(columnName instanceof String) ) { + throw new ReviewedStingException("The column name must be a String!"); + } + columnNames.remove(columnName.toString()); + return super.remove(columnName); } @Override @@ -85,7 +90,7 @@ public class GATKReportColumns extends LinkedHashMap i return true; } - protected boolean equals(GATKReportColumns that) { + boolean equals(GATKReportColumns that) { for (Map.Entry pair : entrySet()) { // Make sure that every column is the same, we know that the # of columns // is the same from isSameFormat() diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java index 414102fec..d9bae19c7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java @@ -67,7 +67,7 @@ public enum GATKReportDataType { */ String("%[Ss]"); - public final String dataTypeString; + private final String dataTypeString; private GATKReportDataType(String dataTypeString) { this.dataTypeString = dataTypeString; @@ -189,7 +189,7 @@ public enum GATKReportDataType { * @param obj The input string * @return an object that matches the data type. */ - protected Object Parse(Object obj) { + Object Parse(Object obj) { if (obj instanceof String) { String str = obj.toString(); switch (this) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java index 0d15971ae..ff1f9b90c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java @@ -1,3 +1,27 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.gatk.report; import org.broadinstitute.sting.commandline.Gatherer; @@ -8,13 +32,6 @@ import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.List; -/** - * Created by IntelliJ IDEA. - * User: roger - * Date: 1/9/12 - * Time: 11:17 PM - * To change this template use File | Settings | File Templates. - */ public class GATKReportGatherer extends Gatherer { @Override public void gather(List inputs, File output) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 1b5273741..81d7d7710 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -34,97 +34,14 @@ import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; -/** - * A data structure that allows data to be collected over the course of a walker's computation, then have that data - * written to a PrintStream such that it's human-readable, AWK-able, and R-friendly (given that you load it using the - * GATKReport loader module). - *

- * The goal of this object is to use the same data structure for both accumulating data during a walker's computation - * and emitting that data to a file for easy analysis in R (or any other program/language that can take in a table of - * results). Thus, all of the infrastructure below is designed simply to make printing the following as easy as - * possible: - *

- * ##:GATKReport.v0.1 ErrorRatePerCycle : The error rate per sequenced position in the reads - * cycle errorrate.61PA8.7 qualavg.61PA8.7 - * 0 0.007451835696110506 25.474613284804366 - * 1 0.002362777171937477 29.844949954504095 - * 2 9.087604507451836E-4 32.87590975254731 - * 3 5.452562704471102E-4 34.498999090081895 - * 4 9.087604507451836E-4 35.14831665150137 - * 5 5.452562704471102E-4 36.07223435225619 - * 6 5.452562704471102E-4 36.1217248908297 - * 7 5.452562704471102E-4 36.1910480349345 - * 8 5.452562704471102E-4 36.00345705967977 - *

- * Here, we have a GATKReport table - a well-formatted, easy to read representation of some tabular data. Every single - * table has this same GATKReport.v0.1 header, which permits multiple files from different sources to be cat-ed - * together, which makes it very easy to pull tables from different programs into R via a single file. - *

- * ------------ - * Definitions: - *

- * Table info: - * The first line, structured as - * ##: :
- *

- * Table header: - * The second line, specifying a unique name for each column in the table. - *

- * The first column mentioned in the table header is the "primary key" column - a column that provides the unique - * identifier for each row in the table. Once this column is created, any element in the table can be referenced by - * the row-column coordinate, i.e. "primary key"-"column name" coordinate. - *

- * When a column is added to a table, a default value must be specified (usually 0). This is the initial value for - * an element in a column. This permits operations like increment() and decrement() to work properly on columns that - * are effectively counters for a particular event. - *

- * Finally, the display property for each column can be set during column creation. This is useful when a given - * column stores an intermediate result that will be used later on, perhaps to calculate the value of another column. - * In these cases, it's obviously necessary to store the value required for further computation, but it's not - * necessary to actually print the intermediate column. - *

- * Table body: - * The values of the table itself. - *

- * --------------- - * Implementation: - *

- * The implementation of this table has two components: - * 1. A TreeSet that stores all the values ever specified for the primary key. Any get() operation that - * refers to an element where the primary key object does not exist will result in its implicit creation. I - * haven't yet decided if this is a good idea... - *

- * 2. A HashMap that stores a mapping from column name to column contents. Each - * GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap) between - * primary key and the column value. This means that, given N columns, the primary key information is stored - * N+1 times. This is obviously wasteful and can likely be handled much more elegantly in future implementations. - *

- * ------------------------------ - * Element and column operations: - *

- * In addition to simply getting and setting values, this object also permits some simple operations to be applied to - * individual elements or to whole columns. For instance, an element can be easily incremented without the hassle of - * calling get(), incrementing the obtained value by 1, and then calling set() with the new value. Also, some vector - * operations are supported. For instance, two whole columns can be divided and have the result be set to a third - * column. This is especially useful when aggregating counts in two intermediate columns that will eventually need to - * be manipulated row-by-row to compute the final column. - *

- * Note: I've made no attempt whatsoever to make these operations efficient. Right now, some of the methods check the - * type of the stored object using an instanceof call and attempt to do the right thing. Others cast the contents of - * the cell to a Number, call the Number.toDouble() method and compute a result. This is clearly not the ideal design, - * but at least the prototype contained herein works. - * - * @author Kiran Garimella - * @author Khalid Shakir - */ public class GATKReportTable { /** * REGEX that matches any table with an invalid name */ public static final String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]"; - public static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable"; - public static final String SEPARATOR = ":"; - public static final String ENDLINE = ":;"; + private static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable"; + private static final String SEPARATOR = ":"; + private static final String ENDLINE = ":;"; private String tableName; private String tableDescription; @@ -418,8 +335,8 @@ public class GATKReportTable { * output file), and the format string used to display the data. * * @param columnName the name of the column - * @param defaultValue if true - the column will be displayed; if false - the column will be hidden - * @param display + * @param defaultValue the default value of a blank cell + * @param display if true - the column will be displayed; if false - the column will be hidden * @param format the format string used to display data */ public void addColumn(String columnName, Object defaultValue, boolean display, String format) { @@ -429,12 +346,6 @@ public class GATKReportTable { columns.put(columnName, new GATKReportColumn(columnName, defaultValue, display, format)); } - - public GATKReportVersion getVersion() { - return GATKReport.LATEST_REPORT_VERSION; - } - - /** * Check if the requested element exists, and if not, create it. 
* @@ -508,8 +419,7 @@ public class GATKReportTable { value = newValue; if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || - column.getDataType().equals(GATKReportDataType.Unknown) || - value == null) + column.getDataType().equals(GATKReportDataType.Unknown) ) columns.get(columnName).put(primaryKey, value); else throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", @@ -795,7 +705,7 @@ public class GATKReportTable { * * @return the width of the primary key column */ - public int getPrimaryKeyColumnWidth() { + int getPrimaryKeyColumnWidth() { int maxWidth = getPrimaryKeyName().length(); for (Object primaryKey : primaryKeyColumn) { @@ -814,7 +724,7 @@ public class GATKReportTable { * * @param out the PrintStream to which the table should be written */ - public void write(PrintStream out) { + void write(PrintStream out) { /* * Table header: @@ -912,7 +822,7 @@ public class GATKReportTable { * * @param input Another GATK table */ - protected void combineWith(GATKReportTable input) { + void combineWith(GATKReportTable input) { /* * This function is different from addRowsFrom because we will add the ability to sum,average, etc rows * TODO: Add other combining algorithms diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java index caac79cb5..99381cc21 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -50,7 +50,7 @@ public enum GATKReportVersion { */ V1_0("v1.0"); - public final String versionString; + private final String 
versionString; private GATKReportVersion(String versionString) { this.versionString = versionString; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 2159bc839..3f4b4805f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -244,7 +244,8 @@ public class DiffEngine { table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString()); } - table.write(params.out); + GATKReport output = new GATKReport(table); + output.print(params.out); } protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index 124bda7bc..90c92189e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -80,11 +80,15 @@ public class GATKReportUnitTest extends BaseTest { @Test public void testSimpleGATKReport() { - GATKReport report = GATKReport.newSimpleReport("TableName", "a", "b", "Roger", "is", "Awesome"); - report.addRow("a", 'F', 12, 23.45, true); - report.addRow("ans", '3', 24.5, 456L, 2345); - report.addRow("hi", null, null, "", 2.3); + // Create a new simple GATK report named "TableName" with columns: Roger, is, and Awesome + GATKReport report = GATKReport.newSimpleReport("TableName", "Roger", "is", "Awesome"); + 
// Add data to simple GATK report + report.addRow( 12, 23.45, true); + report.addRow("ans", '3', 24.5); + report.addRow("hi", "", 2.3); + + // Print the report to console //report.print(System.out); try { From 633b5c687d6ac8a1102b8d0fe1939bfb92442f8b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 19 Mar 2012 15:27:15 -0400 Subject: [PATCH 07/12] Fixing MD5's (new GATKReport header was missing from old md5's) --- .../gatk/walkers/diffengine/DiffObjectsIntegrationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java index 408849c78..4a83c34cc 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -50,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest { @DataProvider(name = "data") public Object[][] createData() { - new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dac62fcd25e1052bf18b5707700dda7e"); - new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "e10c48dd294fb257802d4e73bb50580d"); + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dba5eab2b9587c1062721b164e4fd9a6"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "de35c93450b46db5fc5516af3c55d62a"); return TestParams.getTests(TestParams.class); } From 2324c5a74f03d8dcfa8235db47bfa0e1edf33afa Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 19 Mar 2012 21:29:24 -0400 Subject: [PATCH 09/12] Simplified the interface for simple VCF header lines by making the VCFSimpleHeaderLine not abstract anymore - now any arbitrary header line with an ID (e.g. 
the contig and ALT lines) can be part of this class without having to define new classes. Also, renamed the 'named' header line to 'id' since that's more accurate. --- .../gatk/refdata/RefMetaDataTracker.java | 2 +- .../gatk/refdata/tracks/FeatureManager.java | 2 +- .../walkers/annotator/VariantAnnotator.java | 2 +- .../walkers/diffengine/VCFDiffableReader.java | 4 +- .../walkers/variantutils/VariantsToVCF.java | 4 +- .../utils/codecs/vcf/AbstractVCFCodec.java | 24 +++--- .../utils/codecs/vcf/VCFAltHeaderLine.java | 28 ------- .../codecs/vcf/VCFCompoundHeaderLine.java | 4 +- .../sting/utils/codecs/vcf/VCFConstants.java | 7 ++ .../utils/codecs/vcf/VCFFilterHeaderLine.java | 4 +- .../sting/utils/codecs/vcf/VCFHeader.java | 5 +- .../codecs/vcf/VCFHeaderLineTranslator.java | 12 ++- ...edHeaderLine.java => VCFIDHeaderLine.java} | 6 +- .../utils/codecs/vcf/VCFSimpleHeaderLine.java | 75 ++++++++++++------- .../sting/utils/codecs/vcf/VCFUtils.java | 10 +-- 15 files changed, 102 insertions(+), 87 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java rename public/java/src/org/broadinstitute/sting/utils/codecs/vcf/{VCFNamedHeaderLine.java => VCFIDHeaderLine.java} (91%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 286e22369..0e13e4ad9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -418,7 +418,7 @@ public class RefMetaDataTracker { * with the current site as a RODRecordList List object. If no data track with specified name is available, * returns defaultValue wrapped as RODRecordList object. 
NOTE: if defaultValue is null, it will be wrapped up * with track name set to 'name' and location set to null; otherwise the wrapper object will have name and - * location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution, + * location set to defaultValue.getID() and defaultValue.getLocation(), respectively (use caution, * defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise: * for instance, on locus traversal, location is usually expected to be a single base we are currently looking at, * regardless of the presence of "extended" RODs overlapping with that location). diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index fcd85fd1d..55dd50334 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -132,7 +132,7 @@ public class FeatureManager { } /** - * Return the FeatureDescriptor with getName().equals(name) + * Return the FeatureDescriptor with getID().equals(name) * * @param name * @return A FeatureDescriptor or null if none is found diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 5312c4136..66c142582 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -240,7 +240,7 @@ public class VariantAnnotator extends RodWalker implements Ann for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) { if ( line instanceof VCFInfoHeaderLine ) { VCFInfoHeaderLine infoline = 
(VCFInfoHeaderLine)line; - if ( infoline.getName().equals(expression.fieldName) ) { + if ( infoline.getID().equals(expression.fieldName) ) { targetHeaderLine = infoline; break; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 3c0da8e9d..c9a6cb8f2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -68,8 +68,8 @@ public class VCFDiffableReader implements DiffableReader { VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader); for ( VCFHeaderLine headerLine : header.getMetaData() ) { String key = headerLine.getKey(); - if ( headerLine instanceof VCFNamedHeaderLine ) - key += "_" + ((VCFNamedHeaderLine) headerLine).getName(); + if ( headerLine instanceof VCFIDHeaderLine) + key += "_" + ((VCFIDHeaderLine) headerLine).getID(); if ( root.hasElement(key) ) logger.warn("Skipping duplicate header line: file=" + file + " line=" + headerLine.toString()); else diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index f5928b723..05865b587 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -216,12 +216,12 @@ public class VariantsToVCF extends RodWalker { Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); //hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); - //hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); + //hInfo.add(new VCFHeaderLine("reference", 
getToolkit().getArguments().referenceFile.getID())); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for ( VCFHeaderLine field : hInfo ) { if ( field instanceof VCFFormatHeaderLine) { - allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName()); + allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getID()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 3c2ed18e4..273d5a377 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -154,18 +154,24 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data"); } else { - if ( str.startsWith("##INFO=") ) { - VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version); + if ( str.startsWith(VCFConstants.INFO_HEADER_START) ) { + final VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version); metaData.add(info); - infoFields.put(info.getName(), info.getType()); - } else if ( str.startsWith("##FILTER=") ) { - VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.substring(9),version); + infoFields.put(info.getID(), info.getType()); + } else if ( str.startsWith(VCFConstants.FILTER_HEADER_START) ) { + final VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.substring(9), version); metaData.add(filter); - filterFields.add(filter.getName()); - } else if ( str.startsWith("##FORMAT=") ) { - VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.substring(9),version); + filterFields.add(filter.getID()); + } else if ( str.startsWith(VCFConstants.FORMAT_HEADER_START) ) { + final VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.substring(9), version); 
metaData.add(format); - formatFields.put(format.getName(), format.getType()); + formatFields.put(format.getID(), format.getType()); + } else if ( str.startsWith(VCFConstants.CONTIG_HEADER_START) ) { + final VCFSimpleHeaderLine contig = new VCFSimpleHeaderLine(str.substring(9), version, VCFSimpleHeaderLine.SupportedHeaderLineType.GENERIC, null); + metaData.add(contig); + } else if ( str.startsWith(VCFConstants.ALT_HEADER_START) ) { + final VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.substring(6), version, VCFSimpleHeaderLine.SupportedHeaderLineType.GENERIC, Arrays.asList("ID", "Description")); + metaData.add(alt); } else { int equals = str.indexOf("="); if ( equals != -1 ) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java deleted file mode 100644 index a9de949d8..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.broadinstitute.sting.utils.codecs.vcf; - -/** - * @author ebanks - * A class representing a key=value entry for ALT fields in the VCF header - */ -public class VCFAltHeaderLine extends VCFSimpleHeaderLine { - - /** - * create a VCF filter header line - * - * @param name the name for this header line - * @param description the description for this header line - */ - public VCFAltHeaderLine(String name, String description) { - super(name, description, SupportedHeaderLineType.ALT); - } - - /** - * create a VCF info header line - * - * @param line the header line - * @param version the vcf header version - */ - protected VCFAltHeaderLine(String line, VCFHeaderVersion version) { - super(line, version, SupportedHeaderLineType.ALT); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java 
b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java index 97166833b..d2bd507b5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java @@ -34,7 +34,7 @@ import java.util.Map; /** * a base class for compound header lines, which include info lines and format lines (so far) */ -public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { +public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine { public enum SupportedHeaderLineType { INFO(true), FORMAT(false); @@ -52,7 +52,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF private VCFHeaderLineType type; // access methods - public String getName() { return name; } + public String getID() { return name; } public String getDescription() { return description; } public VCFHeaderLineType getType() { return type; } public VCFHeaderLineCount getCountType() { return countType; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java index 8e9d989cc..b23371cc9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java @@ -80,6 +80,13 @@ public final class VCFConstants { public static final String PHASED_SWITCH_PROB_v3 = "\\"; public static final String PHASING_TOKENS = "/|\\"; + // header lines + public static final String FILTER_HEADER_START = "##FILTER"; + public static final String FORMAT_HEADER_START = "##FORMAT"; + public static final String INFO_HEADER_START = "##INFO"; + public static final String ALT_HEADER_START = "##ALT"; + public static final String CONTIG_HEADER_START = "##contig"; + // old indel alleles public static final 
char DELETION_ALLELE_v3 = 'D'; public static final char INSERTION_ALLELE_v3 = 'I'; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java index 418b80074..72504abd5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; +import java.util.Arrays; + /** * @author ebanks * A class representing a key=value entry for FILTER fields in the VCF header @@ -23,6 +25,6 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine { * @param version the vcf header version */ protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) { - super(line, version, SupportedHeaderLineType.FILTER); + super(line, version, SupportedHeaderLineType.FILTER, Arrays.asList("ID", "Description")); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 5c5df15ab..27bab8c41 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.util.*; @@ -126,11 +125,11 @@ public class VCFHeader { for ( VCFHeaderLine line : mMetaData ) { if ( line instanceof VCFInfoHeaderLine ) { VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line; - mInfoMetaData.put(infoLine.getName(), infoLine); + mInfoMetaData.put(infoLine.getID(), infoLine); } else if ( line instanceof VCFFormatHeaderLine ) { VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line; - 
mFormatMetaData.put(formatLine.getName(), formatLine); + mFormatMetaData.put(formatLine.getID(), formatLine); } else { mOtherMetaData.put(line.getKey(), line); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java index e39a09cb1..88fed75d7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java @@ -73,10 +73,14 @@ class VCF4Parser implements VCFLineParser { // validate the tags against the expected list index = 0; - if (ret.size() > expectedTagOrder.size()) throw new IllegalArgumentException("Unexpected tag count " + ret.size() + " in string " + expectedTagOrder.size()); - for (String str : ret.keySet()) { - if (!expectedTagOrder.get(index).equals(str)) throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine); - index++; + if ( expectedTagOrder != null ) { + if ( ret.size() > expectedTagOrder.size() ) + throw new IllegalArgumentException("Unexpected tag count " + ret.size() + " in string " + expectedTagOrder.size()); + for ( String str : ret.keySet() ) { + if ( !expectedTagOrder.get(index).equals(str) ) + throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine); + index++; + } } return ret; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFNamedHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFIDHeaderLine.java similarity index 91% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFNamedHeaderLine.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFIDHeaderLine.java index f78e936b2..65321881a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFNamedHeaderLine.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFIDHeaderLine.java @@ -24,7 +24,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; -/** an interface for named header lines **/ -public interface VCFNamedHeaderLine { - String getName(); +/** an interface for ID-based header lines **/ +public interface VCFIDHeaderLine { + String getID(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java index 152043f28..ea485e956 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; -import java.util.Arrays; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; @@ -9,15 +9,16 @@ import java.util.Map; * @author ebanks * A class representing a key=value entry for simple VCF header types */ -public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { +public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine { public enum SupportedHeaderLineType { - FILTER, ALT; + FILTER, GENERIC; } private String name; - private String description; + private Map genericFields = new LinkedHashMap(); + // our type of line, i.e. 
filter, alt, etc private final SupportedHeaderLineType lineType; @@ -25,18 +26,29 @@ public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNa /** * create a VCF filter header line * - * @param name the name for this header line - * @param description the description for this header line - * @param lineType the header line type + * @param name the name for this header line + * @param genericFields other fields for this header line + * @param lineType the header line type + */ + public VCFSimpleHeaderLine(String name, Map genericFields, SupportedHeaderLineType lineType) { + super(lineType.toString(), ""); + this.lineType = lineType; + initialize(name, genericFields); + } + + /** + * create a VCF filter header line + * + * @param name the name for this header line + * @param description description for this header line + * @param lineType the header line type */ public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) { super(lineType.toString(), ""); this.lineType = lineType; - this.name = name; - this.description = description; - - if ( name == null || description == null ) - throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description )); + Map map = new LinkedHashMap(1); + map.put("Description", description); + initialize(name, map); } /** @@ -44,22 +56,29 @@ public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNa * * @param line the header line * @param version the vcf header version - * @param lineType the header line type + * @param lineType the header line type + * @param expectedTagOrdering the tag ordering expected for this header line */ - protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) { + protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType, List expectedTagOrdering) { 
super(lineType.toString(), ""); this.lineType = lineType; - Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description")); + Map mapping = VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering); name = mapping.get("ID"); - description = mapping.get("Description"); - if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided - description = UNBOUND_DESCRIPTION; + initialize(name, mapping); + } + + protected void initialize(String name, Map genericFields) { + if ( name == null || genericFields == null || genericFields.isEmpty() ) + throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s", super.getKey(), name)); + + this.name = name; + this.genericFields.putAll(genericFields); } protected String toStringEncoding() { - Map map = new LinkedHashMap(); + Map map = new LinkedHashMap(); map.put("ID", name); - map.put("Description", description); + map.putAll(genericFields); return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map); } @@ -67,15 +86,21 @@ public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNa if ( !(o instanceof VCFSimpleHeaderLine) ) return false; VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o; - return name.equals(other.name) && - description.equals(other.description); + if ( !name.equals(other.name) || genericFields.size() != other.genericFields.size() ) + return false; + for ( Map.Entry entry : genericFields.entrySet() ) { + if ( !entry.getValue().equals(other.genericFields.get(entry.getKey())) ) + return false; + } + + return true; } - public String getName() { + public String getID() { return name; } - public String getDescription() { - return description; + public Map getGenericFields() { + return genericFields; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java 
b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index 5bd6a9b32..238a06243 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -155,10 +155,10 @@ public class VCFUtils { for ( VCFHeader source : headers ) { //System.out.printf("Merging in header %s%n", source); for ( VCFHeaderLine line : source.getMetaData()) { - String key = line.getKey(); - if ( line instanceof VCFNamedHeaderLine) - key = key + "" + ((VCFNamedHeaderLine) line).getName(); + String key = line.getKey(); + if ( line instanceof VCFIDHeaderLine ) + key = key + "-" + ((VCFIDHeaderLine)line).getID(); if ( map.containsKey(key) ) { VCFHeaderLine other = map.get(key); @@ -166,8 +166,8 @@ public class VCFUtils { continue; else if ( ! line.getClass().equals(other.getClass()) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); - else if ( line instanceof VCFFilterHeaderLine) { - String lineName = ((VCFFilterHeaderLine) line).getName(); String otherName = ((VCFFilterHeaderLine) other).getName(); + else if ( line instanceof VCFFilterHeaderLine ) { + String lineName = ((VCFFilterHeaderLine) line).getID(); String otherName = ((VCFFilterHeaderLine) other).getID(); if ( ! 
lineName.equals(otherName) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); } else if ( line instanceof VCFCompoundHeaderLine ) { From ade1971581f9b72bb9d467f104c04c8a08a200e0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 20 Mar 2012 00:12:17 -0400 Subject: [PATCH 12/12] Since we allow any generic header types, there's no longer any reason to check for supported types --- .../utils/codecs/vcf/AbstractVCFCodec.java | 4 +-- .../utils/codecs/vcf/VCFFilterHeaderLine.java | 4 +-- .../utils/codecs/vcf/VCFSimpleHeaderLine.java | 32 ++++++------------- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 273d5a377..8180eba30 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -167,10 +167,10 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { metaData.add(format); formatFields.put(format.getID(), format.getType()); } else if ( str.startsWith(VCFConstants.CONTIG_HEADER_START) ) { - final VCFSimpleHeaderLine contig = new VCFSimpleHeaderLine(str.substring(9), version, VCFSimpleHeaderLine.SupportedHeaderLineType.GENERIC, null); + final VCFSimpleHeaderLine contig = new VCFSimpleHeaderLine(str.substring(9), version, VCFConstants.CONTIG_HEADER_START.substring(2), null); metaData.add(contig); } else if ( str.startsWith(VCFConstants.ALT_HEADER_START) ) { - final VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.substring(6), version, VCFSimpleHeaderLine.SupportedHeaderLineType.GENERIC, Arrays.asList("ID", "Description")); + final VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.substring(6), version, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description")); 
metaData.add(alt); } else { int equals = str.indexOf("="); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java index 72504abd5..dd0a333f3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java @@ -15,7 +15,7 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine { * @param description the description for this header line */ public VCFFilterHeaderLine(String name, String description) { - super(name, description, SupportedHeaderLineType.FILTER); + super("FILTER", name, description); } /** @@ -25,6 +25,6 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine { * @param version the vcf header version */ protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) { - super(line, version, SupportedHeaderLineType.FILTER, Arrays.asList("ID", "Description")); + super(line, version, "FILTER", Arrays.asList("ID", "Description")); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java index ea485e956..05d603073 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java @@ -11,41 +11,30 @@ import java.util.Map; */ public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine { - public enum SupportedHeaderLineType { - FILTER, GENERIC; - } - private String name; private Map genericFields = new LinkedHashMap(); - - // our type of line, i.e. 
filter, alt, etc - private final SupportedHeaderLineType lineType; - - /** * create a VCF filter header line * + * @param key the key for this header line * @param name the name for this header line * @param genericFields other fields for this header line - * @param lineType the header line type */ - public VCFSimpleHeaderLine(String name, Map genericFields, SupportedHeaderLineType lineType) { - super(lineType.toString(), ""); - this.lineType = lineType; + public VCFSimpleHeaderLine(String key, String name, Map genericFields) { + super(key, ""); initialize(name, genericFields); } /** * create a VCF filter header line * + * @param key the key for this header line * @param name the name for this header line * @param description description for this header line - * @param lineType the header line type */ - public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) { - super(lineType.toString(), ""); - this.lineType = lineType; + public VCFSimpleHeaderLine(String key, String name, String description) { + super(key, ""); Map map = new LinkedHashMap(1); map.put("Description", description); initialize(name, map); @@ -56,12 +45,11 @@ public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLin * * @param line the header line * @param version the vcf header version - * @param lineType the header line type + * @param key the key for this header line * @param expectedTagOrdering the tag ordering expected for this header line */ - protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType, List expectedTagOrdering) { - super(lineType.toString(), ""); - this.lineType = lineType; + protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, String key, List expectedTagOrdering) { + super(key, ""); Map mapping = VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering); name = mapping.get("ID"); initialize(name, mapping); @@ -79,7 +67,7 @@ public class 
VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLin Map map = new LinkedHashMap(); map.put("ID", name); map.putAll(genericFields); - return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map); + return getKey() + "=" + VCFHeaderLine.toStringEncoding(map); } public boolean equals(Object o) {