diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 286e22369..0e13e4ad9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -418,7 +418,7 @@ public class RefMetaDataTracker { * with the current site as a RODRecordList List object. If no data track with specified name is available, * returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up * with track name set to 'name' and location set to null; otherwise the wrapper object will have name and - * location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution, + * location set to defaultValue.getID() and defaultValue.getLocation(), respectively (use caution, * defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise: * for instance, on locus traversal, location is usually expected to be a single base we are currently looking at, * regardless of the presence of "extended" RODs overlapping with that location). diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index fcd85fd1d..55dd50334 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -132,7 +132,7 @@ public class FeatureManager { } /** - * Return the FeatureDescriptor with getName().equals(name) + * Return the FeatureDescriptor with getID().equals(name) * * @param name * @return A FeatureDescriptor or null if none is found diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index bee6dd69e..ff0c39f41 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -41,10 +41,10 @@ import java.util.TreeMap; public class GATKReport { public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport."; public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_0; - public static final String SEPARATOR = ":"; + private static final String SEPARATOR = ":"; private GATKReportVersion version = LATEST_REPORT_VERSION; - private TreeMap tables = new TreeMap(); + private final TreeMap tables = new TreeMap(); /** * Create a new, empty GATKReport. @@ -70,6 +70,15 @@ public class GATKReport { loadReport(file); } + /** + * Create a new GATK report from GATK report tables + * @param tables Any number of tables that you want ot add to the report + */ + public GATKReport(GATKReportTable... tables) { + for( GATKReportTable table: tables) + addTable(table); + } + /** * Load a GATKReport file from disk * @@ -202,10 +211,6 @@ public class GATKReport { return version; } - public void setVersion(GATKReportVersion version) { - this.version = version; - } - /** * Returns whether or not the two reports have the same format, from columns, to tables, to reports, and everything * in between. This does not check if the data inside is the same. This is the check to see if the two reports are diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 7e64c8082..9a7c4ced0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -199,7 +199,7 @@ public class GATKReportColumn extends TreeMap { defaultValue.equals(that.defaultValue) ); } - protected boolean equals(GATKReportColumn that) { + boolean equals(GATKReportColumn that) { if ( !this.keySet().equals(that.keySet()) ) { return false; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java index 6d19a83aa..79ae9b8bd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -29,8 +29,8 @@ package org.broadinstitute.sting.gatk.report; */ public class GATKReportColumnFormat { public static enum Alignment { LEFT, RIGHT } - public int width; - public Alignment alignment; + private final int width; + private final Alignment alignment; public GATKReportColumnFormat(int width, Alignment alignment) { this.width = width; diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java index ca1de49f9..bb6e3a4f1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java @@ -24,13 +24,15 @@ package org.broadinstitute.sting.gatk.report; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + import java.util.*; /** * Tracks a linked list of GATKReportColumn in order by name. */ public class GATKReportColumns extends LinkedHashMap implements Iterable { - private List columnNames = new ArrayList(); + private final List columnNames = new ArrayList(); /** * Returns the column by index @@ -43,9 +45,12 @@ public class GATKReportColumns extends LinkedHashMap i } @Override - public GATKReportColumn remove(Object key) { - columnNames.remove(key); - return super.remove(key); + public GATKReportColumn remove(Object columnName) { + if ( !(columnName instanceof String) ) { + throw new ReviewedStingException("The column name must be a String!"); + } + columnNames.remove(columnName.toString()); + return super.remove(columnName); } @Override @@ -85,7 +90,7 @@ public class GATKReportColumns extends LinkedHashMap i return true; } - protected boolean equals(GATKReportColumns that) { + boolean equals(GATKReportColumns that) { for (Map.Entry pair : entrySet()) { // Make sure that every column is the same, we know that the # of columns // is the same from isSameFormat() diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java index 414102fec..d9bae19c7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java @@ -67,7 +67,7 @@ public enum GATKReportDataType { */ String("%[Ss]"); - public final String dataTypeString; + private final String dataTypeString; private GATKReportDataType(String dataTypeString) { this.dataTypeString = dataTypeString; @@ -189,7 +189,7 @@ public enum GATKReportDataType { * @param obj The input string * @return an object that matches the data type. */ - protected Object Parse(Object obj) { + Object Parse(Object obj) { if (obj instanceof String) { String str = obj.toString(); switch (this) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java index 0d15971ae..ff1f9b90c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java @@ -1,3 +1,27 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.gatk.report; import org.broadinstitute.sting.commandline.Gatherer; @@ -8,13 +32,6 @@ import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.List; -/** - * Created by IntelliJ IDEA. - * User: roger - * Date: 1/9/12 - * Time: 11:17 PM - * To change this template use File | Settings | File Templates. - */ public class GATKReportGatherer extends Gatherer { @Override public void gather(List inputs, File output) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 1b5273741..81d7d7710 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -34,97 +34,14 @@ import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; -/** - * A data structure that allows data to be collected over the course of a walker's computation, then have that data - * written to a PrintStream such that it's human-readable, AWK-able, and R-friendly (given that you load it using the - * GATKReport loader module). - *

- * The goal of this object is to use the same data structure for both accumulating data during a walker's computation - * and emitting that data to a file for easy analysis in R (or any other program/language that can take in a table of - * results). Thus, all of the infrastructure below is designed simply to make printing the following as easy as - * possible: - *

- * ##:GATKReport.v0.1 ErrorRatePerCycle : The error rate per sequenced position in the reads - * cycle errorrate.61PA8.7 qualavg.61PA8.7 - * 0 0.007451835696110506 25.474613284804366 - * 1 0.002362777171937477 29.844949954504095 - * 2 9.087604507451836E-4 32.87590975254731 - * 3 5.452562704471102E-4 34.498999090081895 - * 4 9.087604507451836E-4 35.14831665150137 - * 5 5.452562704471102E-4 36.07223435225619 - * 6 5.452562704471102E-4 36.1217248908297 - * 7 5.452562704471102E-4 36.1910480349345 - * 8 5.452562704471102E-4 36.00345705967977 - *

- * Here, we have a GATKReport table - a well-formatted, easy to read representation of some tabular data. Every single - * table has this same GATKReport.v0.1 header, which permits multiple files from different sources to be cat-ed - * together, which makes it very easy to pull tables from different programs into R via a single file. - *

- * ------------ - * Definitions: - *

- * Table info: - * The first line, structured as - * ##: :
- *

- * Table header: - * The second line, specifying a unique name for each column in the table. - *

- * The first column mentioned in the table header is the "primary key" column - a column that provides the unique - * identifier for each row in the table. Once this column is created, any element in the table can be referenced by - * the row-column coordinate, i.e. "primary key"-"column name" coordinate. - *

- * When a column is added to a table, a default value must be specified (usually 0). This is the initial value for - * an element in a column. This permits operations like increment() and decrement() to work properly on columns that - * are effectively counters for a particular event. - *

- * Finally, the display property for each column can be set during column creation. This is useful when a given - * column stores an intermediate result that will be used later on, perhaps to calculate the value of another column. - * In these cases, it's obviously necessary to store the value required for further computation, but it's not - * necessary to actually print the intermediate column. - *

- * Table body: - * The values of the table itself. - *

- * --------------- - * Implementation: - *

- * The implementation of this table has two components: - * 1. A TreeSet that stores all the values ever specified for the primary key. Any get() operation that - * refers to an element where the primary key object does not exist will result in its implicit creation. I - * haven't yet decided if this is a good idea... - *

- * 2. A HashMap that stores a mapping from column name to column contents. Each - * GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap) between - * primary key and the column value. This means that, given N columns, the primary key information is stored - * N+1 times. This is obviously wasteful and can likely be handled much more elegantly in future implementations. - *

- * ------------------------------ - * Element and column operations: - *

- * In addition to simply getting and setting values, this object also permits some simple operations to be applied to - * individual elements or to whole columns. For instance, an element can be easily incremented without the hassle of - * calling get(), incrementing the obtained value by 1, and then calling set() with the new value. Also, some vector - * operations are supported. For instance, two whole columns can be divided and have the result be set to a third - * column. This is especially useful when aggregating counts in two intermediate columns that will eventually need to - * be manipulated row-by-row to compute the final column. - *

- * Note: I've made no attempt whatsoever to make these operations efficient. Right now, some of the methods check the - * type of the stored object using an instanceof call and attempt to do the right thing. Others cast the contents of - * the cell to a Number, call the Number.toDouble() method and compute a result. This is clearly not the ideal design, - * but at least the prototype contained herein works. - * - * @author Kiran Garimella - * @author Khalid Shakir - */ public class GATKReportTable { /** * REGEX that matches any table with an invalid name */ public static final String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]"; - public static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable"; - public static final String SEPARATOR = ":"; - public static final String ENDLINE = ":;"; + private static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable"; + private static final String SEPARATOR = ":"; + private static final String ENDLINE = ":;"; private String tableName; private String tableDescription; @@ -418,8 +335,8 @@ public class GATKReportTable { * output file), and the format string used to display the data. * * @param columnName the name of the column - * @param defaultValue if true - the column will be displayed; if false - the column will be hidden - * @param display + * @param defaultValue the default value of a blank cell + * @param display if true - the column will be displayed; if false - the column will be hidden * @param format the format string used to display data */ public void addColumn(String columnName, Object defaultValue, boolean display, String format) { @@ -429,12 +346,6 @@ public class GATKReportTable { columns.put(columnName, new GATKReportColumn(columnName, defaultValue, display, format)); } - - public GATKReportVersion getVersion() { - return GATKReport.LATEST_REPORT_VERSION; - } - - /** * Check if the requested element exists, and if not, create it. * @@ -508,8 +419,7 @@ public class GATKReportTable { value = newValue; if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || - column.getDataType().equals(GATKReportDataType.Unknown) || - value == null) + column.getDataType().equals(GATKReportDataType.Unknown) ) columns.get(columnName).put(primaryKey, value); else throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", @@ -795,7 +705,7 @@ public class GATKReportTable { * * @return the width of the primary key column */ - public int getPrimaryKeyColumnWidth() { + int getPrimaryKeyColumnWidth() { int maxWidth = getPrimaryKeyName().length(); for (Object primaryKey : primaryKeyColumn) { @@ -814,7 +724,7 @@ public class GATKReportTable { * * @param out the PrintStream to which the table should be written */ - public void write(PrintStream out) { + void write(PrintStream out) { /* * Table header: @@ -912,7 +822,7 @@ public class GATKReportTable { * * @param input Another GATK table */ - protected void combineWith(GATKReportTable input) { + void combineWith(GATKReportTable input) { /* * This function is different from addRowsFrom because we will add the ability to sum,average, etc rows * TODO: Add other combining algorithms diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java index caac79cb5..99381cc21 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -50,7 +50,7 @@ public enum GATKReportVersion { */ V1_0("v1.0"); - public final String versionString; + private final String versionString; private GATKReportVersion(String versionString) { this.versionString = versionString; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 5312c4136..66c142582 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -240,7 +240,7 @@ public class VariantAnnotator extends RodWalker implements Ann for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) { if ( line instanceof VCFInfoHeaderLine ) { VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line; - if ( infoline.getName().equals(expression.fieldName) ) { + if ( infoline.getID().equals(expression.fieldName) ) { targetHeaderLine = infoline; break; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java new file mode 100755 index 000000000..3712f0cc5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2011 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.commandline.Gatherer; +import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatumOptimized; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * User: carneiro + * Date: 3/29/11 + */ + + +public class BQSRGatherer extends Gatherer { + + ///////////////////////////// + // Private Member Variables + ///////////////////////////// + private static final String EOF_MARKER = "EOF"; + + private HashMap dataMap = new HashMap(); + + + private void addCSVData (String line) { + String[] covariates = line.split(","); + String key = ""; + RecalDatumOptimized values; + + for (int i = 0; i < covariates.length-3; i++) + key += covariates[i] + ","; + + if (covariates.length < 3) + throw new ReviewedStingException("Line only has 1 covariate : " + line); + + values = new RecalDatumOptimized(Long.parseLong(covariates[covariates.length - 3]), Long.parseLong(covariates[covariates.length - 2])); + + RecalDatumOptimized currentValues = dataMap.get(key); + if (currentValues == null) + dataMap.put(key, values); + else + currentValues.increment(values); + + } + + @Override + public void gather(List inputs, File output) { + PrintStream o; + try { + o = new PrintStream(output); + } catch ( FileNotFoundException e) { + throw new UserException("File to be output by CountCovariates Gather function was not found"); + } + + boolean sawEOF = false; + boolean printedHeader = false; + + // Read input files + for ( File RECAL_FILE : inputs) { + try { + for ( String line : new XReadLines(RECAL_FILE) ) { + if ( EOF_MARKER.equals(line) ) { + sawEOF = true; // sanity check + break; + } + + else if(line.startsWith("#")) { + if (!printedHeader) + o.println(line); + } + + else // Found a line of data + addCSVData(line); // Parse the line and add the data to the HashMap + } + + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e); + } + + if ( !sawEOF ) { + final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted!"; + throw new UserException.MalformedFile(RECAL_FILE, errorMessage); + } + printedHeader = true; + } + + // Write output file from dataMap + for(Map.Entry entry : dataMap.entrySet()) + o.println(entry.getKey() + entry.getValue().outputToCSV()); + o.println("EOF"); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index ab173e4fb..40f28f644 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer; import java.io.PrintStream; import java.util.Collections; @@ -59,7 +58,7 @@ public class RecalibrationArgumentCollection { * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, * and the raw empirical quality score calculated by phred-scaling the mismatch rate. */ - @Gather(CountCovariatesGatherer.class) + @Gather(BQSRGatherer.class) @Output protected PrintStream RECAL_FILE; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 2159bc839..3f4b4805f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -244,7 +244,8 @@ public class DiffEngine { table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString()); } - table.write(params.out); + GATKReport output = new GATKReport(table); + output.print(params.out); } protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 3c0da8e9d..c9a6cb8f2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -68,8 +68,8 @@ public class VCFDiffableReader implements DiffableReader { VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader); for ( VCFHeaderLine headerLine : header.getMetaData() ) { String key = headerLine.getKey(); - if ( headerLine instanceof VCFNamedHeaderLine ) - key += "_" + ((VCFNamedHeaderLine) headerLine).getName(); + if ( headerLine instanceof VCFIDHeaderLine) + key += "_" + ((VCFIDHeaderLine) headerLine).getID(); if ( root.hasElement(key) ) logger.warn("Skipping duplicate header line: file=" + file + " line=" + headerLine.toString()); else diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index f5928b723..05865b587 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -216,12 +216,12 @@ public class VariantsToVCF extends RodWalker { Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); //hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); - //hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); + //hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getID())); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for ( VCFHeaderLine field : hInfo ) { if ( field instanceof VCFFormatHeaderLine) { - allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName()); + allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getID()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 3c2ed18e4..8180eba30 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -154,18 +154,24 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data"); } else { - if ( str.startsWith("##INFO=") ) { - VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version); + if ( str.startsWith(VCFConstants.INFO_HEADER_START) ) { + final VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version); metaData.add(info); - infoFields.put(info.getName(), info.getType()); - } else if ( str.startsWith("##FILTER=") ) { - VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.substring(9),version); + infoFields.put(info.getID(), info.getType()); + } else if ( str.startsWith(VCFConstants.FILTER_HEADER_START) ) { + final VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.substring(9), version); metaData.add(filter); - filterFields.add(filter.getName()); - } else if ( str.startsWith("##FORMAT=") ) { - VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.substring(9),version); + filterFields.add(filter.getID()); + } else if ( str.startsWith(VCFConstants.FORMAT_HEADER_START) ) { + final VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.substring(9), version); metaData.add(format); - formatFields.put(format.getName(), format.getType()); + formatFields.put(format.getID(), format.getType()); + } else if ( str.startsWith(VCFConstants.CONTIG_HEADER_START) ) { + final VCFSimpleHeaderLine contig = new VCFSimpleHeaderLine(str.substring(9), version, VCFConstants.CONTIG_HEADER_START.substring(2), null); + metaData.add(contig); + } else if ( str.startsWith(VCFConstants.ALT_HEADER_START) ) { + final VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.substring(6), version, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description")); + metaData.add(alt); } else { int equals = str.indexOf("="); if ( equals != -1 ) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java deleted file mode 100644 index a9de949d8..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.broadinstitute.sting.utils.codecs.vcf; - -/** - * @author ebanks - * A class representing a key=value entry for ALT fields in the VCF header - */ -public class VCFAltHeaderLine extends VCFSimpleHeaderLine { - - /** - * create a VCF filter header line - * - * @param name the name for this header line - * @param description the description for this header line - */ - public VCFAltHeaderLine(String name, String description) { - super(name, description, SupportedHeaderLineType.ALT); - } - - /** - * create a VCF info header line - * - * @param line the header line - * @param version the vcf header version - */ - protected VCFAltHeaderLine(String line, VCFHeaderVersion version) { - super(line, version, SupportedHeaderLineType.ALT); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java index 97166833b..d2bd507b5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java @@ -34,7 +34,7 @@ import java.util.Map; /** * a base class for compound header lines, which include info lines and format lines (so far) */ -public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { +public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine { public enum SupportedHeaderLineType { INFO(true), FORMAT(false); @@ -52,7 +52,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF private VCFHeaderLineType type; // access methods - public String getName() { return name; } + public String getID() { return name; } public String getDescription() { return description; } public VCFHeaderLineType getType() { return type; } public VCFHeaderLineCount getCountType() { return countType; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java index 8e9d989cc..b23371cc9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java @@ -80,6 +80,13 @@ public final class VCFConstants { public static final String PHASED_SWITCH_PROB_v3 = "\\"; public static final String PHASING_TOKENS = "/|\\"; + // header lines + public static final String FILTER_HEADER_START = "##FILTER"; + public static final String FORMAT_HEADER_START = "##FORMAT"; + public static final String INFO_HEADER_START = "##INFO"; + public static final String ALT_HEADER_START = "##ALT"; + public static final String CONTIG_HEADER_START = "##contig"; + // old indel alleles public static final char DELETION_ALLELE_v3 = 'D'; public static final char INSERTION_ALLELE_v3 = 'I'; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java index 418b80074..dd0a333f3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; +import java.util.Arrays; + /** * @author ebanks * A class representing a key=value entry for FILTER fields in the VCF header @@ -13,7 +15,7 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine { * @param description the description for this header line */ public VCFFilterHeaderLine(String name, String description) { - super(name, description, SupportedHeaderLineType.FILTER); + super("FILTER", name, description); } /** @@ -23,6 +25,6 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine { * @param version the vcf header version */ protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) { - super(line, version, SupportedHeaderLineType.FILTER); + super(line, version, "FILTER", Arrays.asList("ID", "Description")); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 5c5df15ab..27bab8c41 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.util.*; @@ -126,11 +125,11 @@ public class VCFHeader { for ( VCFHeaderLine line : mMetaData ) { if ( line instanceof VCFInfoHeaderLine ) { VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line; - mInfoMetaData.put(infoLine.getName(), infoLine); + mInfoMetaData.put(infoLine.getID(), infoLine); } else if ( line instanceof VCFFormatHeaderLine ) { VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line; - mFormatMetaData.put(formatLine.getName(), formatLine); + mFormatMetaData.put(formatLine.getID(), formatLine); } else { mOtherMetaData.put(line.getKey(), line); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java index e39a09cb1..88fed75d7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java @@ -73,10 +73,14 @@ class VCF4Parser implements VCFLineParser { // validate the tags against the expected list index = 0; - if (ret.size() > expectedTagOrder.size()) throw new IllegalArgumentException("Unexpected tag count " + ret.size() + " in string " + expectedTagOrder.size()); - for (String str : ret.keySet()) { - if (!expectedTagOrder.get(index).equals(str)) throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine); - index++; + if ( expectedTagOrder != null ) { + if ( ret.size() > expectedTagOrder.size() ) + throw new IllegalArgumentException("Unexpected tag count " + ret.size() + " in string " + expectedTagOrder.size()); + for ( String str : ret.keySet() ) { + if ( !expectedTagOrder.get(index).equals(str) ) + throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine); + index++; + } } return ret; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFNamedHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFIDHeaderLine.java similarity index 91% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFNamedHeaderLine.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFIDHeaderLine.java index f78e936b2..65321881a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFNamedHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFIDHeaderLine.java @@ -24,7 +24,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; -/** an interface for named header lines **/ -public interface VCFNamedHeaderLine { - String getName(); +/** an interface for ID-based header lines **/ +public interface VCFIDHeaderLine { + String getID(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java index 152043f28..05d603073 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; -import java.util.Arrays; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; @@ -9,34 +9,35 @@ import java.util.Map; * @author ebanks * A class representing a key=value entry for simple VCF header types */ -public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { - - public enum SupportedHeaderLineType { - FILTER, ALT; - } +public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine { private String name; - private String description; - - // our type of line, i.e. filter, alt, etc - private final SupportedHeaderLineType lineType; - + private Map genericFields = new LinkedHashMap(); /** * create a VCF filter header line * - * @param name the name for this header line - * @param description the description for this header line - * @param lineType the header line type + * @param key the key for this header line + * @param name the name for this header line + * @param genericFields other fields for this header line */ - public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) { - super(lineType.toString(), ""); - this.lineType = lineType; - this.name = name; - this.description = description; + public VCFSimpleHeaderLine(String key, String name, Map genericFields) { + super(key, ""); + initialize(name, genericFields); + } - if ( name == null || description == null ) - throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description )); + /** + * create a VCF filter header line + * + * @param key the key for this header line + * @param name the name for this header line + * @param description description for this header line + */ + public VCFSimpleHeaderLine(String key, String name, String description) { + super(key, ""); + Map map = new LinkedHashMap(1); + map.put("Description", description); + initialize(name, map); } /** @@ -44,38 +45,50 @@ public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNa * * @param line the header line * @param version the vcf header version - * @param lineType the header line type + * @param key the key for this header line + * @param expectedTagOrdering the tag ordering expected for this header line */ - protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) { - super(lineType.toString(), ""); - this.lineType = lineType; - Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description")); + protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, String key, List expectedTagOrdering) { + super(key, ""); + Map mapping = VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering); name = mapping.get("ID"); - description = mapping.get("Description"); - if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided - description = UNBOUND_DESCRIPTION; + initialize(name, mapping); + } + + protected void initialize(String name, Map genericFields) { + if ( name == null || genericFields == null || genericFields.isEmpty() ) + throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s", super.getKey(), name)); + + this.name = name; + this.genericFields.putAll(genericFields); } protected String toStringEncoding() { - Map map = new LinkedHashMap(); + Map map = new LinkedHashMap(); map.put("ID", name); - map.put("Description", description); - return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map); + map.putAll(genericFields); + return getKey() + "=" + VCFHeaderLine.toStringEncoding(map); } public boolean equals(Object o) { if ( !(o instanceof VCFSimpleHeaderLine) ) return false; VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o; - return name.equals(other.name) && - description.equals(other.description); + if ( !name.equals(other.name) || genericFields.size() != other.genericFields.size() ) + return false; + for ( Map.Entry entry : genericFields.entrySet() ) { + if ( !entry.getValue().equals(other.genericFields.get(entry.getKey())) ) + return false; + } + + return true; } - public String getName() { + public String getID() { return name; } - public String getDescription() { - return description; + public Map getGenericFields() { + return genericFields; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index 5bd6a9b32..238a06243 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -155,10 +155,10 @@ public class VCFUtils { for ( VCFHeader source : headers ) { //System.out.printf("Merging in header %s%n", source); for ( VCFHeaderLine line : source.getMetaData()) { - String key = line.getKey(); - if ( line instanceof VCFNamedHeaderLine) - key = key + "" + ((VCFNamedHeaderLine) line).getName(); + String key = line.getKey(); + if ( line instanceof VCFIDHeaderLine ) + key = key + "-" + ((VCFIDHeaderLine)line).getID(); if ( map.containsKey(key) ) { VCFHeaderLine other = map.get(key); @@ -166,8 +166,8 @@ public class VCFUtils { continue; else if ( ! line.getClass().equals(other.getClass()) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); - else if ( line instanceof VCFFilterHeaderLine) { - String lineName = ((VCFFilterHeaderLine) line).getName(); String otherName = ((VCFFilterHeaderLine) other).getName(); + else if ( line instanceof VCFFilterHeaderLine ) { + String lineName = ((VCFFilterHeaderLine) line).getID(); String otherName = ((VCFFilterHeaderLine) other).getID(); if ( ! lineName.equals(otherName) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); } else if ( line instanceof VCFCompoundHeaderLine ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index 124bda7bc..90c92189e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -80,11 +80,15 @@ public class GATKReportUnitTest extends BaseTest { @Test public void testSimpleGATKReport() { - GATKReport report = GATKReport.newSimpleReport("TableName", "a", "b", "Roger", "is", "Awesome"); - report.addRow("a", 'F', 12, 23.45, true); - report.addRow("ans", '3', 24.5, 456L, 2345); - report.addRow("hi", null, null, "", 2.3); + // Create a new simple GATK report named "TableName" with columns: Roger, is, and Awesome + GATKReport report = GATKReport.newSimpleReport("TableName", "Roger", "is", "Awesome"); + // Add data to simple GATK report + report.addRow( 12, 23.45, true); + report.addRow("ans", '3', 24.5); + report.addRow("hi", "", 2.3); + + // Print the report to console //report.print(System.out); try { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java new file mode 100644 index 000000000..f1df6f9a7 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -0,0 +1,29 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.testng.annotations.Test; + +import java.io.File; +import java.util.LinkedList; +import java.util.List; + +/** + * @author Mauricio Carneiro + * @since 3/7/12 + */ +public class BQSRGathererUnitTest { + RecalibrationArgumentCollection RAC; + + private static File recal1 = new File("public/testdata/exampleCSV.csv"); + private static File recal2 = new File("public/testdata/exampleCSV.2.csv"); + + @Test(enabled = false) + public void testCombineTwoFiles() { + BQSRGatherer gatherer = new BQSRGatherer(); + List recalFiles = new LinkedList (); + File output = new File("foo.csv"); + + recalFiles.add(recal1); + recalFiles.add(recal2); + gatherer.gather(recalFiles, output); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java index 408849c78..4a83c34cc 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -50,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest { @DataProvider(name = "data") public Object[][] createData() { - new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dac62fcd25e1052bf18b5707700dda7e"); - new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "e10c48dd294fb257802d4e73bb50580d"); + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dba5eab2b9587c1062721b164e4fd9a6"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "de35c93450b46db5fc5516af3c55d62a"); return TestParams.getTests(TestParams.class); }