Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mark DePristo 2012-03-20 07:47:24 -04:00
commit 36636eb323
29 changed files with 332 additions and 235 deletions

View File

@ -418,7 +418,7 @@ public class RefMetaDataTracker {
* with the current site as a RODRecordList List object. If no data track with specified name is available, * with the current site as a RODRecordList List object. If no data track with specified name is available,
* returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up * returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up
* with track name set to 'name' and location set to null; otherwise the wrapper object will have name and * with track name set to 'name' and location set to null; otherwise the wrapper object will have name and
* location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution, * location set to defaultValue.getID() and defaultValue.getLocation(), respectively (use caution,
* defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise: * defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise:
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at, * for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
* regardless of the presence of "extended" RODs overlapping with that location). * regardless of the presence of "extended" RODs overlapping with that location).

View File

@ -132,7 +132,7 @@ public class FeatureManager {
} }
/** /**
* Return the FeatureDescriptor with getName().equals(name) * Return the FeatureDescriptor with getID().equals(name)
* *
* @param name * @param name
* @return A FeatureDescriptor or null if none is found * @return A FeatureDescriptor or null if none is found

View File

@ -41,10 +41,10 @@ import java.util.TreeMap;
public class GATKReport { public class GATKReport {
public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport."; public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport.";
public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_0; public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_0;
public static final String SEPARATOR = ":"; private static final String SEPARATOR = ":";
private GATKReportVersion version = LATEST_REPORT_VERSION; private GATKReportVersion version = LATEST_REPORT_VERSION;
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>(); private final TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
/** /**
* Create a new, empty GATKReport. * Create a new, empty GATKReport.
@ -70,6 +70,15 @@ public class GATKReport {
loadReport(file); loadReport(file);
} }
/**
 * Builds a GATK report that initially contains the given tables.
 *
 * @param tables any number of tables to add to the report; each one is passed to addTable()
 *               in the order given
 */
public GATKReport(GATKReportTable... tables) {
    for (final GATKReportTable reportTable : tables) {
        addTable(reportTable);
    }
}
/** /**
* Load a GATKReport file from disk * Load a GATKReport file from disk
* *
@ -202,10 +211,6 @@ public class GATKReport {
return version; return version;
} }
/**
* Sets the version recorded for this report.
*
* @param version the report version to store in this report's version field
*/
public void setVersion(GATKReportVersion version) {
this.version = version;
}
/** /**
* Returns whether or not the two reports have the same format, from columns, to tables, to reports, and everything * Returns whether or not the two reports have the same format, from columns, to tables, to reports, and everything
* in between. This does not check if the data inside is the same. This is the check to see if the two reports are * in between. This does not check if the data inside is the same. This is the check to see if the two reports are

View File

@ -199,7 +199,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
defaultValue.equals(that.defaultValue) ); defaultValue.equals(that.defaultValue) );
} }
protected boolean equals(GATKReportColumn that) { boolean equals(GATKReportColumn that) {
if ( !this.keySet().equals(that.keySet()) ) { if ( !this.keySet().equals(that.keySet()) ) {
return false; return false;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011, The Broad Institute * Copyright (c) 2012, The Broad Institute
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation
@ -29,8 +29,8 @@ package org.broadinstitute.sting.gatk.report;
*/ */
public class GATKReportColumnFormat { public class GATKReportColumnFormat {
public static enum Alignment { LEFT, RIGHT } public static enum Alignment { LEFT, RIGHT }
public int width; private final int width;
public Alignment alignment; private final Alignment alignment;
public GATKReportColumnFormat(int width, Alignment alignment) { public GATKReportColumnFormat(int width, Alignment alignment) {
this.width = width; this.width = width;

View File

@ -24,13 +24,15 @@
package org.broadinstitute.sting.gatk.report; package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*; import java.util.*;
/** /**
* Tracks a linked list of GATKReportColumn in order by name. * Tracks a linked list of GATKReportColumn in order by name.
*/ */
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> { public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>(); private final List<String> columnNames = new ArrayList<String>();
/** /**
* Returns the column by index * Returns the column by index
@ -43,9 +45,12 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> i
} }
@Override @Override
public GATKReportColumn remove(Object key) { public GATKReportColumn remove(Object columnName) {
columnNames.remove(key); if ( !(columnName instanceof String) ) {
return super.remove(key); throw new ReviewedStingException("The column name must be a String!");
}
columnNames.remove(columnName.toString());
return super.remove(columnName);
} }
@Override @Override
@ -85,7 +90,7 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> i
return true; return true;
} }
protected boolean equals(GATKReportColumns that) { boolean equals(GATKReportColumns that) {
for (Map.Entry<String, GATKReportColumn> pair : entrySet()) { for (Map.Entry<String, GATKReportColumn> pair : entrySet()) {
// Make sure that every column is the same, we know that the # of columns // Make sure that every column is the same, we know that the # of columns
// is the same from isSameFormat() // is the same from isSameFormat()

View File

@ -67,7 +67,7 @@ public enum GATKReportDataType {
*/ */
String("%[Ss]"); String("%[Ss]");
public final String dataTypeString; private final String dataTypeString;
private GATKReportDataType(String dataTypeString) { private GATKReportDataType(String dataTypeString) {
this.dataTypeString = dataTypeString; this.dataTypeString = dataTypeString;
@ -189,7 +189,7 @@ public enum GATKReportDataType {
* @param obj The input string * @param obj The input string
* @return an object that matches the data type. * @return an object that matches the data type.
*/ */
protected Object Parse(Object obj) { Object Parse(Object obj) {
if (obj instanceof String) { if (obj instanceof String) {
String str = obj.toString(); String str = obj.toString();
switch (this) { switch (this) {

View File

@ -1,3 +1,27 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report; package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.commandline.Gatherer; import org.broadinstitute.sting.commandline.Gatherer;
@ -8,13 +32,6 @@ import java.io.FileNotFoundException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.List; import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 1/9/12
* Time: 11:17 PM
* To change this template use File | Settings | File Templates.
*/
public class GATKReportGatherer extends Gatherer { public class GATKReportGatherer extends Gatherer {
@Override @Override
public void gather(List<File> inputs, File output) { public void gather(List<File> inputs, File output) {

View File

@ -34,97 +34,14 @@ import java.util.*;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/**
* A data structure that allows data to be collected over the course of a walker's computation, then have that data
* written to a PrintStream such that it's human-readable, AWK-able, and R-friendly (given that you load it using the
* GATKReport loader module).
* <p/>
* The goal of this object is to use the same data structure for both accumulating data during a walker's computation
* and emitting that data to a file for easy analysis in R (or any other program/language that can take in a table of
* results). Thus, all of the infrastructure below is designed simply to make printing the following as easy as
* possible:
* <p/>
* ##:GATKReport.v0.1 ErrorRatePerCycle : The error rate per sequenced position in the reads
* cycle errorrate.61PA8.7 qualavg.61PA8.7
* 0 0.007451835696110506 25.474613284804366
* 1 0.002362777171937477 29.844949954504095
* 2 9.087604507451836E-4 32.87590975254731
* 3 5.452562704471102E-4 34.498999090081895
* 4 9.087604507451836E-4 35.14831665150137
* 5 5.452562704471102E-4 36.07223435225619
* 6 5.452562704471102E-4 36.1217248908297
* 7 5.452562704471102E-4 36.1910480349345
* 8 5.452562704471102E-4 36.00345705967977
* <p/>
* Here, we have a GATKReport table - a well-formatted, easy to read representation of some tabular data. Every single
* table has this same GATKReport.v0.1 header, which permits multiple files from different sources to be cat-ed
* together, which makes it very easy to pull tables from different programs into R via a single file.
* <p/>
* ------------
* Definitions:
* <p/>
* Table info:
* The first line, structured as
* ##:<report version> <table name> : <table description>
* <p/>
* Table header:
* The second line, specifying a unique name for each column in the table.
* <p/>
* The first column mentioned in the table header is the "primary key" column - a column that provides the unique
* identifier for each row in the table. Once this column is created, any element in the table can be referenced by
* the row-column coordinate, i.e. "primary key"-"column name" coordinate.
* <p/>
* When a column is added to a table, a default value must be specified (usually 0). This is the initial value for
* an element in a column. This permits operations like increment() and decrement() to work properly on columns that
* are effectively counters for a particular event.
* <p/>
* Finally, the display property for each column can be set during column creation. This is useful when a given
* column stores an intermediate result that will be used later on, perhaps to calculate the value of another column.
* In these cases, it's obviously necessary to store the value required for further computation, but it's not
* necessary to actually print the intermediate column.
* <p/>
* Table body:
* The values of the table itself.
* <p/>
* ---------------
* Implementation:
* <p/>
* The implementation of this table has two components:
* 1. A TreeSet<Object> that stores all the values ever specified for the primary key. Any get() operation that
* refers to an element where the primary key object does not exist will result in its implicit creation. I
* haven't yet decided if this is a good idea...
* <p/>
* 2. A HashMap<String, GATKReportColumn> that stores a mapping from column name to column contents. Each
* GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap<Object, Object>) between
* primary key and the column value. This means that, given N columns, the primary key information is stored
* N+1 times. This is obviously wasteful and can likely be handled much more elegantly in future implementations.
* <p/>
* ------------------------------
* Element and column operations:
* <p/>
* In addition to simply getting and setting values, this object also permits some simple operations to be applied to
* individual elements or to whole columns. For instance, an element can be easily incremented without the hassle of
* calling get(), incrementing the obtained value by 1, and then calling set() with the new value. Also, some vector
* operations are supported. For instance, two whole columns can be divided and have the result be set to a third
* column. This is especially useful when aggregating counts in two intermediate columns that will eventually need to
* be manipulated row-by-row to compute the final column.
* <p/>
* Note: I've made no attempt whatsoever to make these operations efficient. Right now, some of the methods check the
* type of the stored object using an instanceof call and attempt to do the right thing. Others cast the contents of
* the cell to a Number, call the Number.toDouble() method and compute a result. This is clearly not the ideal design,
* but at least the prototype contained herein works.
*
* @author Kiran Garimella
* @author Khalid Shakir
*/
public class GATKReportTable { public class GATKReportTable {
/** /**
* REGEX that matches any table with an invalid name * REGEX that matches any table with an invalid name
*/ */
public static final String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]"; public static final String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
public static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable"; private static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable";
public static final String SEPARATOR = ":"; private static final String SEPARATOR = ":";
public static final String ENDLINE = ":;"; private static final String ENDLINE = ":;";
private String tableName; private String tableName;
private String tableDescription; private String tableDescription;
@ -418,8 +335,8 @@ public class GATKReportTable {
* output file), and the format string used to display the data. * output file), and the format string used to display the data.
* *
* @param columnName the name of the column * @param columnName the name of the column
* @param defaultValue if true - the column will be displayed; if false - the column will be hidden * @param defaultValue the default value of a blank cell
* @param display * @param display if true - the column will be displayed; if false - the column will be hidden
* @param format the format string used to display data * @param format the format string used to display data
*/ */
public void addColumn(String columnName, Object defaultValue, boolean display, String format) { public void addColumn(String columnName, Object defaultValue, boolean display, String format) {
@ -429,12 +346,6 @@ public class GATKReportTable {
columns.put(columnName, new GATKReportColumn(columnName, defaultValue, display, format)); columns.put(columnName, new GATKReportColumn(columnName, defaultValue, display, format));
} }
/**
* Returns the report version associated with this table.
*
* @return always GATKReport.LATEST_REPORT_VERSION; no per-table version is consulted here
*/
public GATKReportVersion getVersion() {
return GATKReport.LATEST_REPORT_VERSION;
}
/** /**
* Check if the requested element exists, and if not, create it. * Check if the requested element exists, and if not, create it.
* *
@ -508,8 +419,7 @@ public class GATKReportTable {
value = newValue; value = newValue;
if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || if (column.getDataType().equals(GATKReportDataType.fromObject(value)) ||
column.getDataType().equals(GATKReportDataType.Unknown) || column.getDataType().equals(GATKReportDataType.Unknown) )
value == null)
columns.get(columnName).put(primaryKey, value); columns.get(columnName).put(primaryKey, value);
else else
throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s",
@ -795,7 +705,7 @@ public class GATKReportTable {
* *
* @return the width of the primary key column * @return the width of the primary key column
*/ */
public int getPrimaryKeyColumnWidth() { int getPrimaryKeyColumnWidth() {
int maxWidth = getPrimaryKeyName().length(); int maxWidth = getPrimaryKeyName().length();
for (Object primaryKey : primaryKeyColumn) { for (Object primaryKey : primaryKeyColumn) {
@ -814,7 +724,7 @@ public class GATKReportTable {
* *
* @param out the PrintStream to which the table should be written * @param out the PrintStream to which the table should be written
*/ */
public void write(PrintStream out) { void write(PrintStream out) {
/* /*
* Table header: * Table header:
@ -912,7 +822,7 @@ public class GATKReportTable {
* *
* @param input Another GATK table * @param input Another GATK table
*/ */
protected void combineWith(GATKReportTable input) { void combineWith(GATKReportTable input) {
/* /*
* This function is different from addRowsFrom because we will add the ability to sum,average, etc rows * This function is different from addRowsFrom because we will add the ability to sum,average, etc rows
* TODO: Add other combining algorithms * TODO: Add other combining algorithms

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011, The Broad Institute * Copyright (c) 2012, The Broad Institute
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation
@ -50,7 +50,7 @@ public enum GATKReportVersion {
*/ */
V1_0("v1.0"); V1_0("v1.0");
public final String versionString; private final String versionString;
private GATKReportVersion(String versionString) { private GATKReportVersion(String versionString) {
this.versionString = versionString; this.versionString = versionString;

View File

@ -240,7 +240,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) { for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) {
if ( line instanceof VCFInfoHeaderLine ) { if ( line instanceof VCFInfoHeaderLine ) {
VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line; VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line;
if ( infoline.getName().equals(expression.fieldName) ) { if ( infoline.getID().equals(expression.fieldName) ) {
targetHeaderLine = infoline; targetHeaderLine = infoline;
break; break;
} }

View File

@ -0,0 +1,124 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.commandline.Gatherer;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatumOptimized;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Gatherer that merges several recalibration CSV files into a single output file.
 *
 * Every data line is split on commas. The last three fields are treated as data columns and all
 * preceding fields form the covariate key. Lines that share a key are merged by calling
 * RecalDatumOptimized.increment(). Lines starting with '#' are header lines and are copied
 * from the first input file only. Each input file must contain an "EOF" marker line.
 *
 * User: carneiro
 * Date: 3/29/11
 */
public class BQSRGatherer extends Gatherer {

    /////////////////////////////
    // Private Member Variables
    /////////////////////////////
    private static final String EOF_MARKER = "EOF";

    /** Number of trailing comma-separated fields on a data line that are data rather than covariates. */
    private static final int NUM_DATA_FIELDS = 3;

    /** Accumulated data, keyed by the comma-terminated covariate prefix of each data line. */
    private final HashMap<String, RecalDatumOptimized> dataMap = new HashMap<String, RecalDatumOptimized>();

    /**
     * Parses one CSV data line and merges its counts into {@link #dataMap}.
     *
     * The key is every field except the last three, each followed by a comma. The datum is built
     * from the third-to-last and second-to-last fields, parsed as longs; the last field is ignored.
     *
     * @param line a comma-separated data line
     * @throws ReviewedStingException if the line has fewer than three fields
     */
    private void addCSVData (String line) {
        final String[] fields = line.split(",");

        // Validate before touching any index so a short line fails with a clear message.
        if (fields.length < NUM_DATA_FIELDS)
            throw new ReviewedStingException("Line has fewer than the 3 required data fields : " + line);

        final int firstDataIndex = fields.length - NUM_DATA_FIELDS;

        final StringBuilder keyBuilder = new StringBuilder();
        for (int i = 0; i < firstDataIndex; i++)
            keyBuilder.append(fields[i]).append(',');
        final String key = keyBuilder.toString();

        final RecalDatumOptimized values = new RecalDatumOptimized(
                Long.parseLong(fields[firstDataIndex]),
                Long.parseLong(fields[firstDataIndex + 1]));

        final RecalDatumOptimized currentValues = dataMap.get(key);
        if (currentValues == null)
            dataMap.put(key, values);
        else
            currentValues.increment(values);
    }

    /**
     * Reads every input file, merges their data lines and writes the combined table to output.
     *
     * @param inputs the recalibration files to merge
     * @param output the file to write the merged table to
     * @throws UserException if the output file cannot be opened, an input file cannot be found,
     *                       or an input file lacks the EOF marker
     */
    @Override
    public void gather(List<File> inputs, File output) {
        final PrintStream o;
        try {
            o = new PrintStream(output);
        } catch ( FileNotFoundException e) {
            throw new UserException("File to be output by CountCovariates Gather function was not found");
        }

        // Always close the stream: PrintStream buffers, so an unclosed stream can lose output.
        try {
            boolean printedHeader = false;

            // Read input files
            for ( File recalFile : inputs) {
                // Reset per file, otherwise only the first input is really checked for the marker.
                boolean sawEOF = false;
                try {
                    for ( String line : new XReadLines(recalFile) ) {
                        if ( EOF_MARKER.equals(line) ) {
                            sawEOF = true; // sanity check
                            break;
                        }
                        else if(line.startsWith("#")) {
                            // Header lines are emitted from the first input file only
                            if (!printedHeader)
                                o.println(line);
                        }
                        else // Found a line of data
                            addCSVData(line); // Parse the line and add the data to the HashMap
                    }
                } catch ( FileNotFoundException e ) {
                    throw new UserException.CouldNotReadInputFile(recalFile, "Can not find input file", e);
                }

                if ( !sawEOF ) {
                    final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted!";
                    throw new UserException.MalformedFile(recalFile, errorMessage);
                }
                printedHeader = true;
            }

            // Write output file from dataMap
            for(Map.Entry<String, RecalDatumOptimized> entry : dataMap.entrySet())
                o.println(entry.getKey() + entry.getValue().outputToCSV());
            o.println(EOF_MARKER);
        } finally {
            o.close();
        }
    }
}

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Collections; import java.util.Collections;
@ -59,7 +58,7 @@ public class RecalibrationArgumentCollection {
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
* and the raw empirical quality score calculated by phred-scaling the mismatch rate. * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
*/ */
@Gather(CountCovariatesGatherer.class) @Gather(BQSRGatherer.class)
@Output @Output
protected PrintStream RECAL_FILE; protected PrintStream RECAL_FILE;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011, The Broad Institute * Copyright (c) 2012, The Broad Institute
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation
@ -244,7 +244,8 @@ public class DiffEngine {
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString()); table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString());
} }
table.write(params.out); GATKReport output = new GATKReport(table);
output.print(params.out);
} }
protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) { protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) {

View File

@ -68,8 +68,8 @@ public class VCFDiffableReader implements DiffableReader {
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader); VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
for ( VCFHeaderLine headerLine : header.getMetaData() ) { for ( VCFHeaderLine headerLine : header.getMetaData() ) {
String key = headerLine.getKey(); String key = headerLine.getKey();
if ( headerLine instanceof VCFNamedHeaderLine ) if ( headerLine instanceof VCFIDHeaderLine)
key += "_" + ((VCFNamedHeaderLine) headerLine).getName(); key += "_" + ((VCFIDHeaderLine) headerLine).getID();
if ( root.hasElement(key) ) if ( root.hasElement(key) )
logger.warn("Skipping duplicate header line: file=" + file + " line=" + headerLine.toString()); logger.warn("Skipping duplicate header line: file=" + file + " line=" + headerLine.toString());
else else

View File

@ -216,12 +216,12 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
//hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); //hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
//hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); //hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getID()));
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
for ( VCFHeaderLine field : hInfo ) { for ( VCFHeaderLine field : hInfo ) {
if ( field instanceof VCFFormatHeaderLine) { if ( field instanceof VCFFormatHeaderLine) {
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName()); allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getID());
} }
} }

View File

@ -154,18 +154,24 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data"); throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data");
} else { } else {
if ( str.startsWith("##INFO=") ) { if ( str.startsWith(VCFConstants.INFO_HEADER_START) ) {
VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version); final VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version);
metaData.add(info); metaData.add(info);
infoFields.put(info.getName(), info.getType()); infoFields.put(info.getID(), info.getType());
} else if ( str.startsWith("##FILTER=") ) { } else if ( str.startsWith(VCFConstants.FILTER_HEADER_START) ) {
VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.substring(9),version); final VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.substring(9), version);
metaData.add(filter); metaData.add(filter);
filterFields.add(filter.getName()); filterFields.add(filter.getID());
} else if ( str.startsWith("##FORMAT=") ) { } else if ( str.startsWith(VCFConstants.FORMAT_HEADER_START) ) {
VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.substring(9),version); final VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.substring(9), version);
metaData.add(format); metaData.add(format);
formatFields.put(format.getName(), format.getType()); formatFields.put(format.getID(), format.getType());
} else if ( str.startsWith(VCFConstants.CONTIG_HEADER_START) ) {
final VCFSimpleHeaderLine contig = new VCFSimpleHeaderLine(str.substring(9), version, VCFConstants.CONTIG_HEADER_START.substring(2), null);
metaData.add(contig);
} else if ( str.startsWith(VCFConstants.ALT_HEADER_START) ) {
final VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.substring(6), version, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description"));
metaData.add(alt);
} else { } else {
int equals = str.indexOf("="); int equals = str.indexOf("=");
if ( equals != -1 ) if ( equals != -1 )

View File

@ -1,28 +0,0 @@
package org.broadinstitute.sting.utils.codecs.vcf;
/**
* @author ebanks
* A class representing a key=value entry for ALT fields in the VCF header
*/
public class VCFAltHeaderLine extends VCFSimpleHeaderLine {
/**
* create a VCF ALT header line from a name and a description
*
* @param name the name for this header line
* @param description the description for this header line
*/
public VCFAltHeaderLine(String name, String description) {
super(name, description, SupportedHeaderLineType.ALT);
}
/**
* create a VCF ALT header line from the text of a header line
*
* @param line the header line
* @param version the vcf header version
*/
protected VCFAltHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.ALT);
}
}

View File

@ -34,7 +34,7 @@ import java.util.Map;
/** /**
* a base class for compound header lines, which include info lines and format lines (so far) * a base class for compound header lines, which include info lines and format lines (so far)
*/ */
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine {
public enum SupportedHeaderLineType { public enum SupportedHeaderLineType {
INFO(true), FORMAT(false); INFO(true), FORMAT(false);
@ -52,7 +52,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
private VCFHeaderLineType type; private VCFHeaderLineType type;
// access methods // access methods
public String getName() { return name; } public String getID() { return name; }
public String getDescription() { return description; } public String getDescription() { return description; }
public VCFHeaderLineType getType() { return type; } public VCFHeaderLineType getType() { return type; }
public VCFHeaderLineCount getCountType() { return countType; } public VCFHeaderLineCount getCountType() { return countType; }

View File

@ -80,6 +80,13 @@ public final class VCFConstants {
public static final String PHASED_SWITCH_PROB_v3 = "\\"; public static final String PHASED_SWITCH_PROB_v3 = "\\";
public static final String PHASING_TOKENS = "/|\\"; public static final String PHASING_TOKENS = "/|\\";
// header lines: the "##"-prefixed strings that begin each kind of structured
// meta-data line in a VCF header (FILTER, FORMAT, INFO, ALT and contig)
public static final String FILTER_HEADER_START = "##FILTER";
public static final String FORMAT_HEADER_START = "##FORMAT";
public static final String INFO_HEADER_START = "##INFO";
public static final String ALT_HEADER_START = "##ALT";
public static final String CONTIG_HEADER_START = "##contig";
// old indel alleles // old indel alleles
public static final char DELETION_ALLELE_v3 = 'D'; public static final char DELETION_ALLELE_v3 = 'D';
public static final char INSERTION_ALLELE_v3 = 'I'; public static final char INSERTION_ALLELE_v3 = 'I';

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.utils.codecs.vcf; package org.broadinstitute.sting.utils.codecs.vcf;
import java.util.Arrays;
/** /**
* @author ebanks * @author ebanks
* A class representing a key=value entry for FILTER fields in the VCF header * A class representing a key=value entry for FILTER fields in the VCF header
@ -13,7 +15,7 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
* @param description the description for this header line * @param description the description for this header line
*/ */
public VCFFilterHeaderLine(String name, String description) { public VCFFilterHeaderLine(String name, String description) {
super(name, description, SupportedHeaderLineType.FILTER); super("FILTER", name, description);
} }
/** /**
@ -23,6 +25,6 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
* @param version the vcf header version * @param version the vcf header version
*/ */
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) { protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.FILTER); super(line, version, "FILTER", Arrays.asList("ID", "Description"));
} }
} }

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.codecs.vcf;
import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.*; import java.util.*;
@ -126,11 +125,11 @@ public class VCFHeader {
for ( VCFHeaderLine line : mMetaData ) { for ( VCFHeaderLine line : mMetaData ) {
if ( line instanceof VCFInfoHeaderLine ) { if ( line instanceof VCFInfoHeaderLine ) {
VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line; VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line;
mInfoMetaData.put(infoLine.getName(), infoLine); mInfoMetaData.put(infoLine.getID(), infoLine);
} }
else if ( line instanceof VCFFormatHeaderLine ) { else if ( line instanceof VCFFormatHeaderLine ) {
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line; VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
mFormatMetaData.put(formatLine.getName(), formatLine); mFormatMetaData.put(formatLine.getID(), formatLine);
} }
else { else {
mOtherMetaData.put(line.getKey(), line); mOtherMetaData.put(line.getKey(), line);

View File

@ -73,10 +73,14 @@ class VCF4Parser implements VCFLineParser {
// validate the tags against the expected list // validate the tags against the expected list
index = 0; index = 0;
if (ret.size() > expectedTagOrder.size()) throw new IllegalArgumentException("Unexpected tag count " + ret.size() + " in string " + expectedTagOrder.size()); if ( expectedTagOrder != null ) {
for (String str : ret.keySet()) { if ( ret.size() > expectedTagOrder.size() )
if (!expectedTagOrder.get(index).equals(str)) throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine); throw new IllegalArgumentException("Unexpected tag count " + ret.size() + " in string " + expectedTagOrder.size());
index++; for ( String str : ret.keySet() ) {
if ( !expectedTagOrder.get(index).equals(str) )
throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine);
index++;
}
} }
return ret; return ret;
} }

View File

@ -24,7 +24,7 @@
package org.broadinstitute.sting.utils.codecs.vcf; package org.broadinstitute.sting.utils.codecs.vcf;
/** an interface for named header lines **/ /** an interface for ID-based header lines **/
public interface VCFNamedHeaderLine { public interface VCFIDHeaderLine {
String getName(); String getID();
} }

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.utils.codecs.vcf; package org.broadinstitute.sting.utils.codecs.vcf;
import java.util.Arrays;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
@ -9,34 +9,35 @@ import java.util.Map;
* @author ebanks * @author ebanks
* A class representing a key=value entry for simple VCF header types * A class representing a key=value entry for simple VCF header types
*/ */
public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine {
public enum SupportedHeaderLineType {
FILTER, ALT;
}
private String name; private String name;
private String description; private Map<String, String> genericFields = new LinkedHashMap<String, String>();
// our type of line, i.e. filter, alt, etc
private final SupportedHeaderLineType lineType;
/** /**
* create a VCF filter header line * create a VCF filter header line
* *
* @param name the name for this header line * @param key the key for this header line
* @param description the description for this header line * @param name the name for this header line
* @param lineType the header line type * @param genericFields other fields for this header line
*/ */
public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) { public VCFSimpleHeaderLine(String key, String name, Map<String, String> genericFields) {
super(lineType.toString(), ""); super(key, "");
this.lineType = lineType; initialize(name, genericFields);
this.name = name; }
this.description = description;
if ( name == null || description == null ) /**
throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description )); * create a VCF filter header line
*
* @param key the key for this header line
* @param name the name for this header line
* @param description description for this header line
*/
public VCFSimpleHeaderLine(String key, String name, String description) {
super(key, "");
Map<String, String> map = new LinkedHashMap<String, String>(1);
map.put("Description", description);
initialize(name, map);
} }
/** /**
@ -44,38 +45,50 @@ public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNa
* *
* @param line the header line * @param line the header line
* @param version the vcf header version * @param version the vcf header version
* @param lineType the header line type * @param key the key for this header line
* @param expectedTagOrdering the tag ordering expected for this header line
*/ */
protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) { protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, String key, List<String> expectedTagOrdering) {
super(lineType.toString(), ""); super(key, "");
this.lineType = lineType; Map<String, String> mapping = VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering);
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
name = mapping.get("ID"); name = mapping.get("ID");
description = mapping.get("Description"); initialize(name, mapping);
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided }
description = UNBOUND_DESCRIPTION;
protected void initialize(String name, Map<String, String> genericFields) {
if ( name == null || genericFields == null || genericFields.isEmpty() )
throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s", super.getKey(), name));
this.name = name;
this.genericFields.putAll(genericFields);
} }
protected String toStringEncoding() { protected String toStringEncoding() {
Map<String,Object> map = new LinkedHashMap<String,Object>(); Map<String, Object> map = new LinkedHashMap<String, Object>();
map.put("ID", name); map.put("ID", name);
map.put("Description", description); map.putAll(genericFields);
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map); return getKey() + "=" + VCFHeaderLine.toStringEncoding(map);
} }
public boolean equals(Object o) { public boolean equals(Object o) {
if ( !(o instanceof VCFSimpleHeaderLine) ) if ( !(o instanceof VCFSimpleHeaderLine) )
return false; return false;
VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o; VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o;
return name.equals(other.name) && if ( !name.equals(other.name) || genericFields.size() != other.genericFields.size() )
description.equals(other.description); return false;
for ( Map.Entry<String, String> entry : genericFields.entrySet() ) {
if ( !entry.getValue().equals(other.genericFields.get(entry.getKey())) )
return false;
}
return true;
} }
public String getName() { public String getID() {
return name; return name;
} }
public String getDescription() { public Map<String, String> getGenericFields() {
return description; return genericFields;
} }
} }

View File

@ -155,10 +155,10 @@ public class VCFUtils {
for ( VCFHeader source : headers ) { for ( VCFHeader source : headers ) {
//System.out.printf("Merging in header %s%n", source); //System.out.printf("Merging in header %s%n", source);
for ( VCFHeaderLine line : source.getMetaData()) { for ( VCFHeaderLine line : source.getMetaData()) {
String key = line.getKey();
if ( line instanceof VCFNamedHeaderLine) String key = line.getKey();
key = key + "" + ((VCFNamedHeaderLine) line).getName(); if ( line instanceof VCFIDHeaderLine )
key = key + "-" + ((VCFIDHeaderLine)line).getID();
if ( map.containsKey(key) ) { if ( map.containsKey(key) ) {
VCFHeaderLine other = map.get(key); VCFHeaderLine other = map.get(key);
@ -166,8 +166,8 @@ public class VCFUtils {
continue; continue;
else if ( ! line.getClass().equals(other.getClass()) ) else if ( ! line.getClass().equals(other.getClass()) )
throw new IllegalStateException("Incompatible header types: " + line + " " + other ); throw new IllegalStateException("Incompatible header types: " + line + " " + other );
else if ( line instanceof VCFFilterHeaderLine) { else if ( line instanceof VCFFilterHeaderLine ) {
String lineName = ((VCFFilterHeaderLine) line).getName(); String otherName = ((VCFFilterHeaderLine) other).getName(); String lineName = ((VCFFilterHeaderLine) line).getID(); String otherName = ((VCFFilterHeaderLine) other).getID();
if ( ! lineName.equals(otherName) ) if ( ! lineName.equals(otherName) )
throw new IllegalStateException("Incompatible header types: " + line + " " + other ); throw new IllegalStateException("Incompatible header types: " + line + " " + other );
} else if ( line instanceof VCFCompoundHeaderLine ) { } else if ( line instanceof VCFCompoundHeaderLine ) {

View File

@ -80,11 +80,15 @@ public class GATKReportUnitTest extends BaseTest {
@Test @Test
public void testSimpleGATKReport() { public void testSimpleGATKReport() {
GATKReport report = GATKReport.newSimpleReport("TableName", "a", "b", "Roger", "is", "Awesome"); // Create a new simple GATK report named "TableName" with columns: Roger, is, and Awesome
report.addRow("a", 'F', 12, 23.45, true); GATKReport report = GATKReport.newSimpleReport("TableName", "Roger", "is", "Awesome");
report.addRow("ans", '3', 24.5, 456L, 2345);
report.addRow("hi", null, null, "", 2.3);
// Add data to simple GATK report
report.addRow( 12, 23.45, true);
report.addRow("ans", '3', 24.5);
report.addRow("hi", "", 2.3);
// Print the report to console
//report.print(System.out); //report.print(System.out);
try { try {

View File

@ -0,0 +1,29 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.testng.annotations.Test;
import java.io.File;
import java.util.LinkedList;
import java.util.List;
/**
 * Unit test for {@link BQSRGatherer}: feeds two example recalibration CSV files to the
 * gatherer and asks it to combine them into a single output file.
 *
 * @author Mauricio Carneiro
 * @since 3/7/12
 */
public class BQSRGathererUnitTest {
    RecalibrationArgumentCollection RAC;

    // the two input files handed to the gatherer
    private static File recal1 = new File("public/testdata/exampleCSV.csv");
    private static File recal2 = new File("public/testdata/exampleCSV.2.csv");

    /**
     * Gathers {@code recal1} and {@code recal2} into {@code foo.csv}.
     * The test is currently switched off via {@code enabled = false}.
     */
    @Test(enabled = false)
    public void testCombineTwoFiles() {
        // inputs, in the order they should be gathered
        List<File> inputs = new LinkedList<File>();
        inputs.add(recal1);
        inputs.add(recal2);

        File combined = new File("foo.csv");
        new BQSRGatherer().gather(inputs, combined);
    }
}

View File

@ -50,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
@DataProvider(name = "data") @DataProvider(name = "data")
public Object[][] createData() { public Object[][] createData() {
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dac62fcd25e1052bf18b5707700dda7e"); new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dba5eab2b9587c1062721b164e4fd9a6");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "e10c48dd294fb257802d4e73bb50580d"); new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "de35c93450b46db5fc5516af3c55d62a");
return TestParams.getTests(TestParams.class); return TestParams.getTests(TestParams.class);
} }