Revert "Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable"
This reverts commit 039a6bb01f345322ce2be50ae3634308bb24e77e, reversing changes made to b9c9973d1c638dfc9f8c19b5eb845e99844f9d29.
This commit is contained in:
parent
eb63b48414
commit
baae381acb
|
|
@ -100,7 +100,11 @@ public class GATKReport {
|
||||||
* @param tableDescription the description of the table
|
* @param tableDescription the description of the table
|
||||||
*/
|
*/
|
||||||
public void addTable(String tableName, String tableDescription) {
|
public void addTable(String tableName, String tableDescription) {
|
||||||
GATKReportTable table = new GATKReportTable(tableName, tableDescription);
|
addTable(tableName, tableDescription, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
|
||||||
|
GATKReportTable table = new GATKReportTable(tableName, tableDescription, sortByPrimaryKey);
|
||||||
tables.put(tableName, table);
|
tables.put(tableName, table);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,9 +3,7 @@ package org.broadinstitute.sting.gatk.report;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.HashMap;
|
import java.util.*;
|
||||||
import java.util.LinkedHashMap;
|
|
||||||
import java.util.TreeSet;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
|
@ -96,8 +94,9 @@ public class GATKReportTable {
|
||||||
private String tableDescription;
|
private String tableDescription;
|
||||||
|
|
||||||
private String primaryKeyName;
|
private String primaryKeyName;
|
||||||
private TreeSet<Object> primaryKeyColumn;
|
private Collection<Object> primaryKeyColumn;
|
||||||
private boolean primaryKeyDisplay;
|
private boolean primaryKeyDisplay;
|
||||||
|
boolean sortByPrimaryKey = true;
|
||||||
|
|
||||||
private LinkedHashMap<String, GATKReportColumn> columns;
|
private LinkedHashMap<String, GATKReportColumn> columns;
|
||||||
|
|
||||||
|
|
@ -121,12 +120,17 @@ public class GATKReportTable {
|
||||||
* @param tableDescription the description of the table
|
* @param tableDescription the description of the table
|
||||||
*/
|
*/
|
||||||
public GATKReportTable(String tableName, String tableDescription) {
|
public GATKReportTable(String tableName, String tableDescription) {
|
||||||
if (!isValidName(tableName)) {
|
this(tableName, tableDescription, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
|
||||||
|
if (!isValidName(tableName)) {
|
||||||
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
this.tableDescription = tableDescription;
|
this.tableDescription = tableDescription;
|
||||||
|
this.sortByPrimaryKey = sortByPrimaryKey;
|
||||||
|
|
||||||
columns = new LinkedHashMap<String, GATKReportColumn>();
|
columns = new LinkedHashMap<String, GATKReportColumn>();
|
||||||
}
|
}
|
||||||
|
|
@ -137,20 +141,14 @@ public class GATKReportTable {
|
||||||
* @param primaryKeyName the name of the primary key column
|
* @param primaryKeyName the name of the primary key column
|
||||||
*/
|
*/
|
||||||
public void addPrimaryKey(String primaryKeyName) {
|
public void addPrimaryKey(String primaryKeyName) {
|
||||||
if (!isValidName(primaryKeyName)) {
|
addPrimaryKey(primaryKeyName, true);
|
||||||
throw new ReviewedStingException("Attempted to set a GATKReportTable primary key name of '" + primaryKeyName + "'. GATKReportTable primary key names must be purely alphanumeric - no spaces or special characters are allowed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
this.primaryKeyName = primaryKeyName;
|
|
||||||
|
|
||||||
primaryKeyColumn = new TreeSet<Object>();
|
|
||||||
primaryKeyDisplay = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add an optionally visible primary key column. This becomes the unique identifier for every column in the table, and will always be printed as the first column.
|
* Add an optionally visible primary key column. This becomes the unique identifier for every column in the table, and will always be printed as the first column.
|
||||||
*
|
*
|
||||||
* @param primaryKeyName the name of the primary key column
|
* @param primaryKeyName the name of the primary key column
|
||||||
|
* @param display should this primary key be displayed?
|
||||||
*/
|
*/
|
||||||
public void addPrimaryKey(String primaryKeyName, boolean display) {
|
public void addPrimaryKey(String primaryKeyName, boolean display) {
|
||||||
if (!isValidName(primaryKeyName)) {
|
if (!isValidName(primaryKeyName)) {
|
||||||
|
|
@ -159,7 +157,7 @@ public class GATKReportTable {
|
||||||
|
|
||||||
this.primaryKeyName = primaryKeyName;
|
this.primaryKeyName = primaryKeyName;
|
||||||
|
|
||||||
primaryKeyColumn = new TreeSet<Object>();
|
primaryKeyColumn = sortByPrimaryKey ? new TreeSet<Object>() : new LinkedList<Object>();
|
||||||
primaryKeyDisplay = display;
|
primaryKeyDisplay = display;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,10 +30,16 @@ import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
/**
|
/**
|
||||||
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
|
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
|
||||||
* in the input file. It can dynamically merge the contents of multiple input BAM files, resulting
|
* in the input file. It can dynamically merge the contents of multiple input BAM files, resulting
|
||||||
|
|
@ -52,6 +58,13 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
String platform = null; // E.g. ILLUMINA, 454
|
String platform = null; // E.g. ILLUMINA, 454
|
||||||
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
|
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
|
||||||
int nReadsToPrint = -1;
|
int nReadsToPrint = -1;
|
||||||
|
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
|
||||||
|
public Set<File> sampleFiles;
|
||||||
|
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
|
||||||
|
public Set<String> sampleNames;
|
||||||
|
|
||||||
|
private TreeSet<String> samplesToChoose = new TreeSet<String>();
|
||||||
|
private boolean NO_SAMPLES_SPECIFIED = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The initialize function.
|
* The initialize function.
|
||||||
|
|
@ -59,6 +72,17 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
if ( platform != null )
|
if ( platform != null )
|
||||||
platform = platform.toUpperCase();
|
platform = platform.toUpperCase();
|
||||||
|
|
||||||
|
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||||
|
samplesToChoose.addAll(samplesFromFile);
|
||||||
|
|
||||||
|
if (sampleNames != null)
|
||||||
|
samplesToChoose.addAll(sampleNames);
|
||||||
|
|
||||||
|
if(samplesToChoose.isEmpty()) {
|
||||||
|
NO_SAMPLES_SPECIFIED = true;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -85,6 +109,22 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform))
|
if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (!NO_SAMPLES_SPECIFIED ) {
|
||||||
|
// user specified samples to select
|
||||||
|
String readSample = read.getReadGroup().getSample();
|
||||||
|
boolean found = false;
|
||||||
|
for (String sampleSelected : samplesToChoose) {
|
||||||
|
if (readSample.equalsIgnoreCase(sampleSelected)) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!found)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// check if we've reached the output limit
|
// check if we've reached the output limit
|
||||||
if ( nReadsToPrint == 0 ) {
|
if ( nReadsToPrint == 0 ) {
|
||||||
|
|
|
||||||
|
|
@ -24,11 +24,27 @@ public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation {
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
||||||
int run;
|
int run;
|
||||||
if ( vc.isIndel() && vc.isBiallelic() ) {
|
if (vc.isMixed()) {
|
||||||
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
|
map.put(getKeyNames().get(0), String.format("%s", "MIXED"));
|
||||||
|
return map;
|
||||||
|
|
||||||
|
}
|
||||||
|
else if ( vc.isIndel() ) {
|
||||||
String type="";
|
String type="";
|
||||||
ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref);
|
if (!vc.isBiallelic())
|
||||||
for (int k : inds) {
|
type = "MULTIALLELIC_INDEL";
|
||||||
type = type+ IndelUtils.getIndelClassificationName(k)+".";
|
else {
|
||||||
|
if (vc.isInsertion())
|
||||||
|
type = "INS.";
|
||||||
|
else if (vc.isDeletion())
|
||||||
|
type = "DEL.";
|
||||||
|
else
|
||||||
|
type = "OTHER.";
|
||||||
|
ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref);
|
||||||
|
for (int k : inds) {
|
||||||
|
type = type+ IndelUtils.getIndelClassificationName(k)+".";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
map.put(getKeyNames().get(0), String.format("%s", type));
|
map.put(getKeyNames().get(0), String.format("%s", type));
|
||||||
|
|
|
||||||
|
|
@ -29,9 +29,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMRecordIterator;
|
import net.sf.samtools.SAMRecordIterator;
|
||||||
import net.sf.samtools.util.BlockCompressedInputStream;
|
import net.sf.samtools.util.BlockCompressedInputStream;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.*;
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -102,8 +100,10 @@ public class BAMDiffableReader implements DiffableReader {
|
||||||
final byte[] BAM_MAGIC = "BAM\1".getBytes();
|
final byte[] BAM_MAGIC = "BAM\1".getBytes();
|
||||||
final byte[] buffer = new byte[BAM_MAGIC.length];
|
final byte[] buffer = new byte[BAM_MAGIC.length];
|
||||||
try {
|
try {
|
||||||
FileInputStream fstream = new FileInputStream(file);
|
InputStream fstream = new BufferedInputStream(new FileInputStream(file));
|
||||||
new BlockCompressedInputStream(fstream).read(buffer,0,BAM_MAGIC.length);
|
if ( !BlockCompressedInputStream.isValidFile(fstream) )
|
||||||
|
return false;
|
||||||
|
new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length);
|
||||||
return Arrays.equals(buffer, BAM_MAGIC);
|
return Arrays.equals(buffer, BAM_MAGIC);
|
||||||
} catch ( IOException e ) {
|
} catch ( IOException e ) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
|
|
@ -143,7 +143,7 @@ public class DiffEngine {
|
||||||
* Not that only pairs of the same length are considered as potentially equivalent
|
* Not that only pairs of the same length are considered as potentially equivalent
|
||||||
*
|
*
|
||||||
* @param params determines how we display the items
|
* @param params determines how we display the items
|
||||||
* @param diffs
|
* @param diffs the list of differences to summarize
|
||||||
*/
|
*/
|
||||||
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
|
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
|
||||||
printSummaryReport(summarizeDifferences(diffs), params );
|
printSummaryReport(summarizeDifferences(diffs), params );
|
||||||
|
|
@ -207,14 +207,7 @@ public class DiffEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void printSummaryReport(List<Difference> sortedSummaries, SummaryReportParams params ) {
|
protected void printSummaryReport(List<Difference> sortedSummaries, SummaryReportParams params ) {
|
||||||
GATKReport report = new GATKReport();
|
List<Difference> toShow = new ArrayList<Difference>();
|
||||||
final String tableName = "diffences";
|
|
||||||
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information");
|
|
||||||
GATKReportTable table = report.getTable(tableName);
|
|
||||||
table.addPrimaryKey("Difference", true);
|
|
||||||
table.addColumn("NumberOfOccurrences", 0);
|
|
||||||
table.addColumn("SpecificDifference", 0);
|
|
||||||
|
|
||||||
int count = 0, count1 = 0;
|
int count = 0, count1 = 0;
|
||||||
for ( Difference diff : sortedSummaries ) {
|
for ( Difference diff : sortedSummaries ) {
|
||||||
if ( diff.getCount() < params.minSumDiffToShow )
|
if ( diff.getCount() < params.minSumDiffToShow )
|
||||||
|
|
@ -230,10 +223,26 @@ public class DiffEngine {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
|
toShow.add(diff);
|
||||||
table.set(diff.getPath(), "SpecificDifference", diff.valueDiffString());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if we want it in descending order, reverse the list
|
||||||
|
if ( ! params.descending ) {
|
||||||
|
Collections.reverse(toShow);
|
||||||
|
}
|
||||||
|
|
||||||
|
// now that we have a specific list of values we want to show, display them
|
||||||
|
GATKReport report = new GATKReport();
|
||||||
|
final String tableName = "diffences";
|
||||||
|
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
|
||||||
|
GATKReportTable table = report.getTable(tableName);
|
||||||
|
table.addPrimaryKey("Difference", true);
|
||||||
|
table.addColumn("NumberOfOccurrences", 0);
|
||||||
|
table.addColumn("ExampleDifference", 0);
|
||||||
|
for ( Difference diff : toShow ) {
|
||||||
|
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
|
||||||
|
table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString());
|
||||||
|
}
|
||||||
table.write(params.out);
|
table.write(params.out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -252,7 +261,7 @@ public class DiffEngine {
|
||||||
* commonPostfixLength: how many parts are shared at the end, suppose its 2
|
* commonPostfixLength: how many parts are shared at the end, suppose its 2
|
||||||
* We want to create a string *.*.C.D
|
* We want to create a string *.*.C.D
|
||||||
*
|
*
|
||||||
* @param parts
|
* @param parts the separated path values [above without .]
|
||||||
* @param commonPostfixLength
|
* @param commonPostfixLength
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
|
|
@ -351,6 +360,7 @@ public class DiffEngine {
|
||||||
int maxItemsToDisplay = 0;
|
int maxItemsToDisplay = 0;
|
||||||
int maxCountOneItems = 0;
|
int maxCountOneItems = 0;
|
||||||
int minSumDiffToShow = 0;
|
int minSumDiffToShow = 0;
|
||||||
|
boolean descending = true;
|
||||||
|
|
||||||
public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) {
|
public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) {
|
||||||
this.out = out;
|
this.out = out;
|
||||||
|
|
@ -358,5 +368,9 @@ public class DiffEngine {
|
||||||
this.maxCountOneItems = maxCountOneItems;
|
this.maxCountOneItems = maxCountOneItems;
|
||||||
this.minSumDiffToShow = minSumDiffToShow;
|
this.minSumDiffToShow = minSumDiffToShow;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setDescending(boolean descending) {
|
||||||
|
this.descending = descending;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -112,6 +112,7 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
|
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
|
||||||
|
params.setDescending(false);
|
||||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -72,13 +72,19 @@ public class VCFDiffableReader implements DiffableReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
String line = lineReader.readLine();
|
String line = lineReader.readLine();
|
||||||
int count = 0;
|
int count = 0, nRecordsAtPos = 1;
|
||||||
|
String prevName = "";
|
||||||
while ( line != null ) {
|
while ( line != null ) {
|
||||||
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
VariantContext vc = (VariantContext)vcfCodec.decode(line);
|
VariantContext vc = (VariantContext)vcfCodec.decode(line);
|
||||||
String name = vc.getChr() + ":" + vc.getStart();
|
String name = vc.getChr() + ":" + vc.getStart();
|
||||||
|
if ( name.equals(prevName) ) {
|
||||||
|
name += "_" + ++nRecordsAtPos;
|
||||||
|
} else {
|
||||||
|
prevName = name;
|
||||||
|
}
|
||||||
DiffNode vcRoot = DiffNode.empty(name, root);
|
DiffNode vcRoot = DiffNode.empty(name, root);
|
||||||
|
|
||||||
// add fields
|
// add fields
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
|
import org.apache.poi.hpsf.Variant;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
|
@ -149,7 +150,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
// get all of the vcf rods at this locus
|
// get all of the vcf rods at this locus
|
||||||
// Need to provide reference bases to simpleMerge starting at current locus
|
// Need to provide reference bases to simpleMerge starting at current locus
|
||||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, null,context.getLocation(), true, false);
|
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false);
|
||||||
|
|
||||||
if ( sitesOnlyVCF ) {
|
if ( sitesOnlyVCF ) {
|
||||||
vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs);
|
vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs);
|
||||||
|
|
@ -172,17 +173,25 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
||||||
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
VariantContext mergedVC;
|
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||||
if ( master ) {
|
if ( master ) {
|
||||||
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
|
mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master"));
|
||||||
} else {
|
} else {
|
||||||
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType,
|
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
|
// iterate over the types so that it's deterministic
|
||||||
|
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
||||||
|
if ( VCsByType.containsKey(type) )
|
||||||
|
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||||
|
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||||
|
ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
|
for ( VariantContext mergedVC : mergedVCs ) {
|
||||||
|
// only operate at the start of events
|
||||||
|
if ( mergedVC == null )
|
||||||
|
continue;
|
||||||
|
|
||||||
if ( mergedVC != null ) { // only operate at the start of events
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
|
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
|
||||||
// re-compute chromosome counts
|
// re-compute chromosome counts
|
||||||
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
|
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,16 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
|
@ -44,6 +54,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.lang.annotation.AnnotationFormatError;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -91,6 +104,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
|
@Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
|
||||||
private boolean KEEP_AF_SPECTRUM = false;
|
private boolean KEEP_AF_SPECTRUM = false;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false)
|
||||||
|
private File AF_FILE = new File("");
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName="family_structure_file", shortName="familyFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||||
|
private File FAMILY_STRUCTURE_FILE = null;
|
||||||
|
|
||||||
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||||
private String FAMILY_STRUCTURE = "";
|
private String FAMILY_STRUCTURE = "";
|
||||||
|
|
@ -113,6 +133,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="selectIndels", shortName="indels", doc="Select only Indels.", required=false)
|
@Argument(fullName="selectIndels", shortName="indels", doc="Select only Indels.", required=false)
|
||||||
private boolean SELECT_INDELS = false;
|
private boolean SELECT_INDELS = false;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName="outMVFile", shortName="outMVFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||||
|
private String outMVFile = null;
|
||||||
|
|
||||||
/* Private class used to store the intermediate variants in the integer random selection process */
|
/* Private class used to store the intermediate variants in the integer random selection process */
|
||||||
private class RandomVariantStructure {
|
private class RandomVariantStructure {
|
||||||
|
|
@ -140,7 +163,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
private boolean DISCORDANCE_ONLY = false;
|
private boolean DISCORDANCE_ONLY = false;
|
||||||
private boolean CONCORDANCE_ONLY = false;
|
private boolean CONCORDANCE_ONLY = false;
|
||||||
|
|
||||||
private MendelianViolation mv;
|
private Set<MendelianViolation> mvSet = new HashSet<MendelianViolation>();
|
||||||
|
|
||||||
/* default name for the variant dataset (VCF) */
|
/* default name for the variant dataset (VCF) */
|
||||||
private final String variantRodName = "variant";
|
private final String variantRodName = "variant";
|
||||||
|
|
@ -155,8 +178,14 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
private RandomVariantStructure [] variantArray;
|
private RandomVariantStructure [] variantArray;
|
||||||
|
|
||||||
|
|
||||||
|
/* Variables used for random selection with AF boosting */
|
||||||
|
private ArrayList<Double> afBreakpoints = null;
|
||||||
|
private ArrayList<Double> afBoosts = null;
|
||||||
|
double bkDelta = 0.0;
|
||||||
|
|
||||||
|
|
||||||
|
private PrintStream outMVFileStream = null;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||||
|
|
@ -212,10 +241,29 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
CONCORDANCE_ONLY = concordanceRodName.length() > 0;
|
CONCORDANCE_ONLY = concordanceRodName.length() > 0;
|
||||||
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceRodName);
|
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceRodName);
|
||||||
|
|
||||||
if (MENDELIAN_VIOLATIONS)
|
if (MENDELIAN_VIOLATIONS) {
|
||||||
mv = new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
if ( FAMILY_STRUCTURE_FILE != null) {
|
||||||
|
try {
|
||||||
|
for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) {
|
||||||
|
MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
||||||
|
if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom()))
|
||||||
|
mvSet.add(mv);
|
||||||
|
}
|
||||||
|
} catch ( FileNotFoundException e ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(AF_FILE, e);
|
||||||
|
}
|
||||||
|
if (outMVFile != null)
|
||||||
|
try {
|
||||||
|
outMVFileStream = new PrintStream(outMVFile);
|
||||||
|
}
|
||||||
|
catch (FileNotFoundException e) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
||||||
|
}
|
||||||
|
else
|
||||||
|
mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||||
|
}
|
||||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||||
mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||||
MENDELIAN_VIOLATIONS = true;
|
MENDELIAN_VIOLATIONS = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -227,6 +275,33 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
SELECT_RANDOM_FRACTION = fractionRandom > 0;
|
SELECT_RANDOM_FRACTION = fractionRandom > 0;
|
||||||
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track");
|
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track");
|
||||||
|
|
||||||
|
|
||||||
|
if (KEEP_AF_SPECTRUM) {
|
||||||
|
try {
|
||||||
|
afBreakpoints = new ArrayList<Double>();
|
||||||
|
afBoosts = new ArrayList<Double>();
|
||||||
|
logger.info("Reading in AF boost table...");
|
||||||
|
boolean firstLine = false;
|
||||||
|
for ( final String line : new XReadLines( AF_FILE ) ) {
|
||||||
|
if (!firstLine) {
|
||||||
|
firstLine = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
final String[] vals = line.split(" ");
|
||||||
|
|
||||||
|
double bkp = Double.valueOf(vals[0]);
|
||||||
|
double afb = Double.valueOf(vals[1]);
|
||||||
|
afBreakpoints.add(bkp);
|
||||||
|
afBoosts.add(afb);
|
||||||
|
|
||||||
|
}
|
||||||
|
bkDelta = afBreakpoints.get(0);
|
||||||
|
} catch ( FileNotFoundException e ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(AF_FILE, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -250,9 +325,24 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
for (VariantContext vc : vcs) {
|
for (VariantContext vc : vcs) {
|
||||||
if (MENDELIAN_VIOLATIONS) {
|
if (MENDELIAN_VIOLATIONS) {
|
||||||
if (!mv.isViolation(vc)) {
|
boolean foundMV = false;
|
||||||
break;
|
for (MendelianViolation mv : mvSet) {
|
||||||
|
if (mv.isViolation(vc)) {
|
||||||
|
foundMV = true;
|
||||||
|
//System.out.println(vc.toString());
|
||||||
|
if (outMVFile != null)
|
||||||
|
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||||
|
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
||||||
|
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)),
|
||||||
|
mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(),
|
||||||
|
vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
||||||
|
vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
||||||
|
vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!foundMV)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (DISCORDANCE_ONLY) {
|
if (DISCORDANCE_ONLY) {
|
||||||
Collection<VariantContext> compVCs = tracker.getVariantContexts(ref, discordanceRodName, null, context.getLocation(), true, false);
|
Collection<VariantContext> compVCs = tracker.getVariantContexts(ref, discordanceRodName, null, context.getLocation(), true, false);
|
||||||
|
|
@ -283,46 +373,59 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
if (SELECT_RANDOM_NUMBER) {
|
if (SELECT_RANDOM_NUMBER) {
|
||||||
randomlyAddVariant(++variantNumber, sub, ref.getBase());
|
randomlyAddVariant(++variantNumber, sub, ref.getBase());
|
||||||
}
|
}
|
||||||
else if (!SELECT_RANDOM_FRACTION || GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom) {
|
else if (!SELECT_RANDOM_FRACTION || (!KEEP_AF_SPECTRUM && GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||||
vcfWriter.add(sub, ref.getBase());
|
vcfWriter.add(sub, ref.getBase());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) {
|
if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) {
|
||||||
Collection<VariantContext> compVCs = tracker.getVariantContexts(ref, inputAFRodName, null, context.getLocation(), true, false);
|
|
||||||
if (compVCs.isEmpty())
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
|
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
|
||||||
// We then pick a variant with probablity AF*desiredFraction
|
// We then pick a variant with probablity AF*desiredFraction
|
||||||
for (VariantContext compVC : compVCs) {
|
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
|
||||||
if ( compVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
|
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY);
|
||||||
String afo = compVC.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY);
|
|
||||||
|
|
||||||
double af;
|
double af;
|
||||||
if (afo.contains(",")) {
|
double afBoost = 1.0;
|
||||||
String[] afs = afo.split(",");
|
if (afo.contains(",")) {
|
||||||
afs[0] = afs[0].substring(1,afs[0].length());
|
String[] afs = afo.split(",");
|
||||||
afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
|
afs[0] = afs[0].substring(1,afs[0].length());
|
||||||
|
afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
|
||||||
|
|
||||||
double[] afd = new double[afs.length];
|
double[] afd = new double[afs.length];
|
||||||
|
|
||||||
for (int k=0; k < afd.length; k++)
|
for (int k=0; k < afd.length; k++)
|
||||||
afd[k] = Double.valueOf(afs[k]);
|
afd[k] = Double.valueOf(afs[k]);
|
||||||
|
|
||||||
af = MathUtils.arrayMax(afd);
|
af = MathUtils.arrayMax(afd);
|
||||||
//af = Double.valueOf(afs[0]);
|
//af = Double.valueOf(afs[0]);
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
af = Double.valueOf(afo);
|
|
||||||
|
|
||||||
//System.out.format("%s .. %4.4f\n",afo.toString(), af);
|
|
||||||
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * af)
|
|
||||||
vcfWriter.add(sub, ref.getBase());
|
|
||||||
}
|
}
|
||||||
break; // do only one vc
|
else
|
||||||
|
af = Double.valueOf(afo);
|
||||||
|
|
||||||
|
// now boost af by table read from file if desired
|
||||||
|
//double bkpt = 0.0;
|
||||||
|
int bkidx = 0;
|
||||||
|
if (!afBreakpoints.isEmpty()) {
|
||||||
|
for ( Double bkpt : afBreakpoints) {
|
||||||
|
if (af < bkpt + bkDelta)
|
||||||
|
break;
|
||||||
|
else bkidx++;
|
||||||
|
}
|
||||||
|
if (bkidx >=afBoosts.size())
|
||||||
|
bkidx = afBoosts.size()-1;
|
||||||
|
afBoost = afBoosts.get(bkidx);
|
||||||
|
//System.out.formatPrin("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//System.out.format("%s .. %4.4f\n",afo.toString(), af);
|
||||||
|
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost)
|
||||||
|
vcfWriter.add(sub, ref.getBase());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -406,8 +509,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
private boolean haveSameGenotypes(Genotype g1, Genotype g2) {
|
private boolean haveSameGenotypes(Genotype g1, Genotype g2) {
|
||||||
if ((g1.isCalled() && g2.isFiltered()) ||
|
if ((g1.isCalled() && g2.isFiltered()) ||
|
||||||
(g2.isCalled() && g1.isFiltered()) ||
|
(g2.isCalled() && g1.isFiltered()) ||
|
||||||
(g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED))
|
(g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
List<Allele> a1s = g1.getAlleles();
|
List<Allele> a1s = g1.getAlleles();
|
||||||
|
|
@ -440,7 +543,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
* @param vc the VariantContext record to subset
|
* @param vc the VariantContext record to subset
|
||||||
* @param samples the samples to extract
|
* @param samples the samples to extract
|
||||||
* @return the subsetted VariantContext
|
* @return the subsetted VariantContext
|
||||||
*/
|
*/
|
||||||
private VariantContext subsetRecord(VariantContext vc, Set<String> samples) {
|
private VariantContext subsetRecord(VariantContext vc, Set<String> samples) {
|
||||||
if ( samples == null || samples.isEmpty() )
|
if ( samples == null || samples.isEmpty() )
|
||||||
return vc;
|
return vc;
|
||||||
|
|
@ -450,7 +553,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
if ( samples.contains(genotypePair.getKey()) )
|
if ( samples.contains(genotypePair.getKey()) )
|
||||||
genotypes.add(genotypePair.getValue());
|
genotypes.add(genotypePair.getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles());
|
VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles());
|
||||||
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());
|
HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());
|
||||||
|
|
@ -460,7 +563,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
Genotype g = sub.getGenotype(sample);
|
Genotype g = sub.getGenotype(sample);
|
||||||
|
|
||||||
if (g.isNotFiltered() && g.isCalled()) {
|
if (g.isNotFiltered() && g.isCalled()) {
|
||||||
|
|
||||||
String dp = (String) g.getAttribute("DP");
|
String dp = (String) g.getAttribute("DP");
|
||||||
if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||||
depth += Integer.valueOf(dp);
|
depth += Integer.valueOf(dp);
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
|
@ -33,7 +35,6 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -74,17 +75,29 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
// #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
|
// #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
|
||||||
getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } });
|
getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } });
|
||||||
getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } });
|
getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } });
|
||||||
getters.put("REF", new Getter() { public String get(VariantContext vc) { return vc.getReference().toString(); } });
|
getters.put("REF", new Getter() {
|
||||||
|
public String get(VariantContext vc) {
|
||||||
|
String x = "";
|
||||||
|
if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
|
||||||
|
Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
|
||||||
|
x=x+new String(new byte[]{refByte});
|
||||||
|
}
|
||||||
|
return x+vc.getReference().getDisplayString();
|
||||||
|
}
|
||||||
|
});
|
||||||
getters.put("ALT", new Getter() {
|
getters.put("ALT", new Getter() {
|
||||||
public String get(VariantContext vc) {
|
public String get(VariantContext vc) {
|
||||||
StringBuilder x = new StringBuilder();
|
StringBuilder x = new StringBuilder();
|
||||||
int n = vc.getAlternateAlleles().size();
|
int n = vc.getAlternateAlleles().size();
|
||||||
|
|
||||||
if ( n == 0 ) return ".";
|
if ( n == 0 ) return ".";
|
||||||
|
if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
|
||||||
|
Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
|
||||||
|
x.append(new String(new byte[]{refByte}));
|
||||||
|
}
|
||||||
|
|
||||||
for ( int i = 0; i < n; i++ ) {
|
for ( int i = 0; i < n; i++ ) {
|
||||||
if ( i != 0 ) x.append(",");
|
if ( i != 0 ) x.append(",");
|
||||||
x.append(vc.getAlternateAllele(i).toString());
|
x.append(vc.getAlternateAllele(i).getDisplayString());
|
||||||
}
|
}
|
||||||
return x.toString();
|
return x.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -168,6 +181,31 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc));
|
throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (field.equals("AF") || field.equals("AC")) {
|
||||||
|
String afo = val;
|
||||||
|
|
||||||
|
double af=0;
|
||||||
|
if (afo.contains(",")) {
|
||||||
|
String[] afs = afo.split(",");
|
||||||
|
afs[0] = afs[0].substring(1,afs[0].length());
|
||||||
|
afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
|
||||||
|
|
||||||
|
double[] afd = new double[afs.length];
|
||||||
|
|
||||||
|
for (int k=0; k < afd.length; k++)
|
||||||
|
afd[k] = Double.valueOf(afs[k]);
|
||||||
|
|
||||||
|
af = MathUtils.arrayMax(afd);
|
||||||
|
//af = Double.valueOf(afs[0]);
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if (!afo.equals("NA"))
|
||||||
|
af = Double.valueOf(afo);
|
||||||
|
|
||||||
|
val = Double.toString(af);
|
||||||
|
|
||||||
|
}
|
||||||
vals.add(val);
|
vals.add(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -289,8 +289,8 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a newly allocated VC that is the same as VC, but without genotypes
|
* Returns a newly allocated VC that is the same as VC, but without genotypes
|
||||||
* @param vc
|
* @param vc variant context
|
||||||
* @return
|
* @return new VC without genotypes
|
||||||
*/
|
*/
|
||||||
@Requires("vc != null")
|
@Requires("vc != null")
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
|
|
@ -303,8 +303,8 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a newly allocated list of VC, where each VC is the same as the input VCs, but without genotypes
|
* Returns a newly allocated list of VC, where each VC is the same as the input VCs, but without genotypes
|
||||||
* @param vcs
|
* @param vcs collection of VCs
|
||||||
* @return
|
* @return new VCs without genotypes
|
||||||
*/
|
*/
|
||||||
@Requires("vcs != null")
|
@Requires("vcs != null")
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
|
|
@ -362,9 +362,9 @@ public class VariantContextUtils {
|
||||||
* information per genotype. The master merge will add the PQ information from each genotype record, where
|
* information per genotype. The master merge will add the PQ information from each genotype record, where
|
||||||
* appropriate, to the master VC.
|
* appropriate, to the master VC.
|
||||||
*
|
*
|
||||||
* @param unsortedVCs
|
* @param unsortedVCs collection of VCs
|
||||||
* @param masterName
|
* @param masterName name of master VC
|
||||||
* @return
|
* @return master-merged VC
|
||||||
*/
|
*/
|
||||||
public static VariantContext masterMerge(Collection<VariantContext> unsortedVCs, String masterName) {
|
public static VariantContext masterMerge(Collection<VariantContext> unsortedVCs, String masterName) {
|
||||||
VariantContext master = findMaster(unsortedVCs, masterName);
|
VariantContext master = findMaster(unsortedVCs, masterName);
|
||||||
|
|
@ -435,11 +435,15 @@ public class VariantContextUtils {
|
||||||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||||
* the sample name
|
* the sample name
|
||||||
*
|
*
|
||||||
* @param unsortedVCs
|
* @param genomeLocParser loc parser
|
||||||
* @param priorityListOfVCs
|
* @param unsortedVCs collection of unsorted VCs
|
||||||
* @param filteredRecordMergeType
|
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||||
* @param genotypeMergeOptions
|
* @param filteredRecordMergeType merge type for filtered records
|
||||||
* @return
|
* @param genotypeMergeOptions merge option for genotypes
|
||||||
|
* @param annotateOrigin should we annotate the set it came from?
|
||||||
|
* @param printMessages should we print messages?
|
||||||
|
* @param inputRefBase the ref base
|
||||||
|
* @return new VariantContext
|
||||||
*/
|
*/
|
||||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||||
|
|
@ -448,6 +452,24 @@ public class VariantContextUtils {
|
||||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
|
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||||
|
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||||
|
* the sample name
|
||||||
|
*
|
||||||
|
* @param genomeLocParser loc parser
|
||||||
|
* @param unsortedVCs collection of unsorted VCs
|
||||||
|
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||||
|
* @param filteredRecordMergeType merge type for filtered records
|
||||||
|
* @param genotypeMergeOptions merge option for genotypes
|
||||||
|
* @param annotateOrigin should we annotate the set it came from?
|
||||||
|
* @param printMessages should we print messages?
|
||||||
|
* @param inputRefBase the ref base
|
||||||
|
* @param setKey the key name of the set
|
||||||
|
* @param filteredAreUncalled are filtered records uncalled?
|
||||||
|
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||||
|
* @return new VariantContext
|
||||||
|
*/
|
||||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
||||||
|
|
@ -470,7 +492,7 @@ public class VariantContextUtils {
|
||||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||||
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
|
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
|
||||||
}
|
}
|
||||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled
|
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// establish the baseline info from the first VC
|
// establish the baseline info from the first VC
|
||||||
|
|
@ -615,6 +637,17 @@ public class VariantContextUtils {
|
||||||
return merged;
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||||
|
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||||
|
for ( VariantContext vc : VCs ) {
|
||||||
|
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||||
|
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||||
|
mappedVCs.get(vc.getType()).add(vc);
|
||||||
|
}
|
||||||
|
|
||||||
|
return mappedVCs;
|
||||||
|
}
|
||||||
|
|
||||||
private static class AlleleMapper {
|
private static class AlleleMapper {
|
||||||
private VariantContext vc = null;
|
private VariantContext vc = null;
|
||||||
private Map<Allele, Allele> map = null;
|
private Map<Allele, Allele> map = null;
|
||||||
|
|
@ -834,6 +867,7 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a genome location, given a variant context
|
* create a genome location, given a variant context
|
||||||
|
* @param genomeLocParser parser
|
||||||
* @param vc the variant context
|
* @param vc the variant context
|
||||||
* @return the genomeLoc
|
* @return the genomeLoc
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -52,8 +52,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@DataProvider(name = "data")
|
@DataProvider(name = "data")
|
||||||
public Object[][] createData() {
|
public Object[][] createData() {
|
||||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "fb7f4e011487ca56bce865ae5468cdc5");
|
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e");
|
||||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "423cec3befbf0a72d8bc3757ee628fc4");
|
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc");
|
||||||
return TestParams.getTests(TestParams.class);
|
return TestParams.getTests(TestParams.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -80,9 +80,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
|
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
|
||||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
|
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
|
||||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); }
|
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); }
|
||||||
|
|
||||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); }
|
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083", false); }
|
||||||
|
|
||||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
|
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
|
||||||
|
|
||||||
|
|
@ -100,7 +100,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5"));
|
Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e"));
|
||||||
executeTest("threeWayWithRefs", spec);
|
executeTest("threeWayWithRefs", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testComplexVariantsToTable() {
|
public void testComplexVariantsToTable() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(" -AMD"),
|
WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(" -AMD"),
|
||||||
Arrays.asList("b2a3712c1bfad8f1383ffada8b5017ba"));
|
Arrays.asList("e8f771995127b727fb433da91dd4ee98"));
|
||||||
executeTest("testComplexVariantsToTable", spec).getFirst();
|
executeTest("testComplexVariantsToTable", spec).getFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,8 @@
|
||||||
<class name="edu.mit.broad.picard.genotype.geli.GeliFileWriter" />
|
<class name="edu.mit.broad.picard.genotype.geli.GeliFileWriter" />
|
||||||
<class name="edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods" />
|
<class name="edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods" />
|
||||||
<class name="edu.mit.broad.picard.util.PasteParser" />
|
<class name="edu.mit.broad.picard.util.PasteParser" />
|
||||||
|
<class name="edu.mit.broad.picard.util.PicardAggregationFsUtil" />
|
||||||
|
<class name="edu.mit.broad.picard.util.PicardFileSystemUtil" />
|
||||||
<class name="edu.mit.broad.picard.variation.KnownVariantCodecV2" />
|
<class name="edu.mit.broad.picard.variation.KnownVariantCodecV2" />
|
||||||
<class name="edu.mit.broad.picard.variation.KnownVariantCodec" />
|
<class name="edu.mit.broad.picard.variation.KnownVariantCodec" />
|
||||||
<class name="edu.mit.broad.picard.variation.KnownVariantFileHeader" />
|
<class name="edu.mit.broad.picard.variation.KnownVariantFileHeader" />
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,9 @@ class DataProcessingPipeline extends QScript {
|
||||||
@Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false)
|
@Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false)
|
||||||
var bwaThreads: Int = 1
|
var bwaThreads: Int = 1
|
||||||
|
|
||||||
|
@Input(doc="Dont perform validation on the BAM files", fullName="no_validation", shortName="nv", required=false)
|
||||||
|
var noValidation: Boolean = false
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Global Variables
|
* Global Variables
|
||||||
|
|
@ -135,7 +138,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
println("\n\n*** DEBUG ***\n")
|
println("\n\n*** INPUT FILES ***\n")
|
||||||
// Creating one file for each sample in the dataset
|
// Creating one file for each sample in the dataset
|
||||||
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||||
for ((sample, flist) <- sampleTable) {
|
for ((sample, flist) <- sampleTable) {
|
||||||
|
|
@ -149,7 +152,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
sampleBamFiles(sample) = sampleFileName
|
sampleBamFiles(sample) = sampleFileName
|
||||||
add(joinBams(flist, sampleFileName))
|
add(joinBams(flist, sampleFileName))
|
||||||
}
|
}
|
||||||
println("*** DEBUG ***\n\n")
|
println("*** INPUT FILES ***\n\n")
|
||||||
|
|
||||||
return sampleBamFiles.toMap
|
return sampleBamFiles.toMap
|
||||||
}
|
}
|
||||||
|
|
@ -246,7 +249,12 @@ class DataProcessingPipeline extends QScript {
|
||||||
val preValidateLog = swapExt(bam, ".bam", ".pre.validation")
|
val preValidateLog = swapExt(bam, ".bam", ".pre.validation")
|
||||||
val postValidateLog = swapExt(bam, ".bam", ".post.validation")
|
val postValidateLog = swapExt(bam, ".bam", ".post.validation")
|
||||||
|
|
||||||
add(validate(bam, preValidateLog))
|
// Validation is an optional step for the BAM file generated after
|
||||||
|
// alignment and the final bam file of the pipeline.
|
||||||
|
if (!noValidation) {
|
||||||
|
add(validate(bam, preValidateLog),
|
||||||
|
validate(recalBam, postValidateLog))
|
||||||
|
}
|
||||||
|
|
||||||
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||||
add(target(bam, targetIntervals))
|
add(target(bam, targetIntervals))
|
||||||
|
|
@ -257,8 +265,8 @@ class DataProcessingPipeline extends QScript {
|
||||||
recal(dedupedBam, preRecalFile, recalBam),
|
recal(dedupedBam, preRecalFile, recalBam),
|
||||||
cov(recalBam, postRecalFile),
|
cov(recalBam, postRecalFile),
|
||||||
analyzeCovariates(preRecalFile, preOutPath),
|
analyzeCovariates(preRecalFile, preOutPath),
|
||||||
analyzeCovariates(postRecalFile, postOutPath),
|
analyzeCovariates(postRecalFile, postOutPath))
|
||||||
validate(recalBam, postValidateLog))
|
|
||||||
|
|
||||||
cohortList :+= recalBam
|
cohortList :+= recalBam
|
||||||
}
|
}
|
||||||
|
|
@ -282,6 +290,13 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.isIntermediate = true
|
this.isIntermediate = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// General arguments to non-GATK tools
|
||||||
|
trait ExternalCommonArgs extends CommandLineFunction {
|
||||||
|
this.memoryLimit = 4
|
||||||
|
this.isIntermediate = true
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
||||||
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||||
this.input_file :+= inBams
|
this.input_file :+= inBams
|
||||||
|
|
@ -300,8 +315,8 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.targetIntervals = tIntervals
|
this.targetIntervals = tIntervals
|
||||||
this.out = outBam
|
this.out = outBam
|
||||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||||
if (!indels.isEmpty)
|
if (!qscript.indels.isEmpty)
|
||||||
this.rodBind :+= RodBind("indels", "VCF", indels)
|
this.rodBind :+= RodBind("indels", "VCF", qscript.indels)
|
||||||
this.consensusDeterminationModel = consensusDeterminationModel
|
this.consensusDeterminationModel = consensusDeterminationModel
|
||||||
this.compress = 0
|
this.compress = 0
|
||||||
this.scatterCount = nContigs
|
this.scatterCount = nContigs
|
||||||
|
|
@ -332,7 +347,6 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.isIntermediate = false
|
this.isIntermediate = false
|
||||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -350,48 +364,41 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||||
}
|
}
|
||||||
|
|
||||||
case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates {
|
case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs {
|
||||||
this.input = List(inBam)
|
this.input = List(inBam)
|
||||||
this.output = outBam
|
this.output = outBam
|
||||||
this.metrics = metricsFile
|
this.metrics = metricsFile
|
||||||
this.memoryLimit = 6
|
|
||||||
this.isIntermediate = true
|
|
||||||
this.analysisName = queueLogDir + outBam + ".dedup"
|
this.analysisName = queueLogDir + outBam + ".dedup"
|
||||||
this.jobName = queueLogDir + outBam + ".dedup"
|
this.jobName = queueLogDir + outBam + ".dedup"
|
||||||
}
|
}
|
||||||
|
|
||||||
case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles {
|
case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
|
||||||
this.input = inBams
|
this.input = inBams
|
||||||
this.output = outBam
|
this.output = outBam
|
||||||
this.memoryLimit = 4
|
|
||||||
this.isIntermediate = true
|
|
||||||
this.analysisName = queueLogDir + outBam + ".joinBams"
|
this.analysisName = queueLogDir + outBam + ".joinBams"
|
||||||
this.jobName = queueLogDir + outBam + ".joinBams"
|
this.jobName = queueLogDir + outBam + ".joinBams"
|
||||||
}
|
}
|
||||||
|
|
||||||
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam {
|
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs {
|
||||||
this.input = List(inSam)
|
this.input = List(inSam)
|
||||||
this.output = outBam
|
this.output = outBam
|
||||||
this.sortOrder = sortOrderP
|
this.sortOrder = sortOrderP
|
||||||
this.memoryLimit = 4
|
|
||||||
this.isIntermediate = true
|
|
||||||
this.analysisName = queueLogDir + outBam + ".sortSam"
|
this.analysisName = queueLogDir + outBam + ".sortSam"
|
||||||
this.jobName = queueLogDir + outBam + ".sortSam"
|
this.jobName = queueLogDir + outBam + ".sortSam"
|
||||||
}
|
}
|
||||||
|
|
||||||
case class validate (inBam: File, outLog: File) extends ValidateSamFile {
|
case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs {
|
||||||
this.input = List(inBam)
|
this.input = List(inBam)
|
||||||
this.output = outLog
|
this.output = outLog
|
||||||
this.maxRecordsInRam = 100000
|
this.maxRecordsInRam = 100000
|
||||||
this.REFERENCE_SEQUENCE = qscript.reference
|
this.REFERENCE_SEQUENCE = qscript.reference
|
||||||
this.memoryLimit = 4
|
|
||||||
this.isIntermediate = false
|
this.isIntermediate = false
|
||||||
this.analysisName = queueLogDir + outLog + ".validate"
|
this.analysisName = queueLogDir + outLog + ".validate"
|
||||||
this.jobName = queueLogDir + outLog + ".validate"
|
this.jobName = queueLogDir + outLog + ".validate"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups {
|
case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups with ExternalCommonArgs {
|
||||||
this.input = List(inBam)
|
this.input = List(inBam)
|
||||||
this.output = outBam
|
this.output = outBam
|
||||||
this.RGID = readGroup.id
|
this.RGID = readGroup.id
|
||||||
|
|
@ -407,12 +414,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.jobName = queueLogDir + outBam + ".rg"
|
this.jobName = queueLogDir + outBam + ".rg"
|
||||||
}
|
}
|
||||||
|
|
||||||
trait BWACommonArgs extends CommandLineFunction {
|
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs {
|
||||||
this.memoryLimit = 4
|
|
||||||
this.isIntermediate = true
|
|
||||||
}
|
|
||||||
|
|
||||||
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with BWACommonArgs {
|
|
||||||
@Input(doc="bam file to be aligned") var bam = inBam
|
@Input(doc="bam file to be aligned") var bam = inBam
|
||||||
@Output(doc="output sai file") var sai = outSai
|
@Output(doc="output sai file") var sai = outSai
|
||||||
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai
|
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai
|
||||||
|
|
@ -420,7 +422,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.jobName = queueLogDir + outSai + ".bwa_aln_se"
|
this.jobName = queueLogDir + outSai + ".bwa_aln_se"
|
||||||
}
|
}
|
||||||
|
|
||||||
case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with BWACommonArgs {
|
case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs {
|
||||||
@Input(doc="bam file to be aligned") var bam = inBam
|
@Input(doc="bam file to be aligned") var bam = inBam
|
||||||
@Output(doc="output sai file for 1st mating pair") var sai = outSai1
|
@Output(doc="output sai file for 1st mating pair") var sai = outSai1
|
||||||
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai
|
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai
|
||||||
|
|
@ -428,7 +430,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1"
|
this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1"
|
||||||
}
|
}
|
||||||
|
|
||||||
case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with BWACommonArgs {
|
case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
|
||||||
@Input(doc="bam file to be aligned") var bam = inBam
|
@Input(doc="bam file to be aligned") var bam = inBam
|
||||||
@Input(doc="bwa alignment index file") var sai = inSai
|
@Input(doc="bwa alignment index file") var sai = inSai
|
||||||
@Output(doc="output aligned bam file") var alignedBam = outBam
|
@Output(doc="output aligned bam file") var alignedBam = outBam
|
||||||
|
|
@ -437,7 +439,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.jobName = queueLogDir + outBam + ".bwa_sam_se"
|
this.jobName = queueLogDir + outBam + ".bwa_sam_se"
|
||||||
}
|
}
|
||||||
|
|
||||||
case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with BWACommonArgs {
|
case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
|
||||||
@Input(doc="bam file to be aligned") var bam = inBam
|
@Input(doc="bam file to be aligned") var bam = inBam
|
||||||
@Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1
|
@Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1
|
||||||
@Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2
|
@Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2
|
||||||
|
|
|
||||||
|
|
@ -20,14 +20,14 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
@Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
|
@Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
|
||||||
var input: File = _
|
var input: File = _
|
||||||
|
|
||||||
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false)
|
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true)
|
||||||
var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R")
|
var R: String = _
|
||||||
|
|
||||||
@Input(doc="Reference fasta file", shortName="R", required=false)
|
@Input(doc="Reference fasta file", shortName="R", required=true)
|
||||||
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
var reference: File = _ // new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
||||||
|
|
||||||
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
|
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=true)
|
||||||
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
var dbSNP: File = _ // new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
||||||
|
|
||||||
val queueLogDir: String = ".qlog/"
|
val queueLogDir: String = ".qlog/"
|
||||||
var nContigs: Int = 0
|
var nContigs: Int = 0
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -1,3 +0,0 @@
|
||||||
<ivy-module version="1.0">
|
|
||||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1941" status="integration" publication="20110614114100" />
|
|
||||||
</ivy-module>
|
|
||||||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
||||||
<ivy-module version="1.0">
|
<ivy-module version="1.0">
|
||||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1954" status="integration" publication="20110712113100" />
|
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1959" status="integration" publication="20110718185300" />
|
||||||
</ivy-module>
|
</ivy-module>
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
<ivy-module version="1.0">
|
|
||||||
<info organisation="net.sf" module="picard" revision="1.48.889" status="release" />
|
|
||||||
</ivy-module>
|
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
||||||
|
<ivy-module version="1.0">
|
||||||
|
<info organisation="net.sf" module="picard" revision="1.49.895" status="release" />
|
||||||
|
</ivy-module>
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
<ivy-module version="1.0">
|
|
||||||
<info organisation="net.sf" module="sam" revision="1.48.889" status="release" />
|
|
||||||
</ivy-module>
|
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
||||||
|
<ivy-module version="1.0">
|
||||||
|
<info organisation="net.sf" module="sam" revision="1.49.895" status="release" />
|
||||||
|
</ivy-module>
|
||||||
Loading…
Reference in New Issue