Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
1b0735f0a3
|
|
@ -146,12 +146,14 @@
|
|||
<mkdir dir="${lib.dir}"/>
|
||||
<mkdir dir="${ivy.jar.dir}"/>
|
||||
|
||||
<!-- Comment out the following two lines to build the GATK without a network connection, assuming you have all of the libraries cached already -->
|
||||
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
|
||||
dest="${ivy.jar.dir}/${ivy.jar.file}"
|
||||
usetimestamp="true"/>
|
||||
<taskdef resource="org/apache/ivy/ant/antlib.xml"
|
||||
uri="antlib:org.apache.ivy.ant"
|
||||
classpath="${ivy.jar.dir}/${ivy.jar.file}"/>
|
||||
|
||||
<ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/>
|
||||
<property name="init.resolve.done" value="true"/>
|
||||
</target>
|
||||
|
|
@ -546,8 +548,9 @@
|
|||
<target name="vcf.jar" depends="gatk.compile,init.jar">
|
||||
<jar jarfile="${dist.dir}/vcf.jar">
|
||||
<fileset dir="${java.classes}">
|
||||
<include name="**/utils/codecs/**/*.class"/>
|
||||
<include name="**/utils/variantcontext/**/*.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/codecs/vcf/**/*.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/variantcontext/**/*.class"/>
|
||||
<include name="org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
|
||||
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
|
||||
</fileset>
|
||||
|
|
@ -914,7 +917,7 @@
|
|||
</target>
|
||||
|
||||
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
|
||||
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
|
||||
<target name="test" depends="init.buildall,test.compile,vcf.jar" description="Run unit tests">
|
||||
<condition property="ttype" value="*UnitTest" else="${single}">
|
||||
<not><isset property="single"/></not>
|
||||
</condition>
|
||||
|
|
|
|||
|
|
@ -99,5 +99,5 @@ gsa.read.gatkreport <- function(filename) {
|
|||
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
|
||||
}
|
||||
|
||||
gatkreport = as.list(tableEnv);
|
||||
gatkreport = as.list(tableEnv, all.names=TRUE);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,28 +34,23 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
|||
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -92,7 +87,7 @@ public class GenomeAnalysisEngine {
|
|||
/**
|
||||
* Accessor for sample metadata
|
||||
*/
|
||||
private SampleDataSource sampleDataSource = null;
|
||||
private SampleDB sampleDB = null;
|
||||
|
||||
/**
|
||||
* Accessor for sharded reference-ordered data.
|
||||
|
|
@ -206,6 +201,9 @@ public class GenomeAnalysisEngine {
|
|||
// Prepare the data for traversal.
|
||||
initializeDataSources();
|
||||
|
||||
// initialize sampleDB
|
||||
initializeSampleDB();
|
||||
|
||||
// initialize and validate the interval list
|
||||
initializeIntervals();
|
||||
validateSuppliedIntervals();
|
||||
|
|
@ -692,12 +690,22 @@ public class GenomeAnalysisEngine {
|
|||
for (ReadFilter filter : filters)
|
||||
filter.initialize(this);
|
||||
|
||||
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
||||
|
||||
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
|
||||
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
||||
}
|
||||
|
||||
/**
|
||||
* Entry-point function to initialize the samples database from input data and pedigree arguments
|
||||
*/
|
||||
private void initializeSampleDB() {
|
||||
SampleDBBuilder sampleDBBuilder = new SampleDBBuilder(this, argCollection.pedigreeValidationType);
|
||||
sampleDBBuilder.addSamplesFromSAMHeader(getSAMFileHeader());
|
||||
sampleDBBuilder.addSamplesFromSampleNames(SampleUtils.getUniqueSamplesFromRods(this));
|
||||
sampleDBBuilder.addSamplesFromPedigreeFiles(argCollection.pedigreeFiles);
|
||||
sampleDBBuilder.addSamplesFromPedigreeStrings(argCollection.pedigreeStrings);
|
||||
sampleDB = sampleDBBuilder.getFinalSampleDB();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a unique identifier for the reader sourcing this read.
|
||||
* @param read Read to examine.
|
||||
|
|
@ -716,100 +724,6 @@ public class GenomeAnalysisEngine {
|
|||
return getReadsDataSource().getSAMFile(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying
|
||||
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
|
||||
* returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names
|
||||
* found in the corresponding bam file.
|
||||
*
|
||||
* @return Sets of samples in the merged input SAM stream, grouped by readers
|
||||
*/
|
||||
public List<Set<String>> getSamplesByReaders() {
|
||||
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
for (SAMReaderID r : readers) {
|
||||
|
||||
Set<String> samples = new HashSet<String>(1);
|
||||
sample_sets.add(samples);
|
||||
|
||||
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
|
||||
samples.add(g.getSample());
|
||||
}
|
||||
}
|
||||
|
||||
return sample_sets;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns sets of libraries present in the (merged) input SAM stream, grouped by readers (i.e. underlying
|
||||
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
|
||||
* returned by this method will contain 3 elements (one for each reader), with each element being a set of library names
|
||||
* found in the corresponding bam file.
|
||||
*
|
||||
* @return Sets of libraries present in the (merged) input SAM stream, grouped by readers
|
||||
*/
|
||||
public List<Set<String>> getLibrariesByReaders() {
|
||||
|
||||
|
||||
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
for (SAMReaderID r : readers) {
|
||||
|
||||
Set<String> libs = new HashSet<String>(2);
|
||||
lib_sets.add(libs);
|
||||
|
||||
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
|
||||
libs.add(g.getLibrary());
|
||||
}
|
||||
}
|
||||
|
||||
return lib_sets;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
|
||||
*
|
||||
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
|
||||
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
|
||||
* returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups
|
||||
* (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file.
|
||||
*
|
||||
* @return sets of (merged) read group ids in order of input bams
|
||||
*/
|
||||
public List<Set<String>> getMergedReadGroupsByReaders() {
|
||||
|
||||
|
||||
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
for (SAMReaderID r : readers) {
|
||||
|
||||
Set<String> groups = new HashSet<String>(5);
|
||||
rg_sets.add(groups);
|
||||
|
||||
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
|
||||
if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
|
||||
// use HeaderMerger to translate original read group id from the reader into the read group id in the
|
||||
// merged stream, and save that remapped read group id to associate it with specific reader
|
||||
groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId()));
|
||||
} else {
|
||||
// otherwise, pass through the unmapped read groups since this is what Picard does as well
|
||||
groups.add(g.getReadGroupId());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rg_sets;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vrs the reference ordered data (if available).
|
||||
*
|
||||
|
|
@ -929,6 +843,18 @@ public class GenomeAnalysisEngine {
|
|||
return readsDataSource.getHeader(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an ordered list of the unmerged SAM file headers known to this engine.
|
||||
* @return list of header for each input SAM file, in command line order
|
||||
*/
|
||||
public List<SAMFileHeader> getSAMFileHeaders() {
|
||||
final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
|
||||
for ( final SAMReaderID id : getReadsDataSource().getReaderIDs() ) {
|
||||
headers.add(getReadsDataSource().getHeader(id));
|
||||
}
|
||||
return headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the master sequence dictionary for this GATK engine instance
|
||||
* @return a never-null dictionary listing all of the contigs known to this engine instance
|
||||
|
|
@ -947,8 +873,6 @@ public class GenomeAnalysisEngine {
|
|||
return this.readsDataSource;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Sets the collection of GATK main application arguments.
|
||||
*
|
||||
|
|
@ -1035,140 +959,14 @@ public class GenomeAnalysisEngine {
|
|||
return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
|
||||
}
|
||||
|
||||
public SampleDataSource getSampleMetadata() {
|
||||
return this.sampleDataSource;
|
||||
}
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// code for working with Samples database
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Get a sample by its ID
|
||||
* If an alias is passed in, return the main sample object
|
||||
* @param id sample id
|
||||
* @return sample Object with this ID
|
||||
*/
|
||||
public Sample getSampleById(String id) {
|
||||
return sampleDataSource.getSampleById(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample for a given read group
|
||||
* Must first look up ID for read group
|
||||
* @param readGroup of sample
|
||||
* @return sample object with ID from the read group
|
||||
*/
|
||||
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
|
||||
return sampleDataSource.getSampleByReadGroup(readGroup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a sample for a given read
|
||||
* Must first look up read group, and then sample ID for that read group
|
||||
* @param read of sample
|
||||
* @return sample object of this read
|
||||
*/
|
||||
public Sample getSampleByRead(SAMRecord read) {
|
||||
return getSampleByReadGroup(read.getReadGroup());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get number of sample objects
|
||||
* @return size of samples map
|
||||
*/
|
||||
public int sampleCount() {
|
||||
return sampleDataSource.sampleCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all samples with a given family ID
|
||||
* Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
|
||||
* @param familyId family ID
|
||||
* @return Samples with the given family ID
|
||||
*/
|
||||
public Set<Sample> getFamily(String familyId) {
|
||||
return sampleDataSource.getFamily(familyId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all children of a given sample
|
||||
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
|
||||
* @param sample parent sample
|
||||
* @return children of the given sample
|
||||
*/
|
||||
public Set<Sample> getChildren(Sample sample) {
|
||||
return sampleDataSource.getChildren(sample);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all the samples
|
||||
* @return
|
||||
*/
|
||||
public Collection<Sample> getSamples() {
|
||||
return sampleDataSource.getSamples();
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a list of sample names and returns their corresponding sample objects
|
||||
*
|
||||
* @param sampleNameList List of sample names
|
||||
* @return Corresponding set of samples
|
||||
*/
|
||||
public Set<Sample> getSamples(Collection<String> sampleNameList) {
|
||||
return sampleDataSource.getSamples(sampleNameList);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have any value (which could be null) for a given property
|
||||
* @param key Property key
|
||||
* @return Set of samples with the property
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key) {
|
||||
return sampleDataSource.getSamplesWithProperty(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have a property with a certain value
|
||||
* Value must be a string for now - could add a similar method for matching any objects in the future
|
||||
*
|
||||
* @param key Property key
|
||||
* @param value String property value
|
||||
* @return Set of samples that match key and value
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key, String value) {
|
||||
return sampleDataSource.getSamplesWithProperty(key, value);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of sample objects for the sample names in a variant context
|
||||
*
|
||||
* @param context Any variant context
|
||||
* @return a set of the sample objects
|
||||
*/
|
||||
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
|
||||
Set<Sample> samples = new HashSet<Sample>();
|
||||
for (String sampleName : context.getSampleNames()) {
|
||||
samples.add(sampleDataSource.getOrCreateSample(sampleName));
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all samples that were referenced in the SAM file
|
||||
*/
|
||||
public Set<Sample> getSAMFileSamples() {
|
||||
return sampleDataSource.getSAMFileSamples();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a subcontext restricted to samples with a given property key/value
|
||||
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
|
||||
* @param context VariantContext to filter
|
||||
* @param key property key
|
||||
* @param value property value (must be string)
|
||||
* @return subcontext
|
||||
*/
|
||||
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
|
||||
return sampleDataSource.subContextFromSampleProperty(context, key, value);
|
||||
public SampleDB getSampleDB() {
|
||||
return this.sampleDB;
|
||||
}
|
||||
|
||||
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Input;
|
|||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
|
|
@ -44,10 +45,7 @@ import org.simpleframework.xml.stream.HyphenStyle;
|
|||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -72,11 +70,6 @@ public class GATKArgumentCollection {
|
|||
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
|
||||
public List<String> samFiles = new ArrayList<String>();
|
||||
|
||||
// parameters and their defaults
|
||||
@ElementList(required = false)
|
||||
@Argument(fullName = "sample_metadata", shortName = "SM", doc = "Sample file(s) in JSON format", required = false)
|
||||
public List<File> sampleFiles = new ArrayList<File>();
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
|
||||
public Integer readBufferSize = null;
|
||||
|
|
@ -215,28 +208,91 @@ public class GATKArgumentCollection {
|
|||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// distributed GATK arguments
|
||||
// PED (pedigree) support
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Element(required=false)
|
||||
@Argument(fullName="processingTracker",shortName="C",doc="A lockable, shared file for coordinating distributed GATK runs",required=false)
|
||||
@Hidden
|
||||
public File processingTrackerFile = null;
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName="restartProcessingTracker",shortName="RPT",doc="Should we delete the processing tracker file at startup?",required=false)
|
||||
@Hidden
|
||||
public boolean restartProcessingTracker = false;
|
||||
/**
|
||||
* <p>Reads PED file-formatted tabular text files describing meta-data about the samples being
|
||||
* processed in the GATK.</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>see <a href="http://www.broadinstitute.org/mpg/tagger/faq.html">http://www.broadinstitute.org/mpg/tagger/faq.html</a></li>
|
||||
* <li>see <a href="http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped">http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped</a></li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>Family ID</li>
|
||||
* <li>Individual ID</li>
|
||||
* <li>Paternal ID</li>
|
||||
* <li>Maternal ID</li>
|
||||
* <li>Sex (1=male; 2=female; other=unknown)</li>
|
||||
* <li>Phenotype</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
|
||||
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
|
||||
* quantitative trait or an affection status column: GATK will automatically detect which type
|
||||
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).</p>
|
||||
*
|
||||
* <p>If an individual's sex is unknown, then any character other than 1 or 2 can be used.</p>
|
||||
*
|
||||
* <p>You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
|
||||
* line will be ignored. Do not start any family IDs with this character therefore.</p>
|
||||
*
|
||||
* <p>Affection status should be coded:</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>-9 missing</li>
|
||||
* <li>0 missing</li>
|
||||
* <li>1 unaffected</li>
|
||||
* <li>2 affected</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>If any value outside of -9,0,1,2 is detected than the samples are assumed
|
||||
* to phenotype values are interpreted as string phenotype values. In this case -9 uniquely
|
||||
* represents the missing value.</p>
|
||||
*
|
||||
* <p>Genotypes (column 7 onwards) cannot be specified to the GATK.</p>
|
||||
*
|
||||
* <p>For example, here are two individuals (one row = one person):</p>
|
||||
*
|
||||
* <pre>
|
||||
* FAM001 1 0 0 1 2
|
||||
* FAM001 2 0 0 1 2
|
||||
* </pre>
|
||||
*
|
||||
* <p>Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to
|
||||
* tell the GATK PED parser that the corresponding fields are missing from the ped file.</p>
|
||||
*
|
||||
* <p>Note that most GATK walkers do not use pedigree information. Walkers that require pedigree
|
||||
* data should clearly indicate so in their arguments and will throw errors if required pedigree
|
||||
* information is missing.</p>
|
||||
*/
|
||||
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false)
|
||||
public List<File> pedigreeFiles = Collections.emptyList();
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName="processingTrackerStatusFile",shortName="CSF",doc="If provided, a detailed accounting of the state of the process tracker is written to this file. For debugging, only",required=false)
|
||||
@Hidden
|
||||
public File processingTrackerStatusFile = null;
|
||||
/**
|
||||
* Inline PED records (see -ped argument). Each -pedString STRING can contain one or more
|
||||
* valid PED records (see -ped) separated by semi-colons. Supports all tags for each pedString
|
||||
* as -ped supports
|
||||
*/
|
||||
@Argument(fullName="pedigreeString", shortName = "pedString", doc="Pedigree string for samples",required=false)
|
||||
public List<String> pedigreeStrings = Collections.emptyList();
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName="processingTrackerID",shortName="CID",doc="If provided, an integer ID (starting at 1) indicating a unique id for this process within the distributed GATK group",required=false)
|
||||
@Hidden
|
||||
public int processTrackerID = -1;
|
||||
/**
|
||||
* How strict should we be in parsing the PED files?
|
||||
*/
|
||||
@Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="How strict should we be in validating the pedigree information?",required=false)
|
||||
public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// BAM indexing and sharding arguments
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName="allow_intervals_with_unindexed_bam",doc="Allow interval processing with an unsupported BAM. NO INTEGRATION TESTS are available. Use at your own risk.",required=false)
|
||||
|
|
@ -387,7 +443,7 @@ public class GATKArgumentCollection {
|
|||
return false;
|
||||
}
|
||||
if ((other.RODToInterval == null && RODToInterval != null) ||
|
||||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
|
||||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -405,20 +461,6 @@ public class GATKArgumentCollection {
|
|||
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog)))
|
||||
return false;
|
||||
|
||||
if ((other.processingTrackerFile == null && this.processingTrackerFile != null) ||
|
||||
(other.processingTrackerFile != null && !other.processingTrackerFile.equals(this.processingTrackerFile)))
|
||||
return false;
|
||||
|
||||
if ((other.processingTrackerStatusFile == null && this.processingTrackerStatusFile != null) ||
|
||||
(other.processingTrackerStatusFile != null && !other.processingTrackerStatusFile.equals(this.processingTrackerStatusFile)))
|
||||
return false;
|
||||
|
||||
if ( restartProcessingTracker != other.restartProcessingTracker )
|
||||
return false;
|
||||
|
||||
if ( processTrackerID != other.processTrackerID )
|
||||
return false;
|
||||
|
||||
if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM)
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.gatk.contexts;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -76,14 +75,6 @@ public class AlignmentContextUtils {
|
|||
return splitContextBySampleName(context, null);
|
||||
}
|
||||
|
||||
public static Map<Sample, AlignmentContext> splitContextBySample(AlignmentContext context) {
|
||||
Map<Sample, AlignmentContext> m = new HashMap<Sample, AlignmentContext>();
|
||||
for ( Map.Entry<String, AlignmentContext> entry : splitContextBySampleName(context, null).entrySet() ) {
|
||||
m.put(new Sample(entry.getKey()), entry.getValue());
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample, but referencd by sample name instead
|
||||
* of sample object.
|
||||
|
|
@ -97,8 +88,8 @@ public class AlignmentContextUtils {
|
|||
GenomeLoc loc = context.getLocation();
|
||||
HashMap<String, AlignmentContext> contexts = new HashMap<String, AlignmentContext>();
|
||||
|
||||
for(String sample: context.getPileup().getSampleNames()) {
|
||||
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSampleName(sample);
|
||||
for(String sample: context.getPileup().getSamples()) {
|
||||
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSample(sample);
|
||||
|
||||
// Don't add empty pileups to the split context.
|
||||
if(pileupBySample.size() == 0)
|
||||
|
|
|
|||
|
|
@ -1,30 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Aug 12, 2010
|
||||
* Time: 2:09:16 PM
|
||||
*/
|
||||
public class PropertyDefinition {
|
||||
|
||||
String property;
|
||||
|
||||
String[] values;
|
||||
|
||||
public String getProperty() {
|
||||
return property;
|
||||
}
|
||||
|
||||
public void setProperty(String property) {
|
||||
this.property = property;
|
||||
}
|
||||
|
||||
public String[] getValues() {
|
||||
return values;
|
||||
}
|
||||
|
||||
public void setValues(String[] values) {
|
||||
this.values = values;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,203 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Jul 26, 2010
|
||||
* Time: 3:31:38 PM
|
||||
*/
|
||||
public class Sample implements java.io.Serializable {
|
||||
|
||||
private final String id;
|
||||
|
||||
private boolean hasSampleFileEntry = false; // true if this sample has an entry in a sample file
|
||||
|
||||
private boolean hasSAMFileEntry = false; // true if this sample has an entry in the SAM file
|
||||
|
||||
private HashMap<String, Object> properties = new HashMap<String, Object>();
|
||||
|
||||
private HashMap<String, Sample> relationships = new HashMap<String, Sample>();
|
||||
|
||||
public enum Gender {
|
||||
MALE,
|
||||
FEMALE,
|
||||
UNKNOWN
|
||||
}
|
||||
|
||||
public Sample(String id) {
|
||||
/* if (id == null) {
|
||||
throw new StingException("Error creating sample: sample ID cannot be null");
|
||||
}*/
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return this.id;
|
||||
}
|
||||
|
||||
public Map<String, Object> getProperties() {
|
||||
return properties;
|
||||
}
|
||||
|
||||
public void setProperties(Map<String, Object> properties) {
|
||||
this.properties = (HashMap) properties;
|
||||
}
|
||||
|
||||
public Map<String,Sample> getRelationships() {
|
||||
return Collections.unmodifiableMap(this.relationships);
|
||||
}
|
||||
|
||||
public void setSampleFileEntry(boolean value) {
|
||||
this.hasSampleFileEntry = value;
|
||||
}
|
||||
|
||||
public boolean hasSAMFileEntry() {
|
||||
return this.hasSAMFileEntry;
|
||||
}
|
||||
|
||||
public void setSAMFileEntry(boolean value) {
|
||||
this.hasSAMFileEntry = value;
|
||||
}
|
||||
|
||||
public boolean hasSampleFileEntry() {
|
||||
return this.hasSampleFileEntry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get one property
|
||||
* @param key key of property
|
||||
* @return value of property as generic object
|
||||
*/
|
||||
public Object getProperty(String key) {
|
||||
return properties.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a property
|
||||
* If property already exists, it is overwritten
|
||||
* @param key key of property
|
||||
* @param value object to be stored in properties array
|
||||
*/
|
||||
public void setProperty(String key, Object value) {
|
||||
|
||||
if (relationships.containsKey(key)) {
|
||||
throw new StingException("The same key cannot exist as a property and a relationship");
|
||||
}
|
||||
|
||||
if (key.equals("gender") && value.getClass() != Gender.class) {
|
||||
throw new StingException("'gender' property must be of type Sample.Gender");
|
||||
}
|
||||
|
||||
if (key.equals("population") && value.getClass() != String.class) {
|
||||
throw new StingException("'population' property must be of type String");
|
||||
}
|
||||
|
||||
properties.put(key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get one relationship
|
||||
* @param key of relationship
|
||||
* @return Sample object that this relationship points to
|
||||
*/
|
||||
public Sample getRelationship(String key) {
|
||||
return relationships.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set one relationship
|
||||
* If already set, it is overwritten
|
||||
* @param key key of the relationship
|
||||
* @param value Sample object this relationship points to
|
||||
*/
|
||||
public void setRelationship(String key, Sample value) {
|
||||
if (properties.containsKey(key)) {
|
||||
throw new StingException("The same key cannot exist as a property and a relationship");
|
||||
}
|
||||
relationships.put(key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample's mother
|
||||
* @return sample object with relationship mother, if exists, or null
|
||||
*/
|
||||
public Sample getMother() {
|
||||
return getRelationship("mother");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample's father
|
||||
* @return sample object with relationship father, if exists, or null
|
||||
*/
|
||||
public Sample getFather() {
|
||||
return getRelationship("father");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get gender of the sample
|
||||
* @return property of key "gender" - must be of type Gender
|
||||
*/
|
||||
public Gender getGender() {
|
||||
return (Gender) properties.get("gender");
|
||||
}
|
||||
|
||||
public String getPopulation() {
|
||||
return (String) properties.get("population");
|
||||
}
|
||||
|
||||
public String getFamilyId() {
|
||||
return (String) properties.get("familyId");
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if sample is male, false if female, unknown, or null
|
||||
*/
|
||||
public boolean isMale() {
|
||||
return properties.get("gender") == Gender.MALE;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if sample is female, false if male, unknown or null
|
||||
*/
|
||||
public boolean isFemale() {
|
||||
return properties.get("gender") == Gender.MALE;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param key property key
|
||||
* @return true if sample has this property (even if its value is null)
|
||||
*/
|
||||
public boolean hasProperty(String key) {
|
||||
return properties.containsKey(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
Sample sample = (Sample) o;
|
||||
|
||||
if (hasSAMFileEntry != sample.hasSAMFileEntry) return false;
|
||||
if (hasSampleFileEntry != sample.hasSampleFileEntry) return false;
|
||||
if (id != null ? !id.equals(sample.id) : sample.id != null) return false;
|
||||
if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
|
||||
if (relationships != null ? !relationships.equals(sample.relationships) : sample.relationships != null)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return id != null ? id.hashCode() : "".hashCode();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
/**
 * Bean used by the YAML sample-file parser to represent one alias entry:
 * a primary sample id ("mainId") plus the other ids that refer to the
 * same sample ("otherIds").
 */
public class SampleAlias {

    String mainId;
    String[] otherIds;

    /** @return the primary id for this sample */
    public String getMainId() {
        return mainId;
    }

    /** @param mainId the primary id for this sample */
    public void setMainId(String mainId) {
        this.mainId = mainId;
    }

    /** @return the alternate ids that refer to the same sample */
    public String[] getOtherIds() {
        return otherIds;
    }

    /** @param otherIds the alternate ids that refer to the same sample */
    public void setOtherIds(String[] otherIds) {
        this.otherIds = otherIds;
    }
}
|
||||
|
|
@ -1,590 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.yaml.snakeyaml.TypeDescription;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
import org.yaml.snakeyaml.constructor.Constructor;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Jul 26, 2010
|
||||
* Time: 3:30:09 PM
|
||||
*
|
||||
* This class stores and manages sample metadata. This data is encoded in a sample file, which can be included
|
||||
* in the GATK by the "--samples" argument. This class reads and parses those files.
|
||||
*
|
||||
* Although there are a set of public methods for accessing sample data, they aren't used by walkers - they are really
|
||||
* only used by GenomeAnalysisEngine. An instance of GenomeAnalysisEngine has one SampleDataSource. When a walker
|
||||
* wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource.
|
||||
*
|
||||
*/
|
||||
public class SampleDataSource {
|
||||
|
||||
/**
|
||||
* SAMFileHeader that has been created for this analysis.
|
||||
*/
|
||||
private SAMFileHeader header;
|
||||
|
||||
/**
|
||||
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
|
||||
* this is stored as a HashMap.
|
||||
*/
|
||||
private final HashMap<String, Sample> samples = new HashMap<String, Sample>();
|
||||
|
||||
/**
|
||||
* Samples can have "aliases", because sometimes the same sample is referenced by different IDs in different
|
||||
* datasets. If this is the case, one ID is the "primary ID" and others are "aliases".
|
||||
*
|
||||
* This maps ID => primary ID for all samples ID strings - both primary IDs and aliases.
|
||||
*/
|
||||
private HashMap<String, String> sampleAliases = new HashMap<String, String>();
|
||||
|
||||
/**
|
||||
* While loading sample files, we must be aware of "special" properties and relationships that are always allowed
|
||||
*/
|
||||
public static final String[] specialProperties = new String[] {"familyId", "population", "gender"};
|
||||
public static final String[] specialRelationships = new String[] {"mother", "father"};
|
||||
|
||||
/**
|
||||
* Constructor takes both a SAM header and sample files because the two must be integrated.
|
||||
* @param header SAMFileHeader that has been created for this analysis
|
||||
* @param sampleFiles Sample files that were included on the command line
|
||||
*/
|
||||
public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
|
||||
this();
|
||||
this.header = header;
|
||||
// create empty sample object for each sample referenced in the SAM header
|
||||
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
|
||||
if (!hasSample(sampleName)) {
|
||||
Sample newSample = new Sample(sampleName);
|
||||
newSample.setSAMFileEntry(true);
|
||||
samples.put(sampleName, newSample);
|
||||
}
|
||||
}
|
||||
|
||||
// add files consecutively
|
||||
if (sampleFiles != null) {
|
||||
for (File file : sampleFiles) {
|
||||
addFile(file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public SampleDataSource() {
|
||||
samples.put(null, new Sample(null));
|
||||
}
|
||||
|
||||
/**
|
||||
* Hallucinates sample objects for all the samples in the SAM file and stores them
|
||||
*/
|
||||
public void addSamplesFromSAMHeader(SAMFileHeader header) {
|
||||
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
|
||||
if (!hasSample(sampleName)) {
|
||||
Sample newSample = new Sample(sampleName);
|
||||
newSample.setSAMFileEntry(true);
|
||||
samples.put(sampleName, newSample);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse one sample file and integrate it with samples that are already there
|
||||
* Fail quickly if we find any errors in the file
|
||||
*/
|
||||
public void addFile(File sampleFile) {
|
||||
|
||||
BufferedReader reader;
|
||||
try {
|
||||
reader = new BufferedReader(new FileReader(sampleFile));
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new StingException("Could not open sample file " + sampleFile.getAbsolutePath(), e);
|
||||
}
|
||||
|
||||
// set up YAML reader - a "Constructor" creates java object from YAML and "Loader" loads the file
|
||||
Constructor con = new Constructor(SampleFileParser.class);
|
||||
TypeDescription desc = new TypeDescription(SampleFileParser.class);
|
||||
desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
|
||||
desc.putListPropertyType("sampleAliases", SampleAlias.class);
|
||||
con.addTypeDescription(desc);
|
||||
Yaml yaml = new Yaml(con);
|
||||
|
||||
// SampleFileParser stores an object representation of a sample file - this is what we'll parse
|
||||
SampleFileParser parser;
|
||||
try {
|
||||
parser = (SampleFileParser) yaml.load(reader);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new StingException("There was a syntactic error with the YAML in sample file " + sampleFile.getAbsolutePath(), e);
|
||||
}
|
||||
|
||||
// check to see which validation options were built into the file
|
||||
boolean restrictProperties = parser.getAllowedProperties() != null;
|
||||
boolean restrictRelationships = parser.getAllowedRelationships() != null;
|
||||
boolean restrictPropertyValues = parser.getPropertyDefinitions() != null;
|
||||
|
||||
// propertyValues stores the values that are allowed for a given property
|
||||
HashMap<String, HashSet> propertyValues = null;
|
||||
if (restrictPropertyValues) {
|
||||
propertyValues = new HashMap<String, HashSet>();
|
||||
for (PropertyDefinition def : parser.getPropertyDefinitions()) {
|
||||
HashSet<String> set = new HashSet<String>();
|
||||
for (String value : def.getValues()) {
|
||||
set.add(value);
|
||||
}
|
||||
propertyValues.put(def.getProperty(), set);
|
||||
}
|
||||
}
|
||||
|
||||
// make sure the aliases are valid
|
||||
validateAliases(parser);
|
||||
|
||||
// loop through each sample in the file - a SampleParser stores an object that will become a Sample
|
||||
for (SampleParser sampleParser : parser.getSamples()) {
|
||||
|
||||
try {
|
||||
// step 1: add the sample if it doesn't already exist
|
||||
Sample sample = getSampleById(sampleParser.getId());
|
||||
if (sample == null) {
|
||||
sample = new Sample(sampleParser.getId());
|
||||
}
|
||||
addSample(sample);
|
||||
sample.setSampleFileEntry(true);
|
||||
|
||||
// step 2: add the properties
|
||||
if (sampleParser.getProperties() != null) {
|
||||
for (String property : sampleParser.getProperties().keySet()) {
|
||||
|
||||
// check that property is allowed
|
||||
if (restrictProperties) {
|
||||
if (!isPropertyValid(property, parser.getAllowedProperties())) {
|
||||
throw new StingException(property + " is an invalid property. It is not included in the list " +
|
||||
"of allowed properties.");
|
||||
}
|
||||
}
|
||||
|
||||
// next check that the value is allowed
|
||||
if (restrictPropertyValues) {
|
||||
if (!isValueAllowed(property, sampleParser.getProperties().get(property), propertyValues)) {
|
||||
throw new StingException("The value of property '" + property + "' is invalid. " +
|
||||
"It is not included in the list of allowed values for this property.");
|
||||
}
|
||||
}
|
||||
|
||||
// next check that there isn't already a conflicting property there
|
||||
if (sample.getProperty(property) != null &&
|
||||
sample.getProperty(property) != sampleParser.getProperties().get(property))
|
||||
{
|
||||
throw new StingException(property + " is a conflicting property!");
|
||||
}
|
||||
|
||||
// checks are passed - now add the property!
|
||||
saveProperty(sample, property, sampleParser.getProperties().get(property));
|
||||
}
|
||||
}
|
||||
|
||||
// step 3: add the relationships
|
||||
if (sampleParser.getRelationships() != null) {
|
||||
for (String relationship : sampleParser.getRelationships().keySet()) {
|
||||
String relativeId = sampleParser.getRelationships().get(relationship);
|
||||
if (relativeId == null) {
|
||||
throw new StingException("The relationship cannot be null");
|
||||
}
|
||||
|
||||
// first check that it's not invalid
|
||||
if (restrictRelationships) {
|
||||
if (!isRelationshipValid(relationship, parser.getAllowedRelationships())) {
|
||||
throw new StingException(relationship + " is an invalid relationship");
|
||||
}
|
||||
}
|
||||
|
||||
// next check that there isn't already a conflicting property there
|
||||
if (sample.getRelationship(relationship) != null) {
|
||||
if (sample.getRelationship(relationship).getId() != sampleParser.getProperties().get(relationship)) {
|
||||
throw new StingException(relationship + " is a conflicting relationship!");
|
||||
}
|
||||
// if the relationship is already set - and consistent with what we're reading now - no need to continue
|
||||
else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// checks are passed - now save the relationship
|
||||
saveRelationship(sample, relationship, relativeId);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new StingException("An error occurred while loading this sample from the sample file: " +
|
||||
sampleParser.getId(), e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private boolean isValueAllowed(String key, Object value, HashMap<String, HashSet> valuesList) {
|
||||
|
||||
// if the property values weren't specified for this property, then any value is okay
|
||||
if (!valuesList.containsKey(key)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// if this property has enumerated values, it must be a string
|
||||
else if (value.getClass() != String.class)
|
||||
return false;
|
||||
|
||||
// is the value specified or not?
|
||||
else if (!valuesList.get(key).contains(value))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes sure that the aliases are valid
|
||||
* Checks that 1) no string is used as both a main ID and an alias;
|
||||
* 2) no alias is used more than once
|
||||
* @param parser
|
||||
*/
|
||||
private void validateAliases(SampleFileParser parser) {
|
||||
|
||||
// no aliases sure validate
|
||||
if (parser.getSampleAliases() == null)
|
||||
return;
|
||||
|
||||
HashSet<String> mainIds = new HashSet<String>();
|
||||
HashSet<String> otherIds = new HashSet<String>();
|
||||
|
||||
for (SampleAlias sampleAlias : parser.getSampleAliases()) {
|
||||
mainIds.add(sampleAlias.getMainId());
|
||||
for (String otherId : sampleAlias.getOtherIds()) {
|
||||
if (mainIds.contains(otherId))
|
||||
throw new StingException(String.format("The aliases in your sample file are invalid - the alias %s cannot " +
|
||||
"be both a main ID and an other ID", otherId));
|
||||
|
||||
if (!otherIds.add(otherId))
|
||||
throw new StingException(String.format("The aliases in your sample file are invalid - %s is listed as an " +
|
||||
"alias more than once.", otherId));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isPropertyValid(String property, String[] allowedProperties) {
|
||||
|
||||
// is it a special property that is always allowed?
|
||||
for (String allowedProperty : specialProperties) {
|
||||
if (property.equals(allowedProperty))
|
||||
return true;
|
||||
}
|
||||
|
||||
// is it in the allowed properties list?
|
||||
for (String allowedProperty : allowedProperties) {
|
||||
if (property.equals(allowedProperty))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean isRelationshipValid(String relationship, String[] allowedRelationships) {
|
||||
|
||||
// is it a special relationship that is always allowed?
|
||||
for (String allowedRelationship : specialRelationships) {
|
||||
if (relationship.equals(allowedRelationship))
|
||||
return true;
|
||||
}
|
||||
|
||||
// is it in the allowed properties list?
|
||||
for (String allowedRelationship : allowedRelationships) {
|
||||
if (relationship.equals(allowedRelationship))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves a property as the correct type
|
||||
* @param key property key
|
||||
* @param value property value, as read from YAML parser
|
||||
* @return property value to be stored
|
||||
*/
|
||||
private void saveProperty(Sample sample, String key, Object value) {
|
||||
|
||||
// convert gender to the right type, if it was stored as a String
|
||||
if (key.equals("gender")) {
|
||||
if (((String) value).toLowerCase().equals("male")) {
|
||||
value = Sample.Gender.MALE;
|
||||
}
|
||||
else if (((String) value).toLowerCase().equals("female")) {
|
||||
value = Sample.Gender.FEMALE;
|
||||
}
|
||||
else if (((String) value).toLowerCase().equals("unknown")) {
|
||||
value = Sample.Gender.UNKNOWN;
|
||||
}
|
||||
else if (value != null) {
|
||||
throw new StingException("'gender' property must be male, female, or unknown.");
|
||||
}
|
||||
}
|
||||
try {
|
||||
sample.setProperty(key, value);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new StingException("Could not save property " + key, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves a relationship as the correct type
|
||||
* @param key relationship key
|
||||
* @param relativeId sample ID string of the relative
|
||||
* @return relationship value to be stored
|
||||
*/
|
||||
private void saveRelationship(Sample sample, String key, String relativeId) {
|
||||
|
||||
// get the reference that we'll store as the value
|
||||
Sample relative = getSampleById(relativeId);
|
||||
|
||||
// create sample object for the relative, if necessary
|
||||
if (relative == null) {
|
||||
relative = new Sample(relativeId);
|
||||
addSample(relative);
|
||||
}
|
||||
sample.setRelationship(key, relative);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Filter a sample name in case it is an alias
|
||||
* @param sampleId to be filtered
|
||||
* @return ID of sample that stores data for this alias
|
||||
*/
|
||||
private String aliasFilter(String sampleId) {
|
||||
if (!sampleAliases.containsKey(sampleId))
|
||||
return sampleId;
|
||||
else
|
||||
return sampleAliases.get(sampleId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a sample to the collection
|
||||
* @param sample to be added
|
||||
*/
|
||||
private void addSample(Sample sample) {
|
||||
samples.put(sample.getId(), sample);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if sample with this ID exists
|
||||
* Note that this will return true if name passed in is an alias
|
||||
* @param id ID of sample to be checked
|
||||
* @return true if sample exists; false if not
|
||||
*/
|
||||
public boolean hasSample(String id) {
|
||||
return samples.get(aliasFilter(id)) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a sample by its ID
|
||||
* If an alias is passed in, return the main sample object
|
||||
* @param id
|
||||
* @return sample Object with this ID
|
||||
*/
|
||||
public Sample getSampleById(String id) {
|
||||
return samples.get(aliasFilter(id));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample for a given read group
|
||||
* Must first look up ID for read group
|
||||
* @param readGroup of sample
|
||||
* @return sample object with ID from the read group
|
||||
*/
|
||||
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
|
||||
String nameFromReadGroup = readGroup.getSample();
|
||||
return getSampleById(nameFromReadGroup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a sample for a given read
|
||||
* Must first look up read group, and then sample ID for that read group
|
||||
* @param read of sample
|
||||
* @return sample object of this read
|
||||
*/
|
||||
public Sample getSampleByRead(SAMRecord read) {
|
||||
return getSampleByReadGroup(read.getReadGroup());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get number of sample objects
|
||||
* @return size of samples map
|
||||
*/
|
||||
public int sampleCount() {
|
||||
return samples.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all samples with a given family ID
|
||||
* Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
|
||||
* @param familyId
|
||||
* @return
|
||||
*/
|
||||
public Set<Sample> getFamily(String familyId) {
|
||||
HashSet<Sample> familyMembers = new HashSet<Sample>();
|
||||
|
||||
for (Sample sample : samples.values()) {
|
||||
if (sample.getFamilyId() != null) {
|
||||
if (sample.getFamilyId().equals(familyId))
|
||||
familyMembers.add(sample);
|
||||
}
|
||||
}
|
||||
return familyMembers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all children of a given sample
|
||||
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
|
||||
* @param sample
|
||||
* @return
|
||||
*/
|
||||
public Set<Sample> getChildren(Sample sample) {
|
||||
HashSet<Sample> children = new HashSet<Sample>();
|
||||
for (Sample familyMember : getFamily(sample.getFamilyId())) {
|
||||
if (familyMember.getMother() == sample || familyMember.getFather() == sample) {
|
||||
children.add(familyMember);
|
||||
}
|
||||
}
|
||||
return children;
|
||||
}
|
||||
|
||||
public Set<Sample> getSamples() {
|
||||
HashSet<Sample> set = new HashSet<Sample>();
|
||||
set.addAll(samples.values());
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a collection of sample names and returns their corresponding sample objects
|
||||
* Note that, since a set is returned, if you pass in a list with duplicates names there will not be any duplicates in the returned set
|
||||
* @param sampleNameList Set of sample names
|
||||
* @return Corresponding set of samples
|
||||
*/
|
||||
public Set<Sample> getSamples(Collection<String> sampleNameList) {
|
||||
HashSet<Sample> samples = new HashSet<Sample>();
|
||||
for (String name : sampleNameList) {
|
||||
try {
|
||||
samples.add(getSampleById(name));
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new StingException("Could not get sample with the following ID: " + name, e);
|
||||
}
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have any value (which could be null) for a given property
|
||||
* @param key Property key
|
||||
* @return Set of samples with the property
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key) {
|
||||
HashSet<Sample> toReturn = new HashSet<Sample>();
|
||||
for (Sample s : samples.values()) {
|
||||
if (s.hasProperty(key))
|
||||
toReturn.add(s);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have a property with a certain value
|
||||
* Value must be a string for now - could add a similar method for matching any objects in the future
|
||||
*
|
||||
* @param key Property key
|
||||
* @param value String property value
|
||||
* @return Set of samples that match key and value
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key, String value) {
|
||||
Set<Sample> toReturn = getSamplesWithProperty(key);
|
||||
for (Sample s : toReturn) {
|
||||
if (!s.getProperty(key).equals(value))
|
||||
toReturn.remove(s);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
public Sample getOrCreateSample(String id) {
|
||||
Sample sample = getSampleById(id);
|
||||
if (sample == null) {
|
||||
sample = new Sample(id);
|
||||
addSample(sample);
|
||||
}
|
||||
return sample;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all samples that were referenced in the SAM file
|
||||
*/
|
||||
public Set<Sample> getSAMFileSamples() {
|
||||
Set<Sample> toReturn = new HashSet<Sample>();
|
||||
for (Sample sample : samples.values()) {
|
||||
if (sample.hasSAMFileEntry())
|
||||
toReturn.add(sample);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of sample objects for the sample names in a variant context
|
||||
*
|
||||
* @param context Any variant context
|
||||
* @return a set of the sample objects
|
||||
*/
|
||||
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
|
||||
Set<Sample> samples = new HashSet<Sample>();
|
||||
for (String sampleName : context.getSampleNames()) {
|
||||
samples.add(getOrCreateSample(sampleName));
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return a subcontext restricted to samples with a given property key/value
|
||||
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
|
||||
* @param context VariantContext to filter
|
||||
* @param key property key
|
||||
* @param value property value (must be string)
|
||||
* @return subcontext
|
||||
*/
|
||||
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
|
||||
|
||||
Set<String> samplesWithProperty = new HashSet<String>();
|
||||
for (String sampleName : context.getSampleNames()) {
|
||||
Sample s = samples.get(sampleName);
|
||||
if (s != null && s.hasProperty(key) && s.getProperty(key).equals(value))
|
||||
samplesWithProperty.add(sampleName);
|
||||
}
|
||||
Map<String, Genotype> genotypes = context.getGenotypes(samplesWithProperty);
|
||||
return context.subContextFromGenotypes(genotypes.values());
|
||||
}
|
||||
|
||||
public static SampleDataSource createEmptyDataSource() {
|
||||
SAMFileHeader header = new SAMFileHeader();
|
||||
return new SampleDataSource(header, null);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Aug 12, 2010
|
||||
* Time: 1:30:44 PM
|
||||
*/
|
||||
public class SampleFileParser {
|
||||
|
||||
private SampleAlias[] sampleAliases;
|
||||
|
||||
private String[] allowedProperties;
|
||||
|
||||
private String[] allowedRelationships;
|
||||
|
||||
private PropertyDefinition[] propertyDefinitions;
|
||||
|
||||
private SampleParser[] samples;
|
||||
|
||||
public PropertyDefinition[] getPropertyDefinitions() {
|
||||
return propertyDefinitions;
|
||||
}
|
||||
|
||||
public void setPropertyDefinitions(PropertyDefinition[] propertyDefinitions) {
|
||||
this.propertyDefinitions = propertyDefinitions;
|
||||
}
|
||||
|
||||
public SampleFileParser() {
|
||||
|
||||
}
|
||||
|
||||
public String[] getAllowedProperties() {
|
||||
return allowedProperties;
|
||||
}
|
||||
|
||||
public void setAllowedProperties(String[] allowedProperties) {
|
||||
this.allowedProperties = allowedProperties;
|
||||
}
|
||||
|
||||
public SampleParser[] getSamples() {
|
||||
return samples;
|
||||
}
|
||||
|
||||
public void setSamples(SampleParser[] samples) {
|
||||
this.samples = samples;
|
||||
}
|
||||
|
||||
public String[] getAllowedRelationships() {
|
||||
return allowedRelationships;
|
||||
}
|
||||
|
||||
public void setAllowedRelationships(String[] allowedRelationships) {
|
||||
this.allowedRelationships = allowedRelationships;
|
||||
}
|
||||
|
||||
public SampleAlias[] getSampleAliases() {
|
||||
return sampleAliases;
|
||||
}
|
||||
|
||||
public void setSampleAliases(SampleAlias[] sampleAliases) {
|
||||
this.sampleAliases = sampleAliases;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
 * Bean used by the SnakeYAML loader to represent one sample entry in a sample
 * file: an id plus maps of raw properties and relationship ids, before any
 * validation or type conversion is applied.
 */
public class SampleParser {

    private String id;
    private HashMap<String, Object> properties;
    private HashMap<String, String> relationships;

    /** @return the sample's id */
    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    /** @return raw property key/value pairs, or null if none were given */
    public HashMap<String, Object> getProperties() {
        return properties;
    }

    public void setProperties(HashMap<String, Object> properties) {
        this.properties = properties;
    }

    /** @return relationship name to relative-id pairs, or null if none were given */
    public HashMap<String, String> getRelationships() {
        return relationships;
    }

    public void setRelationships(HashMap<String, String> relationships) {
        this.relationships = relationships;
    }
}
|
||||
|
|
@ -84,12 +84,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
*/
|
||||
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
|
||||
super(engine, walker, reads, reference, rods);
|
||||
|
||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
||||
|
||||
if (engine.getArguments().processingTrackerFile != null) {
|
||||
throw new UserException.BadArgumentValue("-C", "Distributed GATK calculations currently not supported in multi-threaded mode. Complain to Mark depristo@broadinstitute.org to implement and test this code path");
|
||||
}
|
||||
}
|
||||
|
||||
public Object execute( Walker walker, ShardStrategy shardStrategy ) {
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
|||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
|
|
@ -56,7 +57,8 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
traversalEngine.startTimersIfNecessary();
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
LocusWalker lWalker = (LocusWalker)walker;
|
||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
|
||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
|
||||
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
|
||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||
|
|
|
|||
|
|
@ -62,7 +62,10 @@ public class ShardTraverser implements Callable {
|
|||
|
||||
Object accumulator = walker.reduceInit();
|
||||
LocusWalker lWalker = (LocusWalker)walker;
|
||||
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(), microScheduler.engine.getSampleMetadata()); // todo: microScheduler.engine is protected - is it okay to user it here?
|
||||
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),
|
||||
microScheduler.getReadIterator(shard),
|
||||
shard.getGenomeLocs(),
|
||||
microScheduler.engine.getSampleDB().getSampleNames()); // todo: microScheduler.engine is protected - is it okay to user it here?
|
||||
ShardDataProvider dataProvider = null;
|
||||
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator;
|
|||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
|
|
@ -12,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
|
@ -63,17 +63,20 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
* the given intervals.
|
||||
* @param iterator The data source for this window.
|
||||
* @param intervals The set of intervals over which to traverse.
|
||||
* @param sampleData SampleDataSource that we can reference reads with
|
||||
* @param sampleNames The complete set of sample names in the reads in shard
|
||||
*/
|
||||
|
||||
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, SampleDataSource sampleData ) {
|
||||
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, Collection<String> sampleNames) {
|
||||
this.sourceInfo = shard.getReadProperties();
|
||||
this.readIterator = iterator;
|
||||
|
||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
|
||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
|
||||
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
|
||||
}
|
||||
|
||||
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
|
||||
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
public Iterator<WindowMakerIterator> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,12 +35,11 @@ import org.broadinstitute.sting.gatk.DownsampleType;
|
|||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.ReservoirDownsampler;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -52,9 +51,6 @@ import java.util.*;
|
|||
|
||||
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
|
||||
public class LocusIteratorByState extends LocusIterator {
|
||||
// private static long discarded_bases = 0L;
|
||||
// private static long observed_bases = 0L;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
||||
|
||||
|
|
@ -69,7 +65,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* Used to create new GenomeLocs.
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
private final ArrayList<Sample> samples;
|
||||
private final ArrayList<String> samples;
|
||||
private final ReadStateManager readStates;
|
||||
|
||||
static private class SAMRecordState {
|
||||
|
|
@ -278,15 +274,30 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, SampleDataSource sampleData ) {
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples ) {
|
||||
this.readInfo = readInformation;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.samples = new ArrayList<String>(samples);
|
||||
this.readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
|
||||
|
||||
// get the list of samples
|
||||
this.samples = new ArrayList<Sample>(sampleData.getSamples());
|
||||
|
||||
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
|
||||
|
||||
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
||||
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
||||
if ( this.samples.isEmpty() && samIterator.hasNext() ) {
|
||||
// actually we cannot process BAMs without read groups unless we tolerate empty
|
||||
// sample lists. In the empty case we need to add the null element to the samples
|
||||
this.samples.add(null);
|
||||
//throw new IllegalArgumentException("samples list must not be empty");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
|
||||
* for the system.
|
||||
*/
|
||||
public final static Collection<String> sampleListForSAMWithoutReadGroups() {
|
||||
List<String> samples = new ArrayList<String>();
|
||||
samples.add(null);
|
||||
return samples;
|
||||
}
|
||||
|
||||
public Iterator<AlignmentContext> iterator() {
|
||||
|
|
@ -303,19 +314,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
|
||||
}
|
||||
|
||||
public void printState() {
|
||||
for(Sample sample: samples) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecordState state = iterator.next();
|
||||
logger.debug(String.format("printState():"));
|
||||
SAMRecord read = state.getRead();
|
||||
int offset = state.getReadOffset();
|
||||
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private GenomeLoc getLocation() {
|
||||
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
|
||||
}
|
||||
|
|
@ -355,14 +353,14 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// In this case, the subsequent call to next() will emit the normal pileup at the current base
|
||||
// and shift the position.
|
||||
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
|
||||
Map<Sample,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<Sample,ReadBackedExtendedEventPileupImpl>();
|
||||
Map<String,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileupImpl>();
|
||||
|
||||
// get current location on the reference and decrement it by 1: the indels we just stepped over
|
||||
// are associated with the *previous* reference base
|
||||
GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1);
|
||||
|
||||
boolean hasBeenSampled = false;
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
|
@ -426,10 +424,10 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
|
||||
} else {
|
||||
GenomeLoc location = getLocation();
|
||||
Map<Sample,ReadBackedPileupImpl> fullPileup = new HashMap<Sample,ReadBackedPileupImpl>();
|
||||
Map<String,ReadBackedPileupImpl> fullPileup = new HashMap<String,ReadBackedPileupImpl>();
|
||||
|
||||
boolean hasBeenSampled = false;
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
|
@ -495,7 +493,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
private void updateReadStates() {
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
Iterator<SAMRecordState> it = readStates.iterator(sample);
|
||||
while ( it.hasNext() ) {
|
||||
SAMRecordState state = it.next();
|
||||
|
|
@ -522,7 +520,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final DownsamplingMethod downsamplingMethod;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final Map<Sample,PerSampleReadStateManager> readStatesBySample = new HashMap<Sample,PerSampleReadStateManager>();
|
||||
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
|
||||
private final int targetCoverage;
|
||||
private int totalReadStates = 0;
|
||||
|
||||
|
|
@ -540,9 +538,9 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
readStatesBySample.put(sample,new PerSampleReadStateManager());
|
||||
readSelectors.put(sample.getId(),downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
|
||||
readSelectors.put(sample,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner(readSelectors);
|
||||
|
|
@ -554,7 +552,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* @param sample The sample.
|
||||
* @return Iterator over the reads associated with that sample.
|
||||
*/
|
||||
public Iterator<SAMRecordState> iterator(final Sample sample) {
|
||||
public Iterator<SAMRecordState> iterator(final String sample) {
|
||||
return new Iterator<SAMRecordState>() {
|
||||
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
|
||||
|
||||
|
|
@ -590,7 +588,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* @param sample The sample.
|
||||
* @return Total number of reads in the given sample.
|
||||
*/
|
||||
public int size(final Sample sample) {
|
||||
public int size(final String sample) {
|
||||
return readStatesBySample.get(sample).size();
|
||||
}
|
||||
|
||||
|
|
@ -600,12 +598,12 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* @param sample Sample, downsampled independently.
|
||||
* @return Integer stop of the furthest undownsampled region.
|
||||
*/
|
||||
public int getDownsamplingExtent(final Sample sample) {
|
||||
public int getDownsamplingExtent(final String sample) {
|
||||
return readStatesBySample.get(sample).getDownsamplingExtent();
|
||||
}
|
||||
|
||||
public SAMRecordState getFirst() {
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
PerSampleReadStateManager reads = readStatesBySample.get(sample);
|
||||
if(!reads.isEmpty())
|
||||
return reads.peek();
|
||||
|
|
@ -639,8 +637,8 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
samplePartitioner.complete();
|
||||
|
||||
for(Sample sample: samples) {
|
||||
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample.getId());
|
||||
for(final String sample: samples) {
|
||||
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
|
||||
|
||||
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
|
||||
|
||||
|
|
@ -1072,6 +1070,3 @@ class SamplePartitioner implements ReadSelector {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
|
||||
* Categorical sample trait for association and analysis
|
||||
*
|
||||
* Samples can have unknown status, be affected or unaffected by the
|
||||
* categorical trait, or they can be marked as actually having an
|
||||
* other trait value (stored in an associated value in the Sample class)
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since Sept. 2011
|
||||
*/
|
||||
public enum Affection {
    // NOTE: constant order is load-bearing for ordinal()-based consumers; do not reorder.

    /** Affection status has not been determined. */
    UNKNOWN,

    /** The sample suffers from the categorical trait / disease. */
    AFFECTED,

    /** The sample is known not to have the categorical trait / disease. */
    UNAFFECTED,

    /**
     * The sample has an "other" trait: the actual trait value is an arbitrary string
     * stored separately (in the associated value of the Sample class).
     */
    OTHER
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
|
||||
* ENUM of possible human genders: male, female, or unknown
|
||||
*/
|
||||
public enum Gender {
    // NOTE: constant order is load-bearing for ordinal()-based consumers; do not reorder.

    /** Sample is male. */
    MALE,
    /** Sample is female. */
    FEMALE,
    /** Sex of the sample could not be determined. */
    UNKNOWN
}
|
||||
|
|
@ -0,0 +1,310 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Reads PED file-formatted tabular text files
|
||||
*
|
||||
* See http://www.broadinstitute.org/mpg/tagger/faq.html
|
||||
* See http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
|
||||
*
|
||||
* The "ped" file format refers to the widely-used format for linkage pedigree data.
|
||||
* Each line describes a single (diploid) individual in the following format:
|
||||
*
|
||||
* family_ID individual_ID father_ID mother_ID gender phenotype genotype_1 genotype_2 ...
|
||||
*
|
||||
* If your data lacks pedigree information (for example, unrelated case/control individuals),
|
||||
* set the father_ID and mother_ID to 0. sex denotes the individual's gender with 1=male and 2=female.
|
||||
* phenotype refers to the affected status (for association studies) where 0=unknown, 1=unaffected, 2=affected.
|
||||
* Finally, each genotype is written as two (=diploid) integer numbers (separated by whitespace),
|
||||
* where 1=A, 2=C, 3=G, 4=T. No header lines are allowed and all columns must be separated by whitespace.
|
||||
* Check out the information at the PLINK website on the "ped" file format.
|
||||
*
|
||||
* The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:
|
||||
* Family ID
|
||||
* Individual ID
|
||||
* Paternal ID
|
||||
* Maternal ID
|
||||
* Sex (1=male; 2=female; other=unknown)
|
||||
* Phenotype
|
||||
*
|
||||
* The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
|
||||
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
|
||||
* quantitative trait or an affection status column: PLINK will automatically detect which type
|
||||
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).
|
||||
* Note that the GATK actually supports arbitrary values for quantitative trait -- not just doubles --
|
||||
* and are actually representing these values as strings instead of doubles
|
||||
*
|
||||
* NOTE Quantitative traits with decimal points must be coded with a period/full-stop character and
|
||||
* not a comma, i.e. 2.394 not 2,394
|
||||
*
|
||||
* If an individual's sex is unknown, then any character other than 1 or 2 can be used.
|
||||
* When new files are created (PED, FAM, or other which contain sex) then the original coding will be
|
||||
* preserved. However, these individuals will be dropped from any analyses (i.e. phenotype set to missing also)
|
||||
* and an error message will arise if an analysis that uses family information is requested and an
|
||||
* individual of 'unknown' sex is specified as a father or mother.
|
||||
*
|
||||
*
|
||||
* HINT You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
|
||||
* line will be ignored. Do not start any family IDs with this character therefore.
|
||||
*
|
||||
* Affection status, by default, should be coded:
|
||||
* -9 missing
|
||||
* 0 missing
|
||||
* 1 unaffected
|
||||
* 2 affected
|
||||
*
|
||||
* If your file is coded 0/1 to represent unaffected/affected, then use the --1 flag:
|
||||
* plink --file mydata --1 which will specify a disease phenotype coded:
|
||||
*
|
||||
* -9 missing
|
||||
* 0 unaffected
|
||||
* 1 affected
|
||||
*
|
||||
* The missing phenotype value for quantitative traits is, by default, -9 (this can also be used for
|
||||
* disease traits as well as 0). It can be reset by including the --missing-phenotype option:
|
||||
*
|
||||
* Genotypes (column 7 onwards) should also be white-space delimited; they can be any character
|
||||
* (e.g. 1,2,3,4 or A,C,G,T or anything else) except 0 which is, by default, the missing genotype
|
||||
* character. All markers should be biallelic. All SNPs (whether haploid or not) must have two
|
||||
 * alleles specified. Either both alleles should be missing (i.e. 0) or neither should be.
|
||||
*
|
||||
* No header row should be given. For example, here are two individuals typed for 3 SNPs (one row = one person):
|
||||
*
|
||||
* FAM001 1 0 0 1 2 A A G G A C
|
||||
* FAM001 2 0 0 1 2 A A A G 0 0
|
||||
* ...
|
||||
*
|
||||
* Note that the GATK does not support genotypes in a PED file.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 2011
|
||||
*/
|
||||
public class PedReader {
|
||||
private static Logger logger = Logger.getLogger(PedReader.class);
|
||||
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
|
||||
final static private String commentMarker = "#";
|
||||
|
||||
/**
|
||||
* An enum that specifies which, if any, of the standard PED fields are
|
||||
* missing from the input records. For example, suppose we have the full record:
|
||||
*
|
||||
* "fam1 kid dad mom 1 2"
|
||||
*
|
||||
* indicating a male affected child. This can be parsed with the -ped x.ped argument
|
||||
* to the GATK. Suppose we only have:
|
||||
*
|
||||
* "fam1 kid 1"
|
||||
*
|
||||
* we can parse the reduced version of this record with -ped:NO_PARENTS,NO_PHENOTYPE x.ped
|
||||
*/
|
||||
public enum MissingPedField {
|
||||
/**
|
||||
* The PED records do not have the first (FAMILY_ID) argument. The family id
|
||||
* will be set to null / empty.
|
||||
*/
|
||||
NO_FAMILY_ID,
|
||||
|
||||
/**
|
||||
* The PED records do not have either the paternal or maternal IDs, so
|
||||
* the corresponding IDs are set to null.
|
||||
*/
|
||||
NO_PARENTS,
|
||||
|
||||
/**
|
||||
* The PED records do not have the GENDER field, so the sex of each
|
||||
* sample will be set to UNKNOWN.
|
||||
*/
|
||||
NO_SEX,
|
||||
|
||||
/**
|
||||
* The PED records do not have the PHENOTYPE field, so the phenotype
|
||||
* of each sample will be set to UNKNOWN.
|
||||
*/
|
||||
NO_PHENOTYPE
|
||||
}
|
||||
|
||||
protected enum Field {
|
||||
FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
|
||||
}
|
||||
|
||||
// phenotype
|
||||
private final static String MISSING_VALUE1 = "-9";
|
||||
private final static String MISSING_VALUE2 = "0";
|
||||
private final static String PHENOTYPE_UNAFFECTED = "1";
|
||||
private final static String PHENOTYPE_AFFECTED = "2";
|
||||
|
||||
// Sex
|
||||
private final static String SEX_MALE = "1";
|
||||
private final static String SEX_FEMALE = "2";
|
||||
// other=unknown
|
||||
|
||||
public PedReader() { }
|
||||
|
||||
public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) throws FileNotFoundException {
|
||||
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
|
||||
return parse(new FileReader(source), missingFields, sampleDB);
|
||||
}
|
||||
|
||||
public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
|
||||
logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields);
|
||||
return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB);
|
||||
}
|
||||
|
||||
public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
|
||||
final List<String> lines = new XReadLines(reader).readLines();
|
||||
|
||||
// What are the record offsets?
|
||||
final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
|
||||
final int samplePos = familyPos + 1;
|
||||
final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
|
||||
final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
|
||||
final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
|
||||
final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
|
||||
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
|
||||
|
||||
// go through once and determine properties
|
||||
int lineNo = 1;
|
||||
boolean isQT = false;
|
||||
final List<String[]> splits = new ArrayList<String[]>(lines.size());
|
||||
for ( final String line : lines ) {
|
||||
if ( line.startsWith(commentMarker)) continue;
|
||||
if ( line.trim().equals("") ) continue;
|
||||
|
||||
final String[] parts = line.split("\\s+");
|
||||
|
||||
if ( parts.length != nExpectedFields )
|
||||
throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");
|
||||
|
||||
if ( phenotypePos != -1 ) {
|
||||
isQT = isQT || ! CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
|
||||
}
|
||||
|
||||
splits.add(parts);
|
||||
lineNo++;
|
||||
}
|
||||
logger.info("Phenotype is other? " + isQT);
|
||||
|
||||
// now go through and parse each record
|
||||
lineNo = 1;
|
||||
final List<Sample> samples = new ArrayList<Sample>(splits.size());
|
||||
for ( final String[] parts : splits ) {
|
||||
String familyID = null, individualID, paternalID = null, maternalID = null;
|
||||
Gender sex = Gender.UNKNOWN;
|
||||
String quantitativePhenotype = Sample.UNSET_QT;
|
||||
Affection affection = Affection.UNKNOWN;
|
||||
|
||||
if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
|
||||
individualID = parts[samplePos];
|
||||
if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
|
||||
if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);
|
||||
|
||||
if ( sexPos != -1 ) {
|
||||
if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
|
||||
else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
|
||||
else sex = Gender.UNKNOWN;
|
||||
}
|
||||
|
||||
if ( phenotypePos != -1 ) {
|
||||
if ( isQT ) {
|
||||
if ( parts[phenotypePos].equals(MISSING_VALUE1) )
|
||||
affection = Affection.UNKNOWN;
|
||||
else {
|
||||
affection = Affection.OTHER;
|
||||
quantitativePhenotype = parts[phenotypePos];
|
||||
}
|
||||
} else {
|
||||
if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
|
||||
else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
|
||||
else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
|
||||
else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
|
||||
else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
|
||||
}
|
||||
}
|
||||
|
||||
final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype);
|
||||
samples.add(s);
|
||||
sampleDB.addSample(s);
|
||||
lineNo++;
|
||||
}
|
||||
|
||||
for ( final Sample sample : new ArrayList<Sample>(samples) ) {
|
||||
Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
|
||||
if ( dad != null ) samples.add(dad);
|
||||
|
||||
Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
|
||||
if ( mom != null ) samples.add(mom);
|
||||
}
|
||||
|
||||
return samples;
|
||||
}
|
||||
|
||||
private final static String maybeMissing(final String string) {
|
||||
if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) )
|
||||
return null;
|
||||
else
|
||||
return string;
|
||||
}
|
||||
|
||||
private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
|
||||
if ( id != null && sampleDB.getSample(id) == null ) {
|
||||
Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
|
||||
sampleDB.addSample(s);
|
||||
return s;
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a list of tags from the command line, assuming it comes from the GATK Engine
|
||||
* tags, and returns the corresponding EnumSet.
|
||||
*
|
||||
* @param arg the actual engine arg, used for the UserException if there's an error
|
||||
* @param tags a list of string tags that should be converted to the MissingPedField value
|
||||
* @return
|
||||
*/
|
||||
public static final EnumSet<MissingPedField> parseMissingFieldTags(final Object arg, final List<String> tags) {
|
||||
final EnumSet<MissingPedField> missingFields = EnumSet.noneOf(MissingPedField.class);
|
||||
|
||||
for ( final String tag : tags ) {
|
||||
try {
|
||||
missingFields.add(MissingPedField.valueOf(tag));
|
||||
} catch ( IllegalArgumentException e ) {
|
||||
throw new UserException.BadArgumentValue(arg.toString(), "Unknown tag " + tag + " allowed values are " + MissingPedField.values());
|
||||
}
|
||||
}
|
||||
|
||||
return missingFields;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public enum PedigreeValidationType {
|
||||
/**
|
||||
* Require if a pedigree file is provided at all samples in the VCF or BAM files have a corresponding
|
||||
* entry in the pedigree file(s).
|
||||
*/
|
||||
STRICT,
|
||||
|
||||
/**
|
||||
* Do not enforce any overlap between the VCF/BAM samples and the pedigree data
|
||||
* */
|
||||
SILENT
|
||||
}
|
||||
|
|
@ -0,0 +1,222 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class Sample implements Comparable<Sample> { // implements java.io.Serializable {
|
||||
final private String familyID, paternalID, maternalID;
|
||||
final private Gender gender;
|
||||
final private String otherPhenotype;
|
||||
final private Affection affection;
|
||||
final private String ID;
|
||||
final private SampleDB infoDB;
|
||||
final private Map<String, Object> properties = new HashMap<String, Object>();
|
||||
|
||||
public final static String UNSET_QT = null;
|
||||
|
||||
public Sample(final String ID, final SampleDB infoDB,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final Affection affection, final String otherPhenotype) {
|
||||
this.familyID = familyID;
|
||||
this.paternalID = paternalID;
|
||||
this.maternalID = maternalID;
|
||||
this.gender = gender;
|
||||
this.otherPhenotype = otherPhenotype;
|
||||
this.affection = affection;
|
||||
this.ID = ID;
|
||||
this.infoDB = infoDB;
|
||||
}
|
||||
|
||||
protected Sample(final String ID,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final Affection affection, final String otherPhenotype) {
|
||||
this(ID, null, familyID, paternalID, maternalID, gender, affection, otherPhenotype);
|
||||
}
|
||||
|
||||
protected Sample(final String ID,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final Affection affection) {
|
||||
this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
|
||||
}
|
||||
|
||||
|
||||
public Sample(final String ID, final SampleDB infoDB,
|
||||
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
|
||||
this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
|
||||
}
|
||||
|
||||
public Sample(final String ID, final SampleDB infoDB, final Affection affection, final String otherPhenotype) {
|
||||
this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, otherPhenotype);
|
||||
}
|
||||
|
||||
public Sample(String id, SampleDB infoDB) {
|
||||
this(id, infoDB, null, null, null,
|
||||
Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// standard property getters
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
public String getID() {
|
||||
return ID;
|
||||
}
|
||||
|
||||
public String getFamilyID() {
|
||||
return familyID;
|
||||
}
|
||||
|
||||
public String getPaternalID() {
|
||||
return paternalID;
|
||||
}
|
||||
|
||||
public String getMaternalID() {
|
||||
return maternalID;
|
||||
}
|
||||
|
||||
public Affection getAffection() {
|
||||
return affection;
|
||||
}
|
||||
|
||||
public boolean hasOtherPhenotype() {
|
||||
return affection == Affection.OTHER;
|
||||
}
|
||||
|
||||
public String getOtherPhenotype() {
|
||||
return otherPhenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample's mother
|
||||
* @return sample object with relationship mother, if exists, or null
|
||||
*/
|
||||
public Sample getMother() {
|
||||
return infoDB.getSample(maternalID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample's father
|
||||
* @return sample object with relationship father, if exists, or null
|
||||
*/
|
||||
public Sample getFather() {
|
||||
return infoDB.getSample(paternalID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get gender of the sample
|
||||
* @return property of key "gender" - must be of type Gender
|
||||
*/
|
||||
public Gender getGender() {
|
||||
return gender;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final Sample sample) {
|
||||
return ID.compareTo(sample.getID());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
|
||||
getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
|
||||
getOtherPhenotype(), properties);
|
||||
}
|
||||
|
||||
// // -------------------------------------------------------------------------------------
|
||||
// //
|
||||
// // code for working with additional -- none standard -- properites
|
||||
// //
|
||||
// // -------------------------------------------------------------------------------------
|
||||
//
|
||||
// public Map<String, Object> getExtraProperties() {
|
||||
// return Collections.unmodifiableMap(properties);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get one property
|
||||
// * @param key key of property
|
||||
// * @return value of property as generic object
|
||||
// */
|
||||
// public Object getExtraPropertyValue(final String key) {
|
||||
// return properties.get(key);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// *
|
||||
// * @param key property key
|
||||
// * @return true if sample has this property (even if its value is null)
|
||||
// */
|
||||
// public boolean hasExtraProperty(String key) {
|
||||
// return properties.containsKey(key);
|
||||
// }
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return ID.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if(o == null)
|
||||
return false;
|
||||
if(o instanceof Sample) {
|
||||
Sample otherSample = (Sample)o;
|
||||
return ID.equals(otherSample.ID) &&
|
||||
equalOrNull(familyID, otherSample.familyID) &&
|
||||
equalOrNull(paternalID, otherSample.paternalID) &&
|
||||
equalOrNull(maternalID, otherSample.maternalID) &&
|
||||
equalOrNull(gender, otherSample.gender) &&
|
||||
equalOrNull(otherPhenotype, otherSample.otherPhenotype) &&
|
||||
equalOrNull(affection, otherSample.affection) &&
|
||||
equalOrNull(properties, otherSample.properties);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private final static boolean equalOrNull(final Object o1, final Object o2) {
|
||||
if ( o1 == null )
|
||||
return o2 == null;
|
||||
else
|
||||
return o2 == null ? false : o1.equals(o2);
|
||||
}
|
||||
|
||||
private final static <T> T mergeValues(final String name, final String field, final T o1, final T o2, final T emptyValue) {
|
||||
if ( o1 == null || o1.equals(emptyValue) ) {
|
||||
// take o2 if both are null, otherwise keep o2
|
||||
return o2 == null ? null : o2;
|
||||
} else {
|
||||
if ( o2 == null || o2.equals(emptyValue) )
|
||||
return o1; // keep o1, since it's a real value
|
||||
else {
|
||||
// both o1 and o2 have a value
|
||||
if ( o1 == o2 )
|
||||
return o1;
|
||||
else
|
||||
throw new UserException("Inconsistent values detected for " + name + " for field " + field + " value1 " + o1 + " value2 " + o2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public final static Sample mergeSamples(final Sample prev, final Sample next) {
|
||||
if ( prev.equals(next) )
|
||||
return next;
|
||||
else {
|
||||
return new Sample(prev.getID(), prev.infoDB,
|
||||
mergeValues(prev.getID(), "Family_ID", prev.getFamilyID(), next.getFamilyID(), null),
|
||||
mergeValues(prev.getID(), "Paternal_ID", prev.getPaternalID(), next.getPaternalID(), null),
|
||||
mergeValues(prev.getID(), "Material_ID", prev.getMaternalID(), next.getMaternalID(), null),
|
||||
mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN),
|
||||
mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN),
|
||||
mergeValues(prev.getID(), "OtherPhenotype", prev.getOtherPhenotype(), next.getOtherPhenotype(), UNSET_QT));
|
||||
//mergeValues(prev.getID(), "ExtraProperties", prev.getExtraProperties(), next.getExtraProperties(), Collections.emptyMap()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SampleDB {
|
||||
/**
|
||||
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
|
||||
* this is stored as a HashMap.
|
||||
*/
|
||||
private final HashMap<String, Sample> samples = new HashMap<String, Sample>();
|
||||
|
||||
/**
|
||||
* Constructor takes both a SAM header and sample files because the two must be integrated.
|
||||
*/
|
||||
public SampleDB() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Protected function to add a single sample to the database
|
||||
*
|
||||
* @param sample to be added
|
||||
*/
|
||||
protected SampleDB addSample(Sample sample) {
|
||||
Sample prev = samples.get(sample.getID());
|
||||
if ( prev != null )
|
||||
sample = Sample.mergeSamples(prev, sample);
|
||||
samples.put(sample.getID(), sample);
|
||||
return this;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions for getting a sample from the DB
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Get a sample by its ID
|
||||
* If an alias is passed in, return the main sample object
|
||||
* @param id
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSample(String id) {
|
||||
return samples.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param read
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSample(final SAMRecord read) {
|
||||
return getSample(read.getReadGroup());
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param rg
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSample(final SAMReadGroupRecord rg) {
|
||||
return getSample(rg.getSample());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param g Genotype
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSample(final Genotype g) {
|
||||
return getSample(g.getSampleName());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions for accessing samples in the DB
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Get number of sample objects
|
||||
* @return size of samples map
|
||||
*/
|
||||
public int sampleCount() {
|
||||
return samples.size();
|
||||
}
|
||||
|
||||
public Set<Sample> getSamples() {
|
||||
return new HashSet<Sample>(samples.values());
|
||||
}
|
||||
|
||||
public Collection<String> getSampleNames() {
|
||||
return Collections.unmodifiableCollection(samples.keySet());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Takes a collection of sample names and returns their corresponding sample objects
|
||||
* Note that, since a set is returned, if you pass in a list with duplicates names there will not be any duplicates in the returned set
|
||||
* @param sampleNameList Set of sample names
|
||||
* @return Corresponding set of samples
|
||||
*/
|
||||
public Set<Sample> getSamples(Collection<String> sampleNameList) {
|
||||
HashSet<Sample> samples = new HashSet<Sample>();
|
||||
for (String name : sampleNameList) {
|
||||
try {
|
||||
samples.add(getSample(name));
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new StingException("Could not get sample with the following ID: " + name, e);
|
||||
}
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Higher level pedigree functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a sorted set of the family IDs in all samples (excluding null ids)
|
||||
* @return
|
||||
*/
|
||||
public final Set<String> getFamilyIDs() {
|
||||
return getFamilies().keySet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a map from family ID -> set of family members for all samples with
|
||||
* non-null family ids
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public final Map<String, Set<Sample>> getFamilies() {
|
||||
final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
|
||||
|
||||
for ( final Sample sample : samples.values() ) {
|
||||
final String famID = sample.getFamilyID();
|
||||
if ( famID != null ) {
|
||||
if ( ! families.containsKey(famID) )
|
||||
families.put(famID, new TreeSet<Sample>());
|
||||
families.get(famID).add(sample);
|
||||
}
|
||||
}
|
||||
|
||||
return families;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all samples with a given family ID
|
||||
* @param familyId
|
||||
* @return
|
||||
*/
|
||||
public Set<Sample> getFamily(String familyId) {
|
||||
return getFamilies().get(familyId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all children of a given sample
|
||||
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
|
||||
* @param sample
|
||||
* @return
|
||||
*/
|
||||
public Set<Sample> getChildren(Sample sample) {
|
||||
final HashSet<Sample> children = new HashSet<Sample>();
|
||||
for ( final Sample familyMember : getFamily(sample.getFamilyID())) {
|
||||
if ( familyMember.getMother() == sample || familyMember.getFather() == sample ) {
|
||||
children.add(familyMember);
|
||||
}
|
||||
}
|
||||
return children;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SampleDBBuilder {
|
||||
PedigreeValidationType validationStrictness;
|
||||
final SampleDB sampleDB = new SampleDB();
|
||||
final GenomeAnalysisEngine engine;
|
||||
|
||||
Set<Sample> samplesFromDataSources = new HashSet<Sample>();
|
||||
Set<Sample> samplesFromPedigrees = new HashSet<Sample>();
|
||||
|
||||
/** for testing only */
|
||||
protected SampleDBBuilder(PedigreeValidationType validationStrictness) {
|
||||
engine = null;
|
||||
this.validationStrictness = validationStrictness;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor takes both a SAM header and sample files because the two must be integrated.
|
||||
*/
|
||||
public SampleDBBuilder(GenomeAnalysisEngine engine, PedigreeValidationType validationStrictness) {
|
||||
this.engine = engine;
|
||||
this.validationStrictness = validationStrictness;
|
||||
}
|
||||
|
||||
/**
|
||||
* Hallucinates sample objects for all the samples in the SAM file and stores them
|
||||
*/
|
||||
public SampleDBBuilder addSamplesFromSAMHeader(final SAMFileHeader header) {
|
||||
addSamplesFromSampleNames(SampleUtils.getSAMFileSamples(header));
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDBBuilder addSamplesFromSampleNames(final Collection<String> sampleNames) {
|
||||
for (final String sampleName : sampleNames) {
|
||||
if (sampleDB.getSample(sampleName) == null) {
|
||||
final Sample newSample = new Sample(sampleName, sampleDB);
|
||||
sampleDB.addSample(newSample);
|
||||
samplesFromDataSources.add(newSample); // keep track of data source samples
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDBBuilder addSamplesFromPedigreeFiles(final List<File> pedigreeFiles) {
|
||||
for (final File pedFile : pedigreeFiles) {
|
||||
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedFile);
|
||||
samplesFromPedigrees.addAll(samples);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDBBuilder addSamplesFromPedigreeStrings(final List<String> pedigreeStrings) {
|
||||
for (final String pedString : pedigreeStrings) {
|
||||
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedString);
|
||||
samplesFromPedigrees.addAll(samples);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse one sample file and integrate it with samples that are already there
|
||||
* Fail quickly if we find any errors in the file
|
||||
*/
|
||||
private Collection<Sample> addSamplesFromPedigreeArgument(File sampleFile) {
|
||||
final PedReader reader = new PedReader();
|
||||
|
||||
try {
|
||||
return reader.parse(sampleFile, getMissingFields(sampleFile), sampleDB);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(sampleFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
private Collection<Sample> addSamplesFromPedigreeArgument(final String string) {
|
||||
final PedReader reader = new PedReader();
|
||||
return reader.parse(string, getMissingFields(string), sampleDB);
|
||||
}
|
||||
|
||||
public SampleDB getFinalSampleDB() {
|
||||
validate();
|
||||
return sampleDB;
|
||||
}
|
||||
|
||||
public EnumSet<PedReader.MissingPedField> getMissingFields(final Object engineArg) {
|
||||
if ( engine == null )
|
||||
return EnumSet.noneOf(PedReader.MissingPedField.class);
|
||||
else {
|
||||
final List<String> posTags = engine.getTags(engineArg).getPositionalTags();
|
||||
return PedReader.parseMissingFieldTags(engineArg, posTags);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Validation
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
protected final void validate() {
|
||||
if ( validationStrictness == PedigreeValidationType.SILENT )
|
||||
return;
|
||||
else {
|
||||
// check that samples in data sources are all annotated, if anything is annotated
|
||||
if ( ! samplesFromPedigrees.isEmpty() && ! samplesFromDataSources.isEmpty() ) {
|
||||
final Set<String> sampleNamesFromPedigrees = new HashSet<String>();
|
||||
for ( final Sample pSample : samplesFromPedigrees )
|
||||
sampleNamesFromPedigrees.add(pSample.getID());
|
||||
|
||||
for ( final Sample dsSample : samplesFromDataSources )
|
||||
if ( ! sampleNamesFromPedigrees.contains(dsSample.getID()) )
|
||||
throw new UserException("Sample " + dsSample.getID() + " found in data sources but not in pedigree files with STRICT pedigree validation");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -30,11 +30,12 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -87,6 +88,14 @@ public abstract class Walker<MapType, ReduceType> {
|
|||
return getToolkit().getMasterSequenceDictionary();
|
||||
}
|
||||
|
||||
protected SampleDB getSampleDB() {
|
||||
return getToolkit().getSampleDB();
|
||||
}
|
||||
|
||||
protected Sample getSample(final String id) {
|
||||
return getToolkit().getSampleDB().getSample(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* (conceptual static) method that states whether you want to see reads piling up at a locus
|
||||
* that contain a deletion at the locus.
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Gender;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -247,7 +248,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes();
|
||||
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
|
||||
for ( String sample : samples ) {
|
||||
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getToolkit().getSampleById(sample).isMale();
|
||||
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;
|
||||
|
||||
Genotype genotype;
|
||||
boolean isValidation;
|
||||
|
|
|
|||
|
|
@ -227,9 +227,8 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
|
||||
@Override
|
||||
public void initialize() {
|
||||
if ( getToolkit().getSamples().size() != 2 ) {
|
||||
// unbelievably there are actually two samples even when there's just one in the header. God I hate this Samples system
|
||||
throw new UserException.BadArgumentValue("-I", "CallableLoci only works for a single sample, but multiple samples were found in the provided BAM files: " + getToolkit().getSamples());
|
||||
if ( getSampleDB().getSamples().size() != 1 ) {
|
||||
throw new UserException.BadArgumentValue("-I", "CallableLoci only works for a single sample, but multiple samples were found in the provided BAM files: " + getSampleDB().getSamples());
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
|
|
@ -281,20 +282,14 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
|
|||
private HashSet<String> getSamplesFromToolKit(DoCOutputType.Partition type) {
|
||||
HashSet<String> partition = new HashSet<String>();
|
||||
if ( type == DoCOutputType.Partition.sample ) {
|
||||
for ( Set<String> sampleSet : getToolkit().getSamplesByReaders() ) {
|
||||
for ( String s : sampleSet ) {
|
||||
partition.add(s);
|
||||
}
|
||||
}
|
||||
partition.addAll(SampleUtils.getSAMFileSamples(getToolkit()));
|
||||
} else if ( type == DoCOutputType.Partition.readgroup ) {
|
||||
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
|
||||
partition.add(rg.getSample()+"_rg_"+rg.getReadGroupId());
|
||||
}
|
||||
} else if ( type == DoCOutputType.Partition.library ) {
|
||||
for ( Set<String> libraries : getToolkit().getLibrariesByReaders() ) {
|
||||
for ( String l : libraries ) {
|
||||
partition.add(l);
|
||||
}
|
||||
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
|
||||
partition.add(rg.getLibrary());
|
||||
}
|
||||
} else if ( type == DoCOutputType.Partition.center ) {
|
||||
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
|
||||
|
|
|
|||
|
|
@ -248,6 +248,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
@Argument(fullName="nWayOut", shortName="nWayOut", required=false, doc="Generate one output file for each input (-I) bam file")
|
||||
protected String N_WAY_OUT = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="generate_nWayOut_md5s",doc="Generate md5sums for BAMs")
|
||||
protected boolean generateMD5s = false;
|
||||
|
||||
// DEBUGGING OPTIONS FOLLOW
|
||||
|
||||
|
|
@ -401,9 +404,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// if ( args.containsKey("disable_bam_indexing") ) { System.out.println("NO INDEXING!!"); System.exit(1); createIndex = false; }
|
||||
|
||||
if ( N_WAY_OUT.toUpperCase().endsWith(".MAP") ) {
|
||||
writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),SAMFileHeader.SortOrder.coordinate,true, createIndex);
|
||||
writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s);
|
||||
} else {
|
||||
writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, createIndex);
|
||||
writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -32,16 +32,11 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
|
|
|
|||
|
|
@ -36,12 +36,8 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
|||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
|
|
@ -51,6 +47,9 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.collections.CircularArray;
|
||||
import org.broadinstitute.sting.utils.collections.PrimitivePair;
|
||||
|
|
@ -392,7 +391,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
|
||||
|
||||
normalSamples = getToolkit().getSamplesByReaders().get(0);
|
||||
normalSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeaders().get(0));
|
||||
|
||||
try {
|
||||
// we already checked that bedOutput and output_file are not set simultaneously
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -1095,14 +1094,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
// filter the read-base pileup based on min base and mapping qualities:
|
||||
pileup = pileup.getBaseAndMappingFilteredPileup(MIN_BASE_QUALITY_SCORE, MIN_MAPPING_QUALITY_SCORE);
|
||||
if (pileup != null) {
|
||||
for (Sample sample : pileup.getSamples()) {
|
||||
for (final String sample : pileup.getSamples()) {
|
||||
ReadBackedPileup samplePileup = pileup.getPileupForSample(sample);
|
||||
ReadBasesAtPosition readBases = new ReadBasesAtPosition();
|
||||
for (PileupElement p : samplePileup) {
|
||||
if (!p.isDeletion()) // IGNORE deletions for now
|
||||
readBases.putReadBase(p);
|
||||
}
|
||||
sampleReadBases.put(sample.getId(), readBases);
|
||||
sampleReadBases.put(sample, readBases);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Gender;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
||||
/**
|
||||
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
|
||||
* Can also count the number of reads matching a given criterion using read filters (see the
|
||||
* --read-filter command line argument). Simplest example of a read-backed analysis.
|
||||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountMalesWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
Sample sample = getSampleDB().getSample(read);
|
||||
return sample.getGender() == Gender.MALE ? 1 : 0;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
}
|
||||
|
|
@ -130,6 +130,10 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
nVariantLoci++;
|
||||
nMixed++;
|
||||
break;
|
||||
case SYMBOLIC:
|
||||
// ignore symbolic alleles, but don't fail
|
||||
// todo - consistent way of treating symbolic alleles thgoughout codebase?
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
|
||||
/**
|
||||
* By default, this tool throws a UserException when it encounters a field without a value in some record. This
|
||||
* is generally useful when you mistype -F CHRMO, so that you get a friendly warning about CHRMO not being
|
||||
* is generally useful when you mistype -F CHROM, so that you get a friendly warning about CHRMO not being
|
||||
* found before the tool runs through 40M 1000G records. However, in some cases you genuinely want to allow such
|
||||
* fields (e.g., AC not being calculated for filtered records, if included). When provided, this argument
|
||||
* will cause VariantsToTable to write out NA values for missing fields instead of throwing an error.
|
||||
|
|
|
|||
|
|
@ -1,13 +1,10 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
|
|
@ -19,9 +16,6 @@ import java.util.regex.Pattern;
|
|||
* Time: 12:38 PM
|
||||
*/
|
||||
public class MendelianViolation {
|
||||
|
||||
|
||||
|
||||
String sampleMom;
|
||||
String sampleDad;
|
||||
String sampleChild;
|
||||
|
|
@ -34,22 +28,15 @@ public class MendelianViolation {
|
|||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||
|
||||
|
||||
public String getSampleMom() {
|
||||
return sampleMom;
|
||||
}
|
||||
|
||||
public String getSampleDad() {
|
||||
return sampleDad;
|
||||
}
|
||||
|
||||
public String getSampleChild() {
|
||||
return sampleChild;
|
||||
}
|
||||
|
||||
public double getMinGenotypeQuality() {
|
||||
return minGenotypeQuality;
|
||||
}
|
||||
|
|
@ -90,37 +77,12 @@ public class MendelianViolation {
|
|||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||
*/
|
||||
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
|
||||
sampleMom = sample.getMother().getId();
|
||||
sampleDad = sample.getFather().getId();
|
||||
sampleChild = sample.getId();
|
||||
sampleMom = sample.getMother().getID();
|
||||
sampleDad = sample.getFather().getID();
|
||||
sampleChild = sample.getID();
|
||||
minGenotypeQuality = minGenotypeQualityP;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The most common constructor to be used when give a YAML file with the relationships to the engine with the -SM option.
|
||||
* @param engine - The GATK engine, use getToolkit(). That's where the sample information is stored.
|
||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||
*/
|
||||
public MendelianViolation(GenomeAnalysisEngine engine, double minGenotypeQualityP) {
|
||||
boolean gotSampleInformation = false;
|
||||
Collection<Sample> samples = engine.getSamples();
|
||||
// Iterate through all samples in the sample_metadata file but we really can only take one.
|
||||
for (Sample sample : samples) {
|
||||
if (sample.getMother() != null && sample.getFather() != null) {
|
||||
sampleMom = sample.getMother().getId();
|
||||
sampleDad = sample.getFather().getId();
|
||||
sampleChild = sample.getId();
|
||||
minGenotypeQuality = minGenotypeQualityP;
|
||||
gotSampleInformation = true;
|
||||
break; // we can only deal with one trio information
|
||||
}
|
||||
}
|
||||
if (!gotSampleInformation)
|
||||
throw new UserException("YAML file has no sample with relationship information (mother/father)");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
|
||||
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
|
||||
|
|
@ -158,7 +120,7 @@ public class MendelianViolation {
|
|||
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
|
||||
*
|
||||
*/
|
||||
public boolean isViolation (VariantContext vc)
|
||||
public boolean isViolation(VariantContext vc)
|
||||
{
|
||||
return setAlleles(vc) && isViolation();
|
||||
}
|
||||
|
|
@ -172,42 +134,4 @@ public class MendelianViolation {
|
|||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the likelihood ratio for a mendelian violation
|
||||
*/
|
||||
public double violationLikelihoodRatio(VariantContext vc) {
|
||||
double[] logLikAssignments = new double[27];
|
||||
// the matrix to set up is
|
||||
// MOM DAD CHILD
|
||||
// |- AA
|
||||
// AA AA | AB
|
||||
// |- BB
|
||||
// |- AA
|
||||
// AA AB | AB
|
||||
// |- BB
|
||||
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||
int offset = 0;
|
||||
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||
for ( int oChild = 0; oChild < 3; oChild ++ ) {
|
||||
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
|
||||
}
|
||||
}
|
||||
}
|
||||
double[] mvLiks = new double[12];
|
||||
double[] nonMVLiks = new double[15];
|
||||
for ( int i = 0; i < 12; i ++ ) {
|
||||
mvLiks[i] = logLikAssignments[mvOffsets[i]];
|
||||
}
|
||||
|
||||
for ( int i = 0; i < 15; i++) {
|
||||
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
|
||||
}
|
||||
|
||||
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,6 +69,18 @@ public class SampleUtils {
|
|||
return samples;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Same as @link getSAMFileSamples but gets all of the samples
|
||||
* in the SAM files loaded by the engine
|
||||
*
|
||||
* @param engine
|
||||
* @return
|
||||
*/
|
||||
public final static Set<String> getSAMFileSamples(GenomeAnalysisEngine engine) {
|
||||
return SampleUtils.getSAMFileSamples(engine.getSAMFileHeader());
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all of the unique sample names from all VCF rods input by the user
|
||||
*
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import com.google.java.contract.Requires;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -215,6 +215,10 @@ public class UserException extends ReviewedStingException {
|
|||
super(String.format("File %s is malformed: %s caused by %s", f.getAbsolutePath(), message, e.getMessage()));
|
||||
}
|
||||
|
||||
public MalformedFile(String name, String message) {
|
||||
super(String.format("File associated with name %s is malformed: %s", name, message));
|
||||
}
|
||||
|
||||
public MalformedFile(String name, String message, Exception e) {
|
||||
super(String.format("File associated with name %s is malformed: %s caused by %s", name, message, e.getMessage()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,11 +26,9 @@ package org.broadinstitute.sting.utils.pileup;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -114,10 +112,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
calculateCachedData();
|
||||
}
|
||||
|
||||
protected AbstractReadBackedPileup(GenomeLoc loc, Map<Sample,? extends AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
|
||||
protected AbstractReadBackedPileup(GenomeLoc loc, Map<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
|
||||
this.loc = loc;
|
||||
PerSamplePileupElementTracker<PE> tracker = new PerSamplePileupElementTracker<PE>();
|
||||
for(Map.Entry<Sample,? extends AbstractReadBackedPileup<RBP,PE>> pileupEntry: pileupsBySample.entrySet()) {
|
||||
for(Map.Entry<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupEntry: pileupsBySample.entrySet()) {
|
||||
tracker.addElements(pileupEntry.getKey(),pileupEntry.getValue().pileupElementTracker);
|
||||
addPileupToCumulativeStats(pileupEntry.getValue());
|
||||
}
|
||||
|
|
@ -213,7 +211,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutDeletions();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
|
|
@ -251,7 +249,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getOverlappingFragmentFilteredPileup();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
|
|
@ -305,7 +303,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutMappingQualityZeroReads();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
|
|
@ -334,7 +332,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPositiveStrandPileup();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
|
|
@ -363,7 +361,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getNegativeStrandPileup();
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
|
|
@ -393,7 +391,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getFilteredPileup(filter);
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
|
|
@ -425,7 +423,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ,minMapQ);
|
||||
filteredTracker.addElements(sample,pileup.pileupElementTracker);
|
||||
|
|
@ -492,7 +490,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForReadGroup(targetReadGroupId);
|
||||
if(pileup != null)
|
||||
|
|
@ -523,7 +521,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForLane(laneID);
|
||||
if(pileup != null)
|
||||
|
|
@ -550,14 +548,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
}
|
||||
|
||||
public Collection<String> getSampleNames() {
|
||||
public Collection<String> getSamples() {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
Collection<String> sampleNames = new HashSet<String>();
|
||||
for (Sample sample : tracker.getSamples()) {
|
||||
sampleNames.add(sample.getId());
|
||||
}
|
||||
return sampleNames;
|
||||
return new HashSet<String>(tracker.getSamples());
|
||||
}
|
||||
else {
|
||||
Collection<String> sampleNames = new HashSet<String>();
|
||||
|
|
@ -570,16 +564,6 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Sample> getSamples() {
|
||||
if(!(pileupElementTracker instanceof PerSamplePileupElementTracker)) {
|
||||
throw new StingException("Must be an instance of PerSampleElementTracker");
|
||||
}
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
return tracker.getSamples();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
*
|
||||
|
|
@ -604,7 +588,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
|
||||
int current = 0;
|
||||
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||
|
||||
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
|
|
@ -639,7 +623,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
|
||||
@Override
|
||||
public RBP getPileupForSampleNames(Collection<String> sampleNames) {
|
||||
public RBP getPileupForSamples(Collection<String> sampleNames) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PileupElementTracker<PE> filteredElements = tracker.getElements(sampleNames);
|
||||
|
|
@ -665,7 +649,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
|
||||
|
||||
@Override
|
||||
public RBP getPileupForSampleName(String sampleName) {
|
||||
public RBP getPileupForSample(String sampleName) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PileupElementTracker<PE> filteredElements = tracker.getElements(sampleName);
|
||||
|
|
@ -688,30 +672,6 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public RBP getPileupForSample(Sample sample) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PileupElementTracker<PE> filteredElements = tracker.getElements(sample);
|
||||
return filteredElements != null ? (RBP)createNewPileup(loc,filteredElements) : null;
|
||||
}
|
||||
else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
SAMRecord read = p.getRead();
|
||||
if(sample != null) {
|
||||
if(read.getReadGroup() != null && sample.getId().equals(read.getReadGroup().getSample()))
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
else {
|
||||
if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// iterators
|
||||
|
|
@ -801,7 +761,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
int[] countsBySample = createNewPileup(loc,tracker.getElements(sample)).getBaseCounts();
|
||||
for(int i = 0; i < counts.length; i++)
|
||||
counts[i] += countsBySample[i];
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
|
|
@ -42,7 +41,7 @@ class MergingPileupElementIterator<PE extends PileupElement> implements Iterator
|
|||
|
||||
public MergingPileupElementIterator(PerSamplePileupElementTracker<PE> tracker) {
|
||||
perSampleIterators = new PriorityQueue<PeekableIterator<PE>>(Math.max(1,tracker.getSamples().size()),new PileupElementIteratorComparator());
|
||||
for(Sample sample: tracker.getSamples()) {
|
||||
for(final String sample: tracker.getSamples()) {
|
||||
PileupElementTracker<PE> trackerPerSample = tracker.getElements(sample);
|
||||
if(trackerPerSample.size() != 0)
|
||||
perSampleIterators.add(new PeekableIterator<PE>(trackerPerSample.iterator()));
|
||||
|
|
|
|||
|
|
@ -24,8 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -60,52 +58,35 @@ class UnifiedPileupElementTracker<PE extends PileupElement> extends PileupElemen
|
|||
}
|
||||
|
||||
class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
|
||||
private final Map<Sample,PileupElementTracker<PE>> pileup;
|
||||
private final Map<String, Sample> sampleNames = new HashMap<String, Sample>();
|
||||
private final Map<String,PileupElementTracker<PE>> pileup;
|
||||
private int size = 0;
|
||||
|
||||
public PerSamplePileupElementTracker() {
|
||||
pileup = new HashMap<Sample,PileupElementTracker<PE>>();
|
||||
}
|
||||
|
||||
public PerSamplePileupElementTracker(Map<Sample,AbstractReadBackedPileup<?,PE>> pileupsBySample) {
|
||||
pileup = new HashMap<Sample,PileupElementTracker<PE>>();
|
||||
for(Map.Entry<Sample,AbstractReadBackedPileup<?,PE>> entry: pileupsBySample.entrySet()) {
|
||||
Sample sample = entry.getKey();
|
||||
AbstractReadBackedPileup<?,PE> pileupBySample = entry.getValue();
|
||||
pileup.put(sample,pileupBySample.pileupElementTracker);
|
||||
sampleNames.put(sample.getId(), sample);
|
||||
}
|
||||
pileup = new HashMap<String,PileupElementTracker<PE>>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a list of all the samples stored in this pileup.
|
||||
* @return List of samples in this pileup.
|
||||
*/
|
||||
public Collection<Sample> getSamples() {
|
||||
public Collection<String> getSamples() {
|
||||
return pileup.keySet();
|
||||
}
|
||||
|
||||
public PileupElementTracker<PE> getElements(final Sample sample) {
|
||||
public PileupElementTracker<PE> getElements(final String sample) {
|
||||
return pileup.get(sample);
|
||||
}
|
||||
|
||||
public PileupElementTracker<PE> getElements(final String sampleName) {
|
||||
return pileup.get(sampleNames.get(sampleName));
|
||||
}
|
||||
|
||||
public PileupElementTracker<PE> getElements(final Collection<String> selectSampleNames) {
|
||||
PerSamplePileupElementTracker<PE> result = new PerSamplePileupElementTracker<PE>();
|
||||
for (String sample : selectSampleNames) {
|
||||
Sample sampleObject = sampleNames.get(sample);
|
||||
result.addElements(sampleObject, pileup.get(sampleObject));
|
||||
for (final String sample : selectSampleNames) {
|
||||
result.addElements(sample, pileup.get(sample));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public void addElements(final Sample sample, PileupElementTracker<PE> elements) {
|
||||
public void addElements(final String sample, PileupElementTracker<PE> elements) {
|
||||
pileup.put(sample,elements);
|
||||
sampleNames.put(sample.getId(), sample);
|
||||
size += elements.size();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
|
|
@ -121,20 +120,7 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
|
|||
* Gets a list of all the samples stored in this pileup.
|
||||
* @return List of samples in this pileup.
|
||||
*/
|
||||
public Collection<String> getSampleNames();
|
||||
|
||||
/**
|
||||
* Gets a list of all the samples stored in this pileup.
|
||||
* @return List of samples in this pileup.
|
||||
*/
|
||||
public Collection<Sample> getSamples();
|
||||
|
||||
/**
|
||||
* Gets the particular subset of this pileup with the given sample name.
|
||||
* @param sample Name of the sample to use.
|
||||
* @return A subset of this pileup containing only reads with the given sample.
|
||||
*/
|
||||
public ReadBackedExtendedEventPileup getPileupForSample(Sample sample);
|
||||
public Collection<String> getSamples();
|
||||
|
||||
public Iterable<ExtendedEventPileupElement> toExtendedIterable();
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -56,7 +55,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
|||
}
|
||||
|
||||
// this is the good new one
|
||||
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<Sample,? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
|
||||
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
|
||||
super(loc,pileupElementsBySample);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
|
||||
|
|
@ -137,18 +136,11 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
|
|||
*/
|
||||
public ReadBackedPileup getPileupForLane(String laneID);
|
||||
|
||||
|
||||
/**
|
||||
* Gets a collection of all the samples stored in this pileup.
|
||||
* @return Collection of samples in this pileup.
|
||||
*/
|
||||
public Collection<Sample> getSamples();
|
||||
|
||||
/**
|
||||
* Gets a collection of *names* of all the samples stored in this pileup.
|
||||
* @return Collection of names
|
||||
*/
|
||||
public Collection<String> getSampleNames();
|
||||
public Collection<String> getSamples();
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -156,7 +148,7 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
|
|||
* @param sampleNames Name of the sample to use.
|
||||
* @return A subset of this pileup containing only reads with the given sample.
|
||||
*/
|
||||
public ReadBackedPileup getPileupForSampleNames(Collection<String> sampleNames);
|
||||
public ReadBackedPileup getPileupForSamples(Collection<String> sampleNames);
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -164,14 +156,7 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
|
|||
* @param sampleName Name of the sample to use.
|
||||
* @return A subset of this pileup containing only reads with the given sample.
|
||||
*/
|
||||
public ReadBackedPileup getPileupForSampleName(String sampleName);
|
||||
|
||||
/**
|
||||
* Gets the particular subset of this pileup with the given sample.
|
||||
* @param sample Sample to use.
|
||||
* @return A subset of this pileup containing only reads with the given sample.
|
||||
*/
|
||||
public ReadBackedPileup getPileupForSample(Sample sample);
|
||||
public ReadBackedPileup getPileupForSample(String sampleName);
|
||||
|
||||
/**
|
||||
* Simple useful routine to count the number of deletion bases in this pileup
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -48,7 +47,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
|
|||
super(loc,pileupElements);
|
||||
}
|
||||
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, Map<Sample,ReadBackedPileupImpl> pileupElementsBySample) {
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, Map<String,ReadBackedPileupImpl> pileupElementsBySample) {
|
||||
super(loc,pileupElementsBySample);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -51,18 +51,18 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
|||
private boolean presorted ;
|
||||
GenomeAnalysisEngine toolkit;
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
||||
this.presorted = presorted;
|
||||
this.toolkit = toolkit;
|
||||
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly);
|
||||
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5);
|
||||
}
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly ) {
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
|
||||
this.presorted = presorted;
|
||||
this.toolkit = toolkit;
|
||||
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||
setupByReader(toolkit,ext,order, presorted, indexOnTheFly);
|
||||
setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -73,8 +73,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
|||
* @param toolkit
|
||||
* @param in2out
|
||||
*/
|
||||
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
|
||||
|
||||
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
||||
if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
|
||||
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
||||
|
||||
|
|
@ -88,7 +87,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
|||
if ( writerMap.containsKey( rid ) )
|
||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
||||
|
||||
addWriter(rid,outName, order, presorted, indexOnTheFly);
|
||||
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -101,7 +100,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
|||
* @param toolkit
|
||||
* @param ext
|
||||
*/
|
||||
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
|
||||
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
||||
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
||||
|
||||
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
|
||||
|
|
@ -119,16 +118,19 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
|||
if ( writerMap.containsKey( rid ) )
|
||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
||||
|
||||
addWriter(rid,outName, order, presorted, indexOnTheFly);
|
||||
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
|
||||
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
||||
File f = new File(outName);
|
||||
SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
|
||||
header.setSortOrder(order);
|
||||
SAMFileWriter sw = new SAMFileWriterFactory().setCreateIndex(indexOnTheFly).makeSAMOrBAMWriter(header, presorted, f);
|
||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
factory.setCreateIndex(indexOnTheFly);
|
||||
factory.setCreateMd5File(generateMD5);
|
||||
SAMFileWriter sw = factory.makeSAMOrBAMWriter(header, presorted, f);
|
||||
writerMap.put(id,sw);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -99,9 +99,9 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
|
|||
*
|
||||
* @param reader
|
||||
*/
|
||||
public XReadLines(final BufferedReader reader, final boolean trimWhitespace) {
|
||||
public XReadLines(final Reader reader, final boolean trimWhitespace) {
|
||||
try {
|
||||
this.in = reader;
|
||||
this.in = new BufferedReader(reader);
|
||||
nextline = readNextLine();
|
||||
this.trimWhitespace = trimWhitespace;
|
||||
} catch(IOException e) {
|
||||
|
|
@ -109,7 +109,7 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
|
|||
}
|
||||
}
|
||||
|
||||
public XReadLines(final BufferedReader reader) throws FileNotFoundException {
|
||||
public XReadLines(final Reader reader) {
|
||||
this(reader, true);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ import java.util.*;
|
|||
* vc.hasGenotypes()
|
||||
* vc.isMonomorphic()
|
||||
* vc.isPolymorphic()
|
||||
* vc.getSampleNames().size()
|
||||
* vc.getSamples().size()
|
||||
*
|
||||
* vc.getGenotypes()
|
||||
* vc.getGenotypes().get("g1")
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ public class MD5DB {
|
|||
System.out.printf("##### Skipping update, cannot write file %s%n", dbFile);
|
||||
}
|
||||
} else {
|
||||
System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath());
|
||||
//System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -170,6 +170,18 @@ public class MD5DB {
|
|||
return bytes;
|
||||
}
|
||||
|
||||
public static class MD5Match {
|
||||
final String md5;
|
||||
final String failMessage;
|
||||
boolean failed;
|
||||
|
||||
public MD5Match(final String md5, final String failMessage, final boolean failed) {
|
||||
this.md5 = md5;
|
||||
this.failMessage = failMessage;
|
||||
this.failed = failed;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different.
|
||||
* @param name Name of the test.
|
||||
|
|
@ -178,18 +190,21 @@ public class MD5DB {
|
|||
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
|
||||
* @return The calculated MD5.
|
||||
*/
|
||||
public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize);
|
||||
public static MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
final String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize);
|
||||
String failMessage = null;
|
||||
boolean failed = false;
|
||||
|
||||
if (parameterize || expectedMD5.equals("")) {
|
||||
// Don't assert
|
||||
} else if ( filemd5sum.equals(expectedMD5) ) {
|
||||
System.out.println(String.format(" => %s PASSED", name));
|
||||
System.out.println(String.format(" => %s PASSED (expected=%s)", name, expectedMD5));
|
||||
} else {
|
||||
Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum));
|
||||
failed = true;
|
||||
failMessage = String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum);
|
||||
}
|
||||
|
||||
return filemd5sum;
|
||||
return new MD5Match(filemd5sum, failMessage, failed);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -218,8 +233,8 @@ public class MD5DB {
|
|||
System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b",
|
||||
name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5)));
|
||||
} else {
|
||||
System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
|
||||
System.out.flush();
|
||||
//System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
|
||||
//System.out.flush();
|
||||
|
||||
if ( ! expectedMD5.equals(filemd5sum) ) {
|
||||
// we are going to fail for real in assertEquals (so we are counted by the testing framework).
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ public class WalkerTest extends BaseTest {
|
|||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
}
|
||||
|
||||
public String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) {
|
||||
public MD5DB.MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) {
|
||||
return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize());
|
||||
}
|
||||
|
||||
|
|
@ -84,10 +84,23 @@ public class WalkerTest extends BaseTest {
|
|||
|
||||
public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s) {
|
||||
List<String> md5s = new ArrayList<String>();
|
||||
List<MD5DB.MD5Match> fails = new ArrayList<MD5DB.MD5Match>();
|
||||
|
||||
for (int i = 0; i < resultFiles.size(); i++) {
|
||||
String md5 = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i));
|
||||
maybeValidateSupplementaryFile(name, resultFiles.get(i));
|
||||
md5s.add(i, md5);
|
||||
MD5DB.MD5Match result = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i));
|
||||
if ( ! result.failed ) {
|
||||
maybeValidateSupplementaryFile(name, resultFiles.get(i));
|
||||
md5s.add(result.md5);
|
||||
} else {
|
||||
fails.add(result);
|
||||
}
|
||||
}
|
||||
|
||||
if ( ! fails.isEmpty() ) {
|
||||
for ( final MD5DB.MD5Match fail : fails ) {
|
||||
logger.warn("Fail: " + fail.failMessage);
|
||||
}
|
||||
Assert.fail("Test failed: " + name);
|
||||
}
|
||||
|
||||
return md5s;
|
||||
|
|
|
|||
|
|
@ -8,11 +8,9 @@ import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
|
|||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
|
@ -51,7 +49,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
|
||||
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.<SAMReaderID>emptyList(),genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null);
|
||||
|
||||
|
|
@ -67,7 +65,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(shardBounds));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
|
||||
|
|
@ -82,7 +80,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -96,7 +94,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -110,7 +108,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -124,7 +122,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -138,7 +136,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -152,7 +150,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -167,7 +165,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -186,7 +184,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -205,7 +203,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -226,7 +224,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
@ -254,7 +252,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
read07, read08, read09, read10, read11, read12);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
|||
|
||||
import com.google.caliper.Param;
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
|
|
@ -34,15 +33,12 @@ import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
|||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -88,12 +84,9 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
|||
(byte)0);
|
||||
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
SampleDataSource sampleDataSource = new SampleDataSource();
|
||||
sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());
|
||||
|
||||
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
|
||||
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);
|
||||
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
while(locusIteratorByState.hasNext()) {
|
||||
locusIteratorByState.next().getLocation();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,241 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Sep 9, 2010
|
||||
* Time: 8:21:00 AM
|
||||
*/
|
||||
public class SampleDataSourceUnitTest extends BaseTest {
|
||||
|
||||
// this empty header used to instantiate sampledatasource objects
|
||||
private static SAMFileHeader header = new SAMFileHeader();
|
||||
|
||||
// all the test sample files are located here
|
||||
private String sampleFilesDir = validationDataLocation + "samples/";
|
||||
|
||||
// make sure samples are created from the SAM file correctly
|
||||
@Test()
|
||||
public void loadSAMSamplesTest() {
|
||||
SampleDataSource s = new SampleDataSource(header, null);
|
||||
}
|
||||
|
||||
// tests that a basic sample with relationships loads correctly
|
||||
// Note that this is the only test for family relationships - we may want to expand this
|
||||
@Test()
|
||||
public void basicLoadSampleFileTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
Assert.assertTrue(s.sampleCount() == 5);
|
||||
Sample sampleA = s.getSampleById("sampleA");
|
||||
Sample sampleB = s.getSampleById("sampleB");
|
||||
Assert.assertTrue(sampleB.getMother() == sampleA);
|
||||
Assert.assertTrue(s.getChildren(sampleA).contains(sampleB));
|
||||
Set<Sample> family = s.getFamily("family1");
|
||||
Assert.assertTrue(family.size() == 2);
|
||||
Assert.assertTrue(family.contains(sampleA));
|
||||
Assert.assertTrue(family.contains(sampleB));
|
||||
}
|
||||
|
||||
// but that file should fail if it has an extra character in it...
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void loadInvalidSampleExtraCharText() {
|
||||
File sampleFile = new File(sampleFilesDir + "invalidSyntaxExtraChar.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
}
|
||||
|
||||
// ...or a typo...
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void loadInvalidSampleTypoText() {
|
||||
File sampleFile = new File(sampleFilesDir + "invalidSyntaxTypo.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
|
||||
}
|
||||
|
||||
// ...or an extra unrecognized array
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void loadInvalidSampleExtraArrayText() {
|
||||
File sampleFile = new File(sampleFilesDir + "invalidSyntaxExtraArray.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
}
|
||||
|
||||
// make sure aliases work
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void sampleAliasText() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFileWithAlias.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
// this file has two samples, but one has an alias. let's make sure that checks out...
|
||||
Assert.assertTrue(s.sampleCount() == 3);
|
||||
Assert.assertTrue(s.getSampleById("sampleA") == s.getSampleById("sampleC"));
|
||||
}
|
||||
|
||||
// error is thrown if property is included that's not in properties array
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void unallowedPropertySampleTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFileUnallowedProperty.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
}
|
||||
|
||||
// same as above, with relationship
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void unallowedRelationshipSampleTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFileUnallowedRelationship.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
}
|
||||
|
||||
// two sample files
|
||||
@Test()
|
||||
public void twoSampleFilesTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
|
||||
File secondFile = new File(sampleFilesDir + "basicSampleFileExt.yaml");
|
||||
ArrayList<File> files = new ArrayList<File>();
|
||||
files.add(sampleFile);
|
||||
files.add(secondFile);
|
||||
SampleDataSource s = new SampleDataSource(header, files);
|
||||
Assert.assertTrue(s.getSampleById("sampleA").getProperty("propC").equals("valC"));
|
||||
Assert.assertTrue(s.getSampleById("sampleA").getProperty("propA").equals("valA"));
|
||||
}
|
||||
|
||||
// two sample files, with contradictory properties
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void twoContradictorySampleFilesTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
|
||||
File secondFile = new File(sampleFilesDir + "basicSampleFileInvalidExt.yaml");
|
||||
ArrayList<File> files = new ArrayList<File>();
|
||||
files.add(sampleFile);
|
||||
files.add(secondFile);
|
||||
SampleDataSource s = new SampleDataSource(header, files);
|
||||
}
|
||||
|
||||
// three sample files
|
||||
@Test()
|
||||
public void threeSamplesTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
|
||||
ArrayList<File> files = new ArrayList<File>();
|
||||
files.add(sampleFile);
|
||||
files.add(new File(sampleFilesDir + "basicSampleFileExt.yaml"));
|
||||
files.add(new File(sampleFilesDir + "basicSampleFileExt2.yaml"));
|
||||
SampleDataSource s = new SampleDataSource(header, files);
|
||||
Assert.assertTrue(s.sampleCount() == 6);
|
||||
Assert.assertTrue(s.getSampleById("sampleE").getProperty("propC").equals("valC"));
|
||||
Assert.assertTrue(s.getSampleById("sampleA").getProperty("propA").equals("valA"));
|
||||
}
|
||||
|
||||
/**
|
||||
* testing getSamplesWithProperty
|
||||
* in this file there are 5 samples - 2 with population "CEU", 1 with population "ABC", 1 with no population,
|
||||
* and then the default null sample
|
||||
*/
|
||||
@Test()
|
||||
public void getSamplesWithPropertyTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
Assert.assertTrue(s.sampleCount() == 5);
|
||||
Set<Sample> ceuSamples = s.getSamplesWithProperty("population", "CEU");
|
||||
Assert.assertTrue(ceuSamples.size() == 2);
|
||||
|
||||
Iterator<Sample> i = ceuSamples.iterator();
|
||||
ArrayList<String> sampleNames = new ArrayList<String>();
|
||||
sampleNames.add(i.next().getId());
|
||||
sampleNames.add(i.next().getId());
|
||||
Assert.assertTrue(sampleNames.contains("sampleA"));
|
||||
Assert.assertTrue(sampleNames.contains("sampleB"));
|
||||
}
|
||||
|
||||
// make sure we can import data types other than Strings
|
||||
@Test()
|
||||
public void sampleTestPropertyType() {
|
||||
File sampleFile = new File(sampleFilesDir + "sampleFileOtherTypes.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
Sample sample = s.getSampleById("sampleA");
|
||||
Assert.assertTrue(sample.getProperty("a").getClass() == Integer.class);
|
||||
Assert.assertTrue(sample.getProperty("b").getClass() == String.class);
|
||||
Assert.assertTrue(sample.getProperty("c").getClass() == Double.class);
|
||||
Assert.assertTrue(sample.getProperty("b").getClass() == String.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* check that getSamplesFromVariantContext works
|
||||
* create a variant context with two sample names, and make sure the right samples are there
|
||||
*/
|
||||
@Test()
|
||||
public void variantContextTest() {
|
||||
SampleDataSource s = new SampleDataSource(header, null);
|
||||
List<Allele> alleleCollection = new ArrayList<Allele>();
|
||||
Allele a1 = Allele.create("A", true);
|
||||
alleleCollection.add(a1);
|
||||
|
||||
Set<Genotype> genotypeCollection = new HashSet<Genotype>();
|
||||
genotypeCollection.add(new Genotype("NA123", alleleCollection));
|
||||
genotypeCollection.add(new Genotype("NA456", alleleCollection));
|
||||
|
||||
VariantContext v = new VariantContext("contextName", "chr1", 1, 1, alleleCollection, genotypeCollection);
|
||||
|
||||
// make sure the set that's returned is the right size
|
||||
HashSet<Sample> set = (HashSet) s.getSamplesByVariantContext(v);
|
||||
Assert.assertTrue(set.size() == 2);
|
||||
|
||||
// make sure both samples are included
|
||||
Iterator<Sample> i = set.iterator();
|
||||
ArrayList<String> sampleNames = new ArrayList<String>();
|
||||
sampleNames.add(i.next().getId());
|
||||
sampleNames.add(i.next().getId());
|
||||
Assert.assertTrue(sampleNames.contains("NA123"));
|
||||
Assert.assertTrue(sampleNames.contains("NA456"));
|
||||
}
|
||||
|
||||
/**
|
||||
* checking subContextFromSampleProperty
|
||||
*/
|
||||
|
||||
/**
|
||||
* check that subContextFromSampleProperty works
|
||||
* create a variant context with four sample names, make sure that it filters correctly to 2
|
||||
*/
|
||||
@Test()
|
||||
public void subContextFromSamplePropertyTest() {
|
||||
|
||||
File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
Assert.assertTrue(s.sampleCount() == 5);
|
||||
|
||||
List<Allele> alleleCollection = new ArrayList<Allele>();
|
||||
Allele a1 = Allele.create("A", true);
|
||||
alleleCollection.add(a1);
|
||||
|
||||
Set<Genotype> genotypeCollection = new HashSet<Genotype>();
|
||||
genotypeCollection.add(new Genotype("NA123", alleleCollection));
|
||||
genotypeCollection.add(new Genotype("sampleA", alleleCollection));
|
||||
genotypeCollection.add(new Genotype("sampleB", alleleCollection));
|
||||
genotypeCollection.add(new Genotype("sampleC", alleleCollection));
|
||||
|
||||
VariantContext v = new VariantContext("contextName", "chr1", 1, 1, alleleCollection, genotypeCollection);
|
||||
VariantContext subContext = s.subContextFromSampleProperty(v, "population", "CEU");
|
||||
|
||||
Assert.assertTrue(subContext.getSampleNames().contains("sampleA"));
|
||||
Assert.assertTrue(subContext.getSampleNames().contains("sampleA"));
|
||||
Assert.assertTrue(subContext.getSampleNames().size() == 2);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// we create lots of single item lists...
|
||||
private ArrayList<File> makeFileList(File file) {
|
||||
ArrayList<File> a = new ArrayList<File>();
|
||||
a.add(file);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Sep 9, 2010
|
||||
* Time: 8:21:00 AM
|
||||
*/
|
||||
public class SampleUnitTest extends BaseTest {
|
||||
|
||||
static Sample sampleA;
|
||||
static Sample sampleA1;
|
||||
static Sample sampleB;
|
||||
static Sample sampleC;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
sampleA = new Sample("sampleA");
|
||||
sampleA.setProperty("uniqueProperty", "uniqueValue");
|
||||
sampleA1 = new Sample("sampleA");
|
||||
sampleA1.setProperty("uniqueProperty", "uniqueValue");
|
||||
sampleB = new Sample("sampleB");
|
||||
sampleC = new Sample("sampleC");
|
||||
sampleC.setProperty("population", "pop1");
|
||||
sampleC.setProperty("gender", Sample.Gender.MALE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing equality
|
||||
*/
|
||||
@Test()
|
||||
public void equalsTest() {
|
||||
Assert.assertTrue(sampleA.equals(sampleA1));
|
||||
Assert.assertFalse(sampleA == sampleA1);
|
||||
Assert.assertFalse(sampleA.equals(sampleB));
|
||||
}
|
||||
|
||||
/**
|
||||
* And hash
|
||||
*/
|
||||
@Test()
|
||||
public void basicHashTest() {
|
||||
Assert.assertFalse(sampleA.hashCode() == sampleB.hashCode());
|
||||
Assert.assertTrue(sampleA.hashCode() == sampleA1.hashCode());
|
||||
}
|
||||
|
||||
/**
|
||||
* Now test the special getter methods
|
||||
*/
|
||||
@Test()
|
||||
public void specialGettersTest() {
|
||||
Assert.assertTrue(sampleC.getId().equals("sampleC"));
|
||||
Assert.assertTrue(sampleC.getPopulation().equals("pop1"));
|
||||
Assert.assertTrue(sampleC.isMale());
|
||||
Assert.assertFalse(sampleA.isMale()); // sample A doesn't have a gender, so this should be false
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,6 +1,5 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -12,7 +11,6 @@ import org.testng.Assert;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -29,11 +27,8 @@ import java.util.*;
|
|||
* testing of the LocusIteratorByState
|
||||
*/
|
||||
public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||
|
||||
private final int MAX_READS = 10;
|
||||
private static SAMFileHeader header;
|
||||
private LocusIteratorByState li;
|
||||
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
|
|
@ -42,6 +37,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
private final LocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
|
||||
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIndelBaseQualityFiltering() {
|
||||
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
|
|
@ -68,7 +68,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
List<SAMRecord> reads = Arrays.asList(before,during,after);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser, new SampleDataSource());
|
||||
li = makeLTBS(reads,readAttributes);
|
||||
|
||||
boolean foundExtendedEventPileup = false;
|
||||
while (li.hasNext()) {
|
||||
|
|
@ -120,7 +120,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
List<SAMRecord> reads = Arrays.asList(before,during,after);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser, new SampleDataSource());
|
||||
li = makeLTBS(reads,readAttributes);
|
||||
|
||||
boolean foundExtendedEventPileup = false;
|
||||
while (li.hasNext()) {
|
||||
|
|
@ -154,7 +154,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser,new SampleDataSource());
|
||||
li = makeLTBS(reads, readAttributes);
|
||||
|
||||
// Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read
|
||||
// and considers it to be an indel-containing read.
|
||||
|
|
@ -167,7 +167,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
|
||||
// Turn on extended events, and make sure the event is found.
|
||||
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser,new SampleDataSource());
|
||||
li = makeLTBS(reads, readAttributes);
|
||||
|
||||
Assert.assertTrue(li.hasNext(),"LocusIteratorByState with extended events should contain exactly one pileup");
|
||||
alignmentContext = li.next();
|
||||
|
|
@ -203,7 +203,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
List<SAMRecord> reads = Arrays.asList(leadingRead,indelOnlyRead,fullMatchAfterIndel);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),createTestReadProperties(),genomeLocParser,new SampleDataSource());
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
int currentLocus = firstLocus;
|
||||
int numAlignmentContextsFound = 0;
|
||||
|
||||
|
|
@ -260,7 +260,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
List<SAMRecord> reads = Arrays.asList(leadingRead,indelOnlyRead,fullMatchAfterIndel);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser,new SampleDataSource());
|
||||
li = makeLTBS(reads,readAttributes);
|
||||
|
||||
Assert.assertTrue(li.hasNext(),"Missing first locus at " + firstLocus);
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,353 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* UnitTest for PedReader
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 2011
|
||||
*/
|
||||
public class PedReaderUnitTest extends BaseTest {
|
||||
private static Logger logger = Logger.getLogger(PedReaderUnitTest.class);
|
||||
|
||||
private class PedReaderTest extends TestDataProvider {
|
||||
public String fileContents;
|
||||
public List<Sample> expectedSamples;
|
||||
EnumSet<PedReader.MissingPedField> missing;
|
||||
|
||||
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
|
||||
super(PedReaderTest.class, name);
|
||||
this.fileContents = fileContents;
|
||||
this.expectedSamples = expectedSamples;
|
||||
}
|
||||
}
|
||||
|
||||
// Family ID
|
||||
// Individual ID
|
||||
// Paternal ID
|
||||
// Maternal ID
|
||||
// Sex (1=male; 2=female; other=unknown)
|
||||
// Phenotype
|
||||
//
|
||||
// -9 missing
|
||||
// 0 missing
|
||||
// 1 unaffected
|
||||
// 2 affected
|
||||
|
||||
@DataProvider(name = "readerTest")
|
||||
public Object[][] createPEDFiles() {
|
||||
new PedReaderTest("singleRecordMale",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
|
||||
"fam1 kid 0 0 1 1");
|
||||
|
||||
new PedReaderTest("singleRecordFemale",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.FEMALE, Affection.UNAFFECTED)),
|
||||
"fam1 kid 0 0 2 1");
|
||||
|
||||
new PedReaderTest("singleRecordMissingGender",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.UNKNOWN, Affection.UNKNOWN)),
|
||||
"fam1 kid 0 0 0 0");
|
||||
|
||||
// Affection
|
||||
new PedReaderTest("singleRecordAffected",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED)),
|
||||
"fam1 kid 0 0 1 2");
|
||||
|
||||
new PedReaderTest("singleRecordUnaffected",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
|
||||
"fam1 kid 0 0 1 1");
|
||||
|
||||
new PedReaderTest("singleRecordMissingAffection-9",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
|
||||
"fam1 kid 0 0 1 -9");
|
||||
|
||||
new PedReaderTest("singleRecordMissingAffection0",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
|
||||
"fam1 kid 0 0 1 0");
|
||||
|
||||
new PedReaderTest("multipleUnrelated",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
|
||||
String.format("%s%n%s",
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 2"));
|
||||
|
||||
new PedReaderTest("multipleUnrelatedExtraLine",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
|
||||
String.format("%s%n%s%n %n", // note extra newlines and whitespace
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 2"));
|
||||
|
||||
new PedReaderTest("explicitTrio",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
|
||||
String.format("%s%n%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1",
|
||||
"fam1 mom 0 0 2 2"));
|
||||
|
||||
new PedReaderTest("implicitTrio",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
"fam1 kid dad mom 1 2");
|
||||
|
||||
new PedReaderTest("partialTrio",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
String.format("%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1"));
|
||||
|
||||
new PedReaderTest("bigPedigree",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
|
||||
new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
|
||||
new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
String.format("%s%n%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad granddad1 grandma1 1 1",
|
||||
"fam1 mom granddad2 grandma2 2 2"));
|
||||
|
||||
// Quantitative trait
|
||||
new PedReaderTest("OtherPhenotype",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.OTHER, "1"),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10.0")),
|
||||
String.format("%s%n%s",
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 10.0"));
|
||||
|
||||
new PedReaderTest("OtherPhenotypeWithMissing",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10.0")),
|
||||
String.format("%s%n%s",
|
||||
"fam1 s1 0 0 1 -9",
|
||||
"fam2 s2 0 0 2 10.0"));
|
||||
|
||||
new PedReaderTest("OtherPhenotypeOnlyInts",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.OTHER, "1"),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10")),
|
||||
String.format("%s%n%s",
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 10"));
|
||||
|
||||
return PedReaderTest.getTests(PedReaderTest.class);
|
||||
}
|
||||
|
||||
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
|
||||
logger.warn("Test " + test);
|
||||
PedReader reader = new PedReader();
|
||||
SampleDB sampleDB = new SampleDB();
|
||||
List<Sample> readSamples = reader.parse(myFileContents, missing, sampleDB);
|
||||
Assert.assertEquals(new HashSet<Sample>(test.expectedSamples), new HashSet<Sample>(readSamples));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTest")
|
||||
public void testPedReader(PedReaderTest test) {
|
||||
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedField.class));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTest")
|
||||
public void testPedReaderWithComments(PedReaderTest test) {
|
||||
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedField.class));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTest")
|
||||
public void testPedReaderWithSemicolons(PedReaderTest test) {
|
||||
runTest(test,
|
||||
test.fileContents.replace(String.format("%n"), ";"),
|
||||
EnumSet.noneOf(PedReader.MissingPedField.class));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// missing format field tests
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
private class PedReaderTestMissing extends TestDataProvider {
|
||||
public EnumSet<PedReader.MissingPedField> missingDesc;
|
||||
public EnumSet<PedReader.Field> missingFields;
|
||||
public final String fileContents;
|
||||
public Sample expected;
|
||||
|
||||
|
||||
private PedReaderTestMissing(final String name, final String fileContents,
|
||||
EnumSet<PedReader.MissingPedField> missingDesc,
|
||||
EnumSet<PedReader.Field> missingFields,
|
||||
final Sample expected) {
|
||||
super(PedReaderTestMissing.class, name);
|
||||
this.fileContents = fileContents;
|
||||
this.missingDesc = missingDesc;
|
||||
this.missingFields = missingFields;
|
||||
this.expected = expected;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "readerTestMissing")
|
||||
public Object[][] createPEDFilesWithMissing() {
|
||||
new PedReaderTestMissing("missingFam",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID),
|
||||
EnumSet.of(PedReader.Field.FAMILY_ID),
|
||||
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED));
|
||||
|
||||
new PedReaderTestMissing("missingParents",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_PARENTS),
|
||||
EnumSet.of(PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID),
|
||||
new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED));
|
||||
|
||||
new PedReaderTestMissing("missingSex",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_SEX),
|
||||
EnumSet.of(PedReader.Field.GENDER),
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.UNKNOWN, Affection.AFFECTED));
|
||||
|
||||
new PedReaderTestMissing("missingPhenotype",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE),
|
||||
EnumSet.of(PedReader.Field.PHENOTYPE),
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.UNKNOWN));
|
||||
|
||||
new PedReaderTestMissing("missingEverythingButGender",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS, PedReader.MissingPedField.NO_FAMILY_ID),
|
||||
EnumSet.of(PedReader.Field.FAMILY_ID, PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID, PedReader.Field.PHENOTYPE),
|
||||
new Sample("kid", null, null, null, Gender.MALE, Affection.UNKNOWN));
|
||||
|
||||
|
||||
return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTestMissing")
|
||||
public void testPedReaderWithMissing(PedReaderTestMissing test) {
|
||||
final String contents = sliceContents(test.missingFields, test.fileContents);
|
||||
logger.warn("Test " + test);
|
||||
PedReader reader = new PedReader();
|
||||
SampleDB sampleDB = new SampleDB();
|
||||
reader.parse(new StringReader(contents), test.missingDesc, sampleDB);
|
||||
final Sample missingSample = sampleDB.getSample("kid");
|
||||
Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test);
|
||||
}
|
||||
|
||||
private final static String sliceContents(EnumSet<PedReader.Field> missingFieldsSet, String full) {
|
||||
List<String> parts = new ArrayList<String>(Arrays.asList(full.split("\\s+")));
|
||||
final List<PedReader.Field> missingFields = new ArrayList<PedReader.Field>(missingFieldsSet);
|
||||
Collections.reverse(missingFields);
|
||||
for ( PedReader.Field field : missingFields )
|
||||
parts.remove(field.ordinal());
|
||||
return Utils.join("\t", parts);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// parsing tags
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
private class PedReaderTestTagParsing extends TestDataProvider {
|
||||
public EnumSet<PedReader.MissingPedField> expected;
|
||||
public final List<String> tags;
|
||||
|
||||
private PedReaderTestTagParsing(final List<String> tags, EnumSet<PedReader.MissingPedField> missingDesc) {
|
||||
super(PedReaderTestTagParsing.class);
|
||||
this.tags = tags;
|
||||
this.expected = missingDesc;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "readerTestTagParsing")
|
||||
public Object[][] createReaderTestTagParsing() {
|
||||
new PedReaderTestTagParsing(
|
||||
Collections.<String>emptyList(),
|
||||
EnumSet.noneOf(PedReader.MissingPedField.class));
|
||||
|
||||
new PedReaderTestTagParsing(
|
||||
Arrays.asList("NO_FAMILY_ID"),
|
||||
EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID));
|
||||
|
||||
new PedReaderTestTagParsing(
|
||||
Arrays.asList("NO_PARENTS"),
|
||||
EnumSet.of(PedReader.MissingPedField.NO_PARENTS));
|
||||
|
||||
new PedReaderTestTagParsing(
|
||||
Arrays.asList("NO_PHENOTYPE"),
|
||||
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE));
|
||||
|
||||
new PedReaderTestTagParsing(
|
||||
Arrays.asList("NO_SEX"),
|
||||
EnumSet.of(PedReader.MissingPedField.NO_SEX));
|
||||
|
||||
new PedReaderTestTagParsing(
|
||||
Arrays.asList("NO_SEX", "NO_PHENOTYPE"),
|
||||
EnumSet.of(PedReader.MissingPedField.NO_SEX, PedReader.MissingPedField.NO_PHENOTYPE));
|
||||
|
||||
new PedReaderTestTagParsing(
|
||||
Arrays.asList("NO_SEX", "NO_PHENOTYPE", "NO_PARENTS"),
|
||||
EnumSet.of(PedReader.MissingPedField.NO_SEX, PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS));
|
||||
|
||||
return PedReaderTestTagParsing.getTests(PedReaderTestTagParsing.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTestTagParsing")
|
||||
public void testPedReaderTagParsing(PedReaderTestTagParsing test) {
|
||||
EnumSet<PedReader.MissingPedField> parsed = PedReader.parseMissingFieldTags("test", test.tags);
|
||||
Assert.assertEquals(test.expected, parsed, "Failed to properly parse tags " + test.tags);
|
||||
}
|
||||
|
||||
@Test(enabled = true, expectedExceptions = UserException.class)
|
||||
public void testPedReaderTagParsing1() {
|
||||
EnumSet<PedReader.MissingPedField> parsed = PedReader.parseMissingFieldTags("test", Arrays.asList("XXX"));
|
||||
}
|
||||
|
||||
@Test(enabled = true, expectedExceptions = UserException.class)
|
||||
public void testPedReaderTagParsing2() {
|
||||
EnumSet<PedReader.MissingPedField> parsed = PedReader.parseMissingFieldTags("test", Arrays.asList("NO_SEX", "XXX"));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,157 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Sep 9, 2010
|
||||
* Time: 8:21:00 AM
|
||||
*/
|
||||
public class SampleDBUnitTest extends BaseTest {
|
||||
private static SampleDBBuilder builder;
|
||||
// all the test sample files are located here
|
||||
private File testPED = new File(testDir + "ceutrio.ped");
|
||||
|
||||
private static final Set<Sample> testPEDSamples = new HashSet<Sample>(Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));
|
||||
|
||||
private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
|
||||
new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
||||
new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
||||
new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));
|
||||
|
||||
private static final String testPEDString =
|
||||
String.format("%s%n%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1",
|
||||
"fam1 mom 0 0 2 2");
|
||||
|
||||
private static final String testPEDMultipleFamilies =
|
||||
String.format("%s%n%s%n%s%n%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1",
|
||||
"fam1 mom 0 0 2 2",
|
||||
"fam3 s1 d1 m1 2 2",
|
||||
"fam2 s2 d2 m2 2 2");
|
||||
|
||||
private static final String testPEDStringInconsistentGender =
|
||||
"fam1 kid 0 0 2 2";
|
||||
|
||||
private static final Set<Sample> testPEDSamplesAsSet =
|
||||
new HashSet<Sample>(testPEDSamples);
|
||||
|
||||
|
||||
@BeforeMethod
|
||||
public void before() {
|
||||
builder = new SampleDBBuilder(PedigreeValidationType.STRICT);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadPEDFile() {
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testPEDSamplesAsSet, db.getSamples());
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadPEDString() {
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDString));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testPEDSamplesAsSet, db.getSamples());
|
||||
}
|
||||
|
||||
private static final void addSAMHeader() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
ArtificialSAMUtils.createEnumeratedReadGroups(header, Arrays.asList("1", "2", "3"),
|
||||
Arrays.asList("kid", "mom", "dad"));
|
||||
builder.addSamplesFromSAMHeader(header);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadSAMHeader() {
|
||||
addSAMHeader();
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testSAMSamples, db.getSamples());
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadSAMHeaderPlusPED() {
|
||||
addSAMHeader();
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testPEDSamples, db.getSamples());
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadDuplicateData() {
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testPEDSamples, db.getSamples());
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
public void loadNonExistentFile() {
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(new File("non-existence-file.txt")));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testSAMSamples, db.getSamples());
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
public void loadInconsistentData() {
|
||||
builder = new SampleDBBuilder(PedigreeValidationType.STRICT);
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDStringInconsistentGender));
|
||||
builder.getFinalSampleDB();
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
public void sampleInSAMHeaderNotInSamplesDB() {
|
||||
addSAMHeader();
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDStringInconsistentGender));
|
||||
builder.getFinalSampleDB();
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void getFamilyIDs() {
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(db.getFamilyIDs(), new TreeSet<String>(Arrays.asList("fam1", "fam2", "fam3")));
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void getFamily() {
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadFamilyIDs() {
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Map<String, Set<Sample>> families = db.getFamilies();
|
||||
Assert.assertEquals(families.size(), 3);
|
||||
Assert.assertEquals(families.keySet(), new TreeSet<String>(Arrays.asList("fam1", "fam2", "fam3")));
|
||||
|
||||
for ( final String famID : families.keySet() ) {
|
||||
final Set<Sample> fam = families.get(famID);
|
||||
Assert.assertEquals(fam.size(), 3);
|
||||
for ( final Sample sample : fam ) {
|
||||
Assert.assertEquals(sample.getFamilyID(), famID);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SampleUnitTest extends BaseTest {
|
||||
SampleDB db;
|
||||
static Sample fam1A, fam1B, fam1C;
|
||||
static Sample s1, s2;
|
||||
static Sample trait1, trait2, trait3, trait4, trait5;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
db = new SampleDB();
|
||||
|
||||
fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN);
|
||||
fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE);
|
||||
fam1C = new Sample("1C", db, "fam1", null, null, Gender.FEMALE);
|
||||
|
||||
s1 = new Sample("s1", db);
|
||||
s2 = new Sample("s2", db);
|
||||
|
||||
trait1 = new Sample("t1", db, Affection.AFFECTED, Sample.UNSET_QT);
|
||||
trait2 = new Sample("t2", db, Affection.UNAFFECTED, Sample.UNSET_QT);
|
||||
trait3 = new Sample("t3", db, Affection.UNKNOWN, Sample.UNSET_QT);
|
||||
trait4 = new Sample("t4", db, Affection.OTHER, "1.0");
|
||||
trait5 = new Sample("t4", db, Affection.OTHER, "CEU");
|
||||
}
|
||||
|
||||
/**
|
||||
* Now basic getters
|
||||
*/
|
||||
@Test()
|
||||
public void normalGettersTest() {
|
||||
Assert.assertEquals("1A", fam1A.getID());
|
||||
Assert.assertEquals("fam1", fam1A.getFamilyID());
|
||||
Assert.assertEquals("1B", fam1A.getPaternalID());
|
||||
Assert.assertEquals("1C", fam1A.getMaternalID());
|
||||
Assert.assertEquals(null, fam1B.getPaternalID());
|
||||
Assert.assertEquals(null, fam1B.getMaternalID());
|
||||
|
||||
Assert.assertEquals(Affection.AFFECTED, trait1.getAffection());
|
||||
Assert.assertEquals(Sample.UNSET_QT, trait1.getOtherPhenotype());
|
||||
Assert.assertEquals(Affection.UNAFFECTED, trait2.getAffection());
|
||||
Assert.assertEquals(Sample.UNSET_QT, trait2.getOtherPhenotype());
|
||||
Assert.assertEquals(Affection.UNKNOWN, trait3.getAffection());
|
||||
Assert.assertEquals(Sample.UNSET_QT, trait3.getOtherPhenotype());
|
||||
Assert.assertEquals(Affection.OTHER, trait4.getAffection());
|
||||
Assert.assertEquals("1.0", trait4.getOtherPhenotype());
|
||||
Assert.assertEquals("CEU", trait5.getOtherPhenotype());
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testGenders() {
|
||||
Assert.assertTrue(fam1A.getGender() == Gender.UNKNOWN);
|
||||
Assert.assertTrue(fam1B.getGender() == Gender.MALE);
|
||||
Assert.assertTrue(fam1C.getGender() == Gender.FEMALE);
|
||||
}
|
||||
}
|
||||
|
|
@ -57,11 +57,11 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
|
|||
// now add the expected files that get generated
|
||||
spec.addAuxFile("423571e4c05e7934322172654ac6dbb7", baseOutputFile);
|
||||
spec.addAuxFile("9df5e7e07efeb34926c94a724714c219", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("b9a7748e5aec4dc06daed893c901c00d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("229b9b5bc2141c86dbc69c8acc9eba6a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics"));
|
||||
spec.addAuxFile("aec669d64d9dd652dd088a5341835ea5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
|
||||
spec.addAuxFile("f6dbd74d32a48abe71ce08d300bce983", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
|
||||
spec.addAuxFile("e3a3467ed259ee3680f8d01980f525b7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
|
||||
spec.addAuxFile("471c34ad2e4f7228efd20702d5941ba9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
|
||||
spec.addAuxFile("9667c77284c2c08e647b162d0e9652d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
|
||||
spec.addAuxFile("5a96c75f96d6fa6ee617451d731dae37", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
|
||||
spec.addAuxFile("b82846df660f0aac8429aec57c2a62d6", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
|
||||
|
|
@ -69,11 +69,11 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
|
|||
spec.addAuxFile("e70952f241eebb9b5448f2e7cb288131", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
|
||||
spec.addAuxFile("054ed1e184f46d6a170dc9bf6524270c", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
|
||||
spec.addAuxFile("d53431022f7387fe9ac47814ab1fcd88", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("650ee3714da7fbad7832c9d4ad49eb51", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("a395dafde101971d2b9e5ddb6cd4b7d0", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
|
||||
spec.addAuxFile("7dcac2e8962c778081486332a4576dc3", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
|
||||
spec.addAuxFile("a50011571334f17e950ad3ed1149e350", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
||||
spec.addAuxFile("6f3260504295695d765af639539585c9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
|
||||
spec.addAuxFile("e013cb5b11b0321a81c8dbd7c1863787", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
|
||||
spec.addAuxFile("661160f571def8c323345b5859cfb9da", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
||||
spec.addAuxFile("c95a7a6840334cadd0e520939615c77b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
|
||||
|
||||
execute("testBaseOutputNoFiltering",spec);
|
||||
}
|
||||
|
|
@ -90,7 +90,7 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
|
|||
spec.setOutputFileLocation(baseOutputFile);
|
||||
|
||||
spec.addAuxFile("6ccd7d8970ba98cb95fe41636a070c1c",baseOutputFile);
|
||||
spec.addAuxFile("0ee40f3e5091536c14e077b77557083a",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
|
||||
spec.addAuxFile("7d87783b3d98b928cac16d383ceca807",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
|
||||
|
||||
execute("testNoCoverageDueToFiltering",spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("e6639ea2dc81635c706e6c35921406d7"));
|
||||
Arrays.asList("b27939251539439a382538e507e03507"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("4be308fd9e8167ebee677f62a7a753b7"));
|
||||
Arrays.asList("37e891bf1ac40caec9ea228f39c27e44"));
|
||||
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,16 +25,14 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.clipreads;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: roger
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ public class IntervalIntegrationTest extends WalkerTest {
|
|||
// our base file
|
||||
File baseOutputFile = createTempFile("testUnmappedReadInclusion",".bam");
|
||||
spec.setOutputFileLocation(baseOutputFile);
|
||||
spec.addAuxFile("99c266d777e2e167b8153c858c305fda",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
|
||||
spec.addAuxFile("748a38ed5eb0a043dfc7b82f0d1e8063",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
|
||||
spec.addAuxFile("fadcdf88597b9609c5f2a17f4c6eb455", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
|
||||
|
||||
executeTest("testUnmappedReadInclusion",spec);
|
||||
|
|
@ -96,8 +96,8 @@ public class IntervalIntegrationTest extends WalkerTest {
|
|||
// our base file
|
||||
File baseOutputFile = createTempFile("testUnmappedReadExclusion",".bam");
|
||||
spec.setOutputFileLocation(baseOutputFile);
|
||||
spec.addAuxFile("8236f0b2df5a692e54751b08bc3836fa",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
|
||||
spec.addAuxFile("651b42456d31ba24e913297b71b32143", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
|
||||
spec.addAuxFile("80887ba488e53dabd9596ff93070ae75",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
|
||||
spec.addAuxFile("b341d808ecc33217f37c0c0cde2a3e2f", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
|
||||
|
||||
executeTest("testUnmappedReadExclusion",spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -141,9 +140,9 @@ public class ReadBackedPileupUnitTest {
|
|||
ReadBackedPileupImpl sample2Pileup = new ReadBackedPileupImpl(null,
|
||||
Arrays.asList(read2,read4),
|
||||
Arrays.asList(1,1));
|
||||
Map<Sample,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<Sample,ReadBackedPileupImpl>();
|
||||
sampleToPileupMap.put(new Sample(readGroupOne.getSample()),sample1Pileup);
|
||||
sampleToPileupMap.put(new Sample(readGroupTwo.getSample()),sample2Pileup);
|
||||
Map<String,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String,ReadBackedPileupImpl>();
|
||||
sampleToPileupMap.put(readGroupOne.getSample(),sample1Pileup);
|
||||
sampleToPileupMap.put(readGroupTwo.getSample(),sample2Pileup);
|
||||
|
||||
ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
|
||||
|
||||
|
|
@ -164,13 +163,13 @@ public class ReadBackedPileupUnitTest {
|
|||
|
||||
@Test
|
||||
public void testGetPileupForSample() {
|
||||
Sample sample1 = new Sample("sample1");
|
||||
Sample sample2 = new Sample("sample2");
|
||||
String sample1 = "sample1";
|
||||
String sample2 = "sample2";
|
||||
|
||||
SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
|
||||
readGroupOne.setSample(sample1.getId());
|
||||
readGroupOne.setSample(sample1);
|
||||
SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
|
||||
readGroupTwo.setSample(sample2.getId());
|
||||
readGroupTwo.setSample(sample2);
|
||||
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
|
||||
header.addReadGroup(readGroupOne);
|
||||
|
|
@ -181,24 +180,20 @@ public class ReadBackedPileupUnitTest {
|
|||
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
|
||||
read2.setAttribute("RG",readGroupTwo.getId());
|
||||
|
||||
Map<Sample,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<Sample,ReadBackedPileupImpl>();
|
||||
Map<String,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String,ReadBackedPileupImpl>();
|
||||
sampleToPileupMap.put(sample1,new ReadBackedPileupImpl(null,Collections.singletonList(read1),0));
|
||||
sampleToPileupMap.put(sample2,new ReadBackedPileupImpl(null,Collections.singletonList(read2),0));
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
|
||||
|
||||
ReadBackedPileup sample1Pileup = pileup.getPileupForSample(sample1);
|
||||
Assert.assertEquals(sample1Pileup.size(),1,"Sample 1 pileup has wrong number of elements");
|
||||
Assert.assertEquals(sample1Pileup.getReads().get(0),read1,"Sample 1 pileup has incorrect read");
|
||||
|
||||
ReadBackedPileup sample2Pileup = pileup.getPileupForSampleName(sample2.getId());
|
||||
ReadBackedPileup sample2Pileup = pileup.getPileupForSample(sample2);
|
||||
Assert.assertEquals(sample2Pileup.size(),1,"Sample 2 pileup has wrong number of elements");
|
||||
Assert.assertEquals(sample2Pileup.getReads().get(0),read2,"Sample 2 pileup has incorrect read");
|
||||
|
||||
ReadBackedPileup missingSamplePileup = pileup.getPileupForSample(new Sample("missing"));
|
||||
ReadBackedPileup missingSamplePileup = pileup.getPileupForSample("missing");
|
||||
Assert.assertNull(missingSamplePileup,"Pileup for sample 'missing' should be null but isn't");
|
||||
|
||||
missingSamplePileup = pileup.getPileupForSampleName("not here");
|
||||
missingSamplePileup = pileup.getPileupForSample("not here");
|
||||
Assert.assertNull(missingSamplePileup,"Pileup for sample 'not here' should be null but isn't");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import java.net.URLClassLoader;
|
||||
|
||||
/**
|
||||
* Test to ensure that, given only the VCF jar and its expected dependencies, core VCF classes will load.
|
||||
*/
|
||||
public class VCFJarClassLoadingUnitTest {
|
||||
@Test
|
||||
public void testVCFJarClassLoading() throws ClassNotFoundException, MalformedURLException {
|
||||
URI vcfURI = new File("dist/vcf.jar").toURI();
|
||||
URI tribbleURI = new File("lib/tribble-24.jar").toURI();
|
||||
|
||||
ClassLoader classLoader = new URLClassLoader(new URL[] {vcfURI.toURL(),tribbleURI.toURL()}, null);
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFWriter");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter");
|
||||
}
|
||||
}
|
||||
|
|
@ -1,202 +0,0 @@
|
|||
package org.broadinstitute.sting.queue.qscripts
|
||||
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk.RodBind
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
|
||||
class StandardVariantEvaluation extends QScript {
|
||||
// todo -- update to released version when things stabilize
|
||||
@Argument(doc="gatkJarFile", required=false)
|
||||
var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKFromLaptop/trunk/dist/GenomeAnalysisTK.jar")
|
||||
|
||||
@Argument(shortName = "R", doc="B37 reference sequence: defaults to broad standard location", required=false)
|
||||
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
|
||||
|
||||
@Argument(shortName = "intervals", doc="intervals to evaluate. Only supports evaluation on chromosome 20 now, as most evaluation data is there", required=false)
|
||||
val TARGET_INTERVAL: String = "20"
|
||||
|
||||
@Argument(shortName = "includeUnion", doc="If provided, we'll create a union of the evaluation data sets for evaluation", required=false)
|
||||
val CREATE_UNION: Boolean = false
|
||||
|
||||
@Argument(shortName = "dataDir", doc="Path to the standard evaluation data files", required=false)
|
||||
val DATA_DIR = "/humgen/gsa-hpprojects/GATK/data/Comparisons/StandardForEvaluation/b37/"
|
||||
|
||||
@Argument(shortName = "evalStandard1000GCalls", doc="If provided, we'll include some standard 1000G data for evaluation", required=false)
|
||||
val EVAL_STANDARD_1000G_CALLS: Boolean = false
|
||||
|
||||
val COMPS_DIR = DATA_DIR + "/comps/"
|
||||
val EVALS_DIR = DATA_DIR + "/evals/"
|
||||
|
||||
@Argument(shortName = "moreSNPsToEval", doc="Path to additional SNP call sets for evaluation", required=false)
|
||||
val moreSNPsToEval: List[File] = Nil
|
||||
|
||||
@Argument(shortName = "moreIndelsToEval", doc="Path to additional Indel call sets for evaluation", required=false)
|
||||
val moreIndelsToEval: List[File] = Nil
|
||||
|
||||
val VARIANT_TYPES: List[String] = List("indels", "snps")
|
||||
val VARIANT_TYPE_VT: Map[String, List[org.broad.tribble.util.variantcontext.VariantContext.Type]] = Map(
|
||||
"indels" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.INDEL, org.broad.tribble.util.variantcontext.VariantContext.Type.MIXED, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION),
|
||||
"snps" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.SNP, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION)
|
||||
)
|
||||
|
||||
val SITES_DIR: String = "sitesFiles"
|
||||
|
||||
// path to b37 DBSNP
|
||||
@Argument(shortName = "dbsnp", doc="Path to DBSNP **VCF** for evaluation", required=false)
|
||||
val MY_DBSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b37.leftAligned.vcf")
|
||||
//val MY_DBSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf");
|
||||
|
||||
class Comp(val name: String, val evalType: String, val filename: String, val MakeHomVar: Boolean = false) {
|
||||
val originalFile = new File(COMPS_DIR + filename)
|
||||
val file: File = if ( MakeHomVar ) swapExt(originalFile, ".vcf",".homvar.vcf") else originalFile
|
||||
val sitesFile = new File(SITES_DIR + "/" + swapExt(file, ".vcf", ".sites.vcf").getName)
|
||||
}
|
||||
|
||||
class Eval(val name: String, val evalType: String, val filename: String, val overrideFile: File = null ) {
|
||||
val file: File = if ( overrideFile != null ) overrideFile else new File(EVALS_DIR + "/" + filename)
|
||||
}
|
||||
|
||||
var COMPS: List[Comp] = Nil
|
||||
def addComp(comp: Comp) { COMPS = comp :: COMPS }
|
||||
|
||||
var EVALS: List[Eval] = Nil
|
||||
def addEval(eval: Eval) { EVALS = eval :: EVALS }
|
||||
def addEvalFromCMD(file: File, t: String) { addEval(new Eval(file.getName, t, null, file)) }
|
||||
|
||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||
this.logging_level = "INFO";
|
||||
this.jarFile = gatkJarFile;
|
||||
this.intervalsString = List(TARGET_INTERVAL);
|
||||
this.reference_sequence = referenceFile;
|
||||
this.memoryLimit = 2
|
||||
}
|
||||
|
||||
def initializeStandardDataFiles() = {
|
||||
//
|
||||
// Standard evaluation files for indels
|
||||
//
|
||||
addComp(new Comp("NA12878.homvar.GATK", "indels", "Indels.NA12878_WGS.filtered_Q50.0_QD5.0_SB-1.0_HR18.vcf", true))
|
||||
addComp(new Comp("CG.38samples", "indels", "CG.Indels.leftAligned.b37.vcf"))
|
||||
addComp(new Comp("NA12878.homvar.CG", "indels", "NA12878.CG.b37.indels.vcf", true))
|
||||
addComp(new Comp("g1k.pilot1.validation", "indels", "pilot1_indel_validation_2009.b37.vcf"))
|
||||
addComp(new Comp("NA12878.hand_curated", "indels", "NA12878.validated.curated.polymorphic.indels.vcf"))
|
||||
addComp(new Comp("NA12878.Mullikin", "indels", "NA12878.DIPline.NQScm.expanded.chr20.b37.minReads_2_or_gt2bp.vcf"))
|
||||
|
||||
|
||||
//
|
||||
// INDEL call sets
|
||||
//
|
||||
if ( EVAL_STANDARD_1000G_CALLS ) {
|
||||
addEval(new Eval("dindel", "indels", "20110208.chr20.dindel2.EUR.sites.vcf"))
|
||||
addEval(new Eval("si", "indels", "20101123.chr20.si.v2.EUR.sites.vcf"))
|
||||
addEval(new Eval("gatk", "indels", "EUR.phase1.chr20.broad.filtered.indels.sites.vcf"))
|
||||
}
|
||||
|
||||
//
|
||||
// Standard evaluation files for SNPs
|
||||
//
|
||||
addComp(new Comp("NA12878.homvar.GATK", "snps", "NA12878.HiSeq19.cut.vcf", true))
|
||||
addComp(new Comp("CG.38samples", "snps", "CG.38samples.b37.vcf"))
|
||||
addComp(new Comp("NA12878.homvar.CG", "snps", "NA12878.CG.b37.snps.vcf", true))
|
||||
addComp(new Comp("HapMap3.3", "snps", "hapmap3.3.sites_r27_nr.b37_fwd.vcf"))
|
||||
addComp(new Comp("OMNI.2.5M", "snps", "omni2.5.1212samples.b37.sites.chr20.monoAreAC0.vcf"))
|
||||
addComp(new Comp("g1k.pilot1.validation", "snps", "1000G.snp.validation.b37.vcf"))
|
||||
|
||||
//
|
||||
// SNP call sets
|
||||
//
|
||||
if ( EVAL_STANDARD_1000G_CALLS ) {
|
||||
addEval(new Eval("1000G.gatk.eurPlus.phase1", "snps", "EUR+.phase1.chr20.broad.recal.vrcut1p0.sites.vcf"))
|
||||
addEval(new Eval("1000G.high_specificity.phase1", "snps", "ALL.phase1.chr20.projectConsensus.highSpecificity.snps.genotypes.sites.vcf"))
|
||||
}
|
||||
}
|
||||
|
||||
def script = {
|
||||
val sitesDir = new File(SITES_DIR)
|
||||
if ( ! sitesDir.exists ) sitesDir.mkdirs()
|
||||
|
||||
initializeStandardDataFiles();
|
||||
|
||||
// add additional files for evaluation, if necessary
|
||||
moreSNPsToEval.foreach(addEvalFromCMD(_, "snps"))
|
||||
moreIndelsToEval.foreach(addEvalFromCMD(_, "indels"))
|
||||
|
||||
//
|
||||
// create hom-var versions of key files
|
||||
//
|
||||
for ( comp <- COMPS )
|
||||
if ( comp.MakeHomVar )
|
||||
add(new SelectHomVars(comp.originalFile, comp.file))
|
||||
|
||||
for ( comp <- COMPS )
|
||||
add(new JustSites(comp.file, comp.sitesFile))
|
||||
|
||||
//
|
||||
// Loop over evaluation types
|
||||
//
|
||||
for ( evalType <- VARIANT_TYPES ) {
|
||||
var evalsOfType = EVALS.filter(_.evalType == evalType)
|
||||
val compsOfType = COMPS.filter(_.evalType == evalType)
|
||||
|
||||
if ( evalsOfType.size > 0 ) {
|
||||
|
||||
// if desired and possible, create a union.X.vcf file
|
||||
if ( CREATE_UNION && evalsOfType.size > 1 ) {
|
||||
val union: File = new File("union.%s.vcf".format(evalType))
|
||||
add(new MyCombine(evalsOfType.map(_.file), union));
|
||||
evalsOfType = new Eval("union", evalType, null, union) :: evalsOfType
|
||||
}
|
||||
|
||||
// our root VE
|
||||
val VE = new MyEval()
|
||||
VE.VT = VARIANT_TYPE_VT(evalType)
|
||||
VE.o = new File(evalType + ".eval")
|
||||
|
||||
// add evals
|
||||
for ( calls <- evalsOfType )
|
||||
VE.rodBind :+= RodBind("eval_" + calls.name, "VCF", calls.file)
|
||||
|
||||
// add comps
|
||||
//VE.rodBind :+= RodBind("dbsnp", "VCF", MY_DBSNP)
|
||||
for ( comp <- compsOfType )
|
||||
VE.rodBind :+= RodBind("comp_" + comp.name, "VCF", comp.sitesFile)
|
||||
|
||||
add(VE)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Select homozygous non-reference sites from a single deep data set
|
||||
*/
|
||||
class SelectHomVars(@Input(doc="foo") vcf: File, @Output(doc="foo") out: File) extends SelectVariants with UNIVERSAL_GATK_ARGS {
|
||||
this.rodBind :+= RodBind("variant", "VCF", vcf)
|
||||
this.o = out
|
||||
this.select ++= List("\"AC == 2\"")
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple union
|
||||
*/
|
||||
class MyCombine(@Input(doc="foo") vcfs: List[File], @Output(doc="foo") out: File) extends CombineVariants with UNIVERSAL_GATK_ARGS {
|
||||
for ( vcf <- vcfs )
|
||||
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
|
||||
this.o = out
|
||||
}
|
||||
|
||||
/**
|
||||
* A command line (cut) that removes all genotyping information from a file
|
||||
*/
|
||||
class JustSites(@Input(doc="foo") in: File, @Output(doc="foo") out: File) extends CommandLineFunction {
|
||||
def commandLine = "cut -f 1-8 %s > %s".format(in, out)
|
||||
}
|
||||
|
||||
/**
|
||||
* Base class for VariantEval used here
|
||||
*/
|
||||
class MyEval() extends VariantEval with UNIVERSAL_GATK_ARGS {
|
||||
this.noST = true
|
||||
this.evalModule :+= "ValidationReport"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
fam1 kid dad mom 1 2
|
||||
fam1 dad 0 0 1 1
|
||||
fam1 mom 0 0 2 2
|
||||
Loading…
Reference in New Issue