Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2011-10-06 13:41:45 -04:00
commit 1b0735f0a3
67 changed files with 2047 additions and 2149 deletions

View File

@ -146,12 +146,14 @@
<mkdir dir="${lib.dir}"/>
<mkdir dir="${ivy.jar.dir}"/>
<!-- Comment out the following two lines to build the GATK without a network connection, assuming you have all of the libraries cached already -->
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
dest="${ivy.jar.dir}/${ivy.jar.file}"
usetimestamp="true"/>
<taskdef resource="org/apache/ivy/ant/antlib.xml"
uri="antlib:org.apache.ivy.ant"
classpath="${ivy.jar.dir}/${ivy.jar.file}"/>
<ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/>
<property name="init.resolve.done" value="true"/>
</target>
@ -546,8 +548,9 @@
<target name="vcf.jar" depends="gatk.compile,init.jar">
<jar jarfile="${dist.dir}/vcf.jar">
<fileset dir="${java.classes}">
<include name="**/utils/codecs/**/*.class"/>
<include name="**/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/utils/codecs/vcf/**/*.class"/>
<include name="org/broadinstitute/sting/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
</fileset>
@ -914,7 +917,7 @@
</target>
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
<target name="test" depends="init.buildall,test.compile,vcf.jar" description="Run unit tests">
<condition property="ttype" value="*UnitTest" else="${single}">
<not><isset property="single"/></not>
</condition>

View File

@ -99,5 +99,5 @@ gsa.read.gatkreport <- function(filename) {
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
}
gatkreport = as.list(tableEnv);
gatkreport = as.list(tableEnv, all.names=TRUE);
}

View File

@ -34,28 +34,23 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.util.*;
@ -92,7 +87,7 @@ public class GenomeAnalysisEngine {
/**
* Accessor for sample metadata
*/
private SampleDataSource sampleDataSource = null;
private SampleDB sampleDB = null;
/**
* Accessor for sharded reference-ordered data.
@ -206,6 +201,9 @@ public class GenomeAnalysisEngine {
// Prepare the data for traversal.
initializeDataSources();
// initialize sampleDB
initializeSampleDB();
// initialize and validate the interval list
initializeIntervals();
validateSuppliedIntervals();
@ -692,12 +690,22 @@ public class GenomeAnalysisEngine {
for (ReadFilter filter : filters)
filter.initialize(this);
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
// set the sequence dictionary of all of the Tribble tracks to the sequence dictionary of our reference
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
}
/**
* Entry-point function to initialize the samples database from input data and pedigree arguments
*/
private void initializeSampleDB() {
SampleDBBuilder sampleDBBuilder = new SampleDBBuilder(this, argCollection.pedigreeValidationType);
// Samples referenced by the read data (BAM read group records)
sampleDBBuilder.addSamplesFromSAMHeader(getSAMFileHeader());
// Samples referenced only by variant / reference-ordered-data inputs
sampleDBBuilder.addSamplesFromSampleNames(SampleUtils.getUniqueSamplesFromRods(this));
// Pedigree metadata supplied on the command line (-ped files and -pedString records)
sampleDBBuilder.addSamplesFromPedigreeFiles(argCollection.pedigreeFiles);
sampleDBBuilder.addSamplesFromPedigreeStrings(argCollection.pedigreeStrings);
// Finalize and validate the assembled database (strictness per pedigreeValidationType)
sampleDB = sampleDBBuilder.getFinalSampleDB();
}
/**
* Gets a unique identifier for the reader sourcing this read.
* @param read Read to examine.
@ -716,100 +724,6 @@ public class GenomeAnalysisEngine {
return getReadsDataSource().getSAMFile(id);
}
/**
* Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names
* found in the corresponding bam file.
*
* @return Sets of samples in the merged input SAM stream, grouped by readers
*/
public List<Set<String>> getSamplesByReaders() {
    // Build one sample-name set per input reader, preserving reader order.
    final Collection<SAMReaderID> readerIDs = getReadsDataSource().getReaderIDs();
    final List<Set<String>> samplesPerReader = new ArrayList<Set<String>>(readerIDs.size());
    for (final SAMReaderID readerID : readerIDs) {
        final Set<String> samplesInReader = new HashSet<String>(1);
        for (final SAMReadGroupRecord readGroup : getReadsDataSource().getHeader(readerID).getReadGroups()) {
            samplesInReader.add(readGroup.getSample());
        }
        samplesPerReader.add(samplesInReader);
    }
    return samplesPerReader;
}
/**
* Returns sets of libraries present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of library names
* found in the corresponding bam file.
*
* @return Sets of libraries present in the (merged) input SAM stream, grouped by readers
*/
public List<Set<String>> getLibrariesByReaders() {
    // Build one library-name set per input reader, preserving reader order.
    final Collection<SAMReaderID> readerIDs = getReadsDataSource().getReaderIDs();
    final List<Set<String>> librariesPerReader = new ArrayList<Set<String>>(readerIDs.size());
    for (final SAMReaderID readerID : readerIDs) {
        final Set<String> librariesInReader = new HashSet<String>(2);
        for (final SAMReadGroupRecord readGroup : getReadsDataSource().getHeader(readerID).getReadGroups()) {
            librariesInReader.add(readGroup.getLibrary());
        }
        librariesPerReader.add(librariesInReader);
    }
    return librariesPerReader;
}
/**
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
*
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups
* (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file.
*
* @return sets of (merged) read group ids in order of input bams
*/
public List<Set<String>> getMergedReadGroupsByReaders() {
    final Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
    final List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());

    // Hoisted out of the loops: whether the merged header remapped colliding read
    // group ids is a property of the whole data source, not of any single read group,
    // so there is no need to re-query it on every inner-loop iteration.
    final boolean hasCollisions = getReadsDataSource().hasReadGroupCollisions();

    for (final SAMReaderID r : readers) {
        final Set<String> groups = new HashSet<String>(5);
        rg_sets.add(groups);
        for (final SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
            if (hasCollisions) {
                // use HeaderMerger to translate the original read group id from the reader
                // into the read group id seen in the merged stream, and record that
                // remapped id so it can be associated with this specific reader
                groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId()));
            } else {
                // otherwise, pass through the unmapped read groups since this is what Picard does as well
                groups.add(g.getReadGroupId());
            }
        }
    }
    return rg_sets;
}
/**
* Now that all files are open, validate that the sequence dictionaries of the reads, the reference, and the reference-ordered data (if available) are consistent.
*
@ -929,6 +843,18 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader(reader);
}
/**
* Returns an ordered list of the unmerged SAM file headers known to this engine.
* @return list of header for each input SAM file, in command line order
*/
public List<SAMFileHeader> getSAMFileHeaders() {
    // Collect the unmerged header of every input SAM file, in command-line order.
    final List<SAMFileHeader> allHeaders = new ArrayList<SAMFileHeader>();
    for (final SAMReaderID readerID : getReadsDataSource().getReaderIDs()) {
        allHeaders.add(getReadsDataSource().getHeader(readerID));
    }
    return allHeaders;
}
/**
* Gets the master sequence dictionary for this GATK engine instance
* @return a never-null dictionary listing all of the contigs known to this engine instance
@ -947,8 +873,6 @@ public class GenomeAnalysisEngine {
return this.readsDataSource;
}
/**
* Sets the collection of GATK main application arguments.
*
@ -1035,140 +959,14 @@ public class GenomeAnalysisEngine {
return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
}
// Accessor for the engine-owned sample metadata store.
public SampleDataSource getSampleMetadata() {
return this.sampleDataSource;
}
// -------------------------------------------------------------------------------------
//
// code for working with Samples database
//
// -------------------------------------------------------------------------------------
/**
* Get a sample by its ID
* If an alias is passed in, return the main sample object
* @param id sample id
* @return sample Object with this ID
*/
public Sample getSampleById(String id) {
// Thin delegate; the data source resolves aliases to the main sample object.
return sampleDataSource.getSampleById(id);
}
/**
* Get the sample for a given read group
* Must first look up ID for read group
* @param readGroup of sample
* @return sample object with ID from the read group
*/
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
// Thin delegate; the data source maps the read group's sample id to a Sample.
return sampleDataSource.getSampleByReadGroup(readGroup);
}
/**
* Get a sample for a given read
* Must first look up read group, and then sample ID for that read group
* @param read of sample
* @return sample object of this read
*/
public Sample getSampleByRead(SAMRecord read) {
// Convenience wrapper: resolve the read's read group, then its sample.
// NOTE(review): assumes read.getReadGroup() is non-null or handled by the delegate — confirm.
return getSampleByReadGroup(read.getReadGroup());
}
/**
* Get number of sample objects
* @return size of samples map
*/
public int sampleCount() {
// Thin delegate to the sample metadata store.
return sampleDataSource.sampleCount();
}
/**
* Return all samples with a given family ID
* Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
* @param familyId family ID
* @return Samples with the given family ID
*/
public Set<Sample> getFamily(String familyId) {
// Thin delegate; linear scan inside the data source (see efficiency note above).
return sampleDataSource.getFamily(familyId);
}
/**
* Returns all children of a given sample
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
* @param sample parent sample
* @return children of the given sample
*/
public Set<Sample> getChildren(Sample sample) {
// Thin delegate; inherits getFamily()'s linear cost.
return sampleDataSource.getChildren(sample);
}
/**
* Gets all the samples
* @return
*/
public Collection<Sample> getSamples() {
// Thin delegate: all samples known to the data source.
return sampleDataSource.getSamples();
}
/**
* Takes a list of sample names and returns their corresponding sample objects
*
* @param sampleNameList List of sample names
* @return Corresponding set of samples
*/
public Set<Sample> getSamples(Collection<String> sampleNameList) {
// Thin delegate: bulk name -> Sample lookup.
return sampleDataSource.getSamples(sampleNameList);
}
/**
* Returns a set of samples that have any value (which could be null) for a given property
* @param key Property key
* @return Set of samples with the property
*/
public Set<Sample> getSamplesWithProperty(String key) {
// Thin delegate; matches samples that define the key, even with a null value.
return sampleDataSource.getSamplesWithProperty(key);
}
/**
* Returns a set of samples that have a property with a certain value
* Value must be a string for now - could add a similar method for matching any objects in the future
*
* @param key Property key
* @param value String property value
* @return Set of samples that match key and value
*/
public Set<Sample> getSamplesWithProperty(String key, String value) {
// Thin delegate; string-valued property match only (see javadoc above).
return sampleDataSource.getSamplesWithProperty(key, value);
}
/**
* Returns a set of sample objects for the sample names in a variant context
*
* @param context Any variant context
* @return a set of the sample objects
*/
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
    // Map every sample name carried by the variant context onto its Sample object,
    // creating placeholder samples for names the database has not seen before.
    final Set<Sample> result = new HashSet<Sample>();
    for (final String sampleName : context.getSampleNames()) {
        result.add(sampleDataSource.getOrCreateSample(sampleName));
    }
    return result;
}
/**
* Returns all samples that were referenced in the SAM file
*/
public Set<Sample> getSAMFileSamples() {
// Thin delegate: samples that appeared in the SAM/BAM header(s).
return sampleDataSource.getSAMFileSamples();
}
/**
* Return a subcontext restricted to samples with a given property key/value
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
* @param context VariantContext to filter
* @param key property key
* @param value property value (must be string)
* @return subcontext
*/
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
return sampleDataSource.subContextFromSampleProperty(context, key, value);
public SampleDB getSampleDB() {
return this.sampleDB;
}
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
@ -44,10 +45,7 @@ import org.simpleframework.xml.stream.HyphenStyle;
import java.io.File;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* @author aaron
@ -72,11 +70,6 @@ public class GATKArgumentCollection {
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<String> samFiles = new ArrayList<String>();
// parameters and their defaults
@ElementList(required = false)
@Argument(fullName = "sample_metadata", shortName = "SM", doc = "Sample file(s) in JSON format", required = false)
public List<File> sampleFiles = new ArrayList<File>();
@Element(required = false)
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null;
@ -215,28 +208,91 @@ public class GATKArgumentCollection {
// --------------------------------------------------------------------------------------------------------------
//
// distributed GATK arguments
// PED (pedigree) support
//
// --------------------------------------------------------------------------------------------------------------
@Element(required=false)
@Argument(fullName="processingTracker",shortName="C",doc="A lockable, shared file for coordinating distributed GATK runs",required=false)
@Hidden
public File processingTrackerFile = null;
@Element(required=false)
@Argument(fullName="restartProcessingTracker",shortName="RPT",doc="Should we delete the processing tracker file at startup?",required=false)
@Hidden
public boolean restartProcessingTracker = false;
/**
* <p>Reads PED file-formatted tabular text files describing meta-data about the samples being
* processed in the GATK.</p>
*
* <ul>
* <li>see <a href="http://www.broadinstitute.org/mpg/tagger/faq.html">http://www.broadinstitute.org/mpg/tagger/faq.html</a></li>
* <li>see <a href="http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped">http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped</a></li>
* </ul>
*
* <p>The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:</p>
*
* <ul>
* <li>Family ID</li>
* <li>Individual ID</li>
* <li>Paternal ID</li>
* <li>Maternal ID</li>
* <li>Sex (1=male; 2=female; other=unknown)</li>
* <li>Phenotype</li>
* </ul>
*
* <p>The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: GATK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).</p>
*
* <p>If an individual's sex is unknown, then any character other than 1 or 2 can be used.</p>
*
* <p>You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.</p>
*
* <p>Affection status should be coded:</p>
*
* <ul>
* <li>-9 missing</li>
* <li>0 missing</li>
* <li>1 unaffected</li>
* <li>2 affected</li>
* </ul>
*
* <p>If any value outside of -9,0,1,2 is detected, the phenotype values are
* interpreted as string phenotype values. In this case -9 uniquely
* represents the missing value.</p>
*
* <p>Genotypes (column 7 onwards) cannot be specified to the GATK.</p>
*
* <p>For example, here are two individuals (one row = one person):</p>
*
* <pre>
* FAM001 1 0 0 1 2
* FAM001 2 0 0 1 2
* </pre>
*
* <p>Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to
* tell the GATK PED parser that the corresponding fields are missing from the ped file.</p>
*
* <p>Note that most GATK walkers do not use pedigree information. Walkers that require pedigree
* data should clearly indicate so in their arguments and will throw errors if required pedigree
* information is missing.</p>
*/
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false)
public List<File> pedigreeFiles = Collections.emptyList();
@Element(required=false)
@Argument(fullName="processingTrackerStatusFile",shortName="CSF",doc="If provided, a detailed accounting of the state of the process tracker is written to this file. For debugging, only",required=false)
@Hidden
public File processingTrackerStatusFile = null;
/**
* Inline PED records (see -ped argument). Each -pedString STRING can contain one or more
* valid PED records (see -ped) separated by semi-colons. Supports all tags for each pedString
* as -ped supports
*/
@Argument(fullName="pedigreeString", shortName = "pedString", doc="Pedigree string for samples",required=false)
public List<String> pedigreeStrings = Collections.emptyList();
@Element(required=false)
@Argument(fullName="processingTrackerID",shortName="CID",doc="If provided, an integer ID (starting at 1) indicating a unique id for this process within the distributed GATK group",required=false)
@Hidden
public int processTrackerID = -1;
/**
* How strict should we be in parsing the PED files?
*/
@Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="How strict should we be in validating the pedigree information?",required=false)
public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT;
// --------------------------------------------------------------------------------------------------------------
//
// BAM indexing and sharding arguments
//
// --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName="allow_intervals_with_unindexed_bam",doc="Allow interval processing with an unsupported BAM. NO INTEGRATION TESTS are available. Use at your own risk.",required=false)
@ -387,7 +443,7 @@ public class GATKArgumentCollection {
return false;
}
if ((other.RODToInterval == null && RODToInterval != null) ||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
return false;
}
@ -405,20 +461,6 @@ public class GATKArgumentCollection {
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog)))
return false;
if ((other.processingTrackerFile == null && this.processingTrackerFile != null) ||
(other.processingTrackerFile != null && !other.processingTrackerFile.equals(this.processingTrackerFile)))
return false;
if ((other.processingTrackerStatusFile == null && this.processingTrackerStatusFile != null) ||
(other.processingTrackerStatusFile != null && !other.processingTrackerStatusFile.equals(this.processingTrackerStatusFile)))
return false;
if ( restartProcessingTracker != other.restartProcessingTracker )
return false;
if ( processTrackerID != other.processTrackerID )
return false;
if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM)
return false;

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -76,14 +75,6 @@ public class AlignmentContextUtils {
return splitContextBySampleName(context, null);
}
public static Map<Sample, AlignmentContext> splitContextBySample(AlignmentContext context) {
Map<Sample, AlignmentContext> m = new HashMap<Sample, AlignmentContext>();
for ( Map.Entry<String, AlignmentContext> entry : splitContextBySampleName(context, null).entrySet() ) {
m.put(new Sample(entry.getKey()), entry.getValue());
}
return m;
}
/**
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample, but referencd by sample name instead
* of sample object.
@ -97,8 +88,8 @@ public class AlignmentContextUtils {
GenomeLoc loc = context.getLocation();
HashMap<String, AlignmentContext> contexts = new HashMap<String, AlignmentContext>();
for(String sample: context.getPileup().getSampleNames()) {
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSampleName(sample);
for(String sample: context.getPileup().getSamples()) {
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSample(sample);
// Don't add empty pileups to the split context.
if(pileupBySample.size() == 0)

View File

@ -1,30 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 12, 2010
* Time: 2:09:16 PM
*/
/**
 * Bean describing one sample property definition: a property name together
 * with the values it may take. Instantiated by the YAML sample-file parser
 * (presumably via SnakeYAML's bean mapping — see SampleDataSource).
 */
public class PropertyDefinition {

    String property;  // property name
    String[] values;  // values this property may take

    /** @return the property name */
    public String getProperty() {
        return property;
    }

    /** @param property the property name */
    public void setProperty(String property) {
        this.property = property;
    }

    /** @return the values this property may take */
    public String[] getValues() {
        return values;
    }

    /** @param values the values this property may take */
    public void setValues(String[] values) {
        this.values = values;
    }
}

View File

@ -1,203 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:31:38 PM
*/
/**
 * Metadata for a single sample: an id plus arbitrary properties (gender,
 * population, family id, ...) and named relationships to other samples
 * (mother, father). Instances are managed by SampleDataSource.
 */
public class Sample implements java.io.Serializable {

    /** Unique sample id; a null id is tolerated (SampleDataSource registers a null-keyed placeholder). */
    private final String id;

    private boolean hasSampleFileEntry = false; // true if this sample has an entry in a sample file
    private boolean hasSAMFileEntry = false;    // true if this sample has an entry in the SAM file

    /** Free-form key -> value metadata; "gender" and "population" keys are type-checked on set. */
    private HashMap<String, Object> properties = new HashMap<String, Object>();

    /** Named links to other samples, e.g. "mother" and "father". */
    private HashMap<String, Sample> relationships = new HashMap<String, Sample>();

    public enum Gender {
        MALE,
        FEMALE,
        UNKNOWN
    }

    public Sample(String id) {
        this.id = id;
    }

    public String getId() {
        return this.id;
    }

    public Map<String, Object> getProperties() {
        return properties;
    }

    /**
     * Replaces this sample's properties with a copy of the given map.
     * Fixed: the previous raw (HashMap) cast threw ClassCastException for any
     * non-HashMap Map (e.g. TreeMap); copying accepts every Map implementation
     * and also prevents external mutation from leaking into this sample.
     */
    public void setProperties(Map<String, Object> properties) {
        this.properties = new HashMap<String, Object>(properties);
    }

    /** @return an unmodifiable view of this sample's relationships */
    public Map<String, Sample> getRelationships() {
        return Collections.unmodifiableMap(this.relationships);
    }

    public void setSampleFileEntry(boolean value) {
        this.hasSampleFileEntry = value;
    }

    public boolean hasSAMFileEntry() {
        return this.hasSAMFileEntry;
    }

    public void setSAMFileEntry(boolean value) {
        this.hasSAMFileEntry = value;
    }

    public boolean hasSampleFileEntry() {
        return this.hasSampleFileEntry;
    }

    /**
     * Get one property
     * @param key key of property
     * @return value of property as generic object, or null if absent
     */
    public Object getProperty(String key) {
        return properties.get(key);
    }

    /**
     * Set a property; if the property already exists, it is overwritten.
     * @param key key of property
     * @param value object to be stored in the properties map
     * @throws StingException if key is already used as a relationship, or if a
     *         "gender"/"population" value has the wrong type
     */
    public void setProperty(String key, Object value) {
        if (relationships.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        if (key.equals("gender") && value.getClass() != Gender.class) {
            throw new StingException("'gender' property must be of type Sample.Gender");
        }
        if (key.equals("population") && value.getClass() != String.class) {
            throw new StingException("'population' property must be of type String");
        }
        properties.put(key, value);
    }

    /**
     * Get one relationship
     * @param key of relationship
     * @return Sample object that this relationship points to, or null
     */
    public Sample getRelationship(String key) {
        return relationships.get(key);
    }

    /**
     * Set one relationship; if already set, it is overwritten.
     * @param key key of the relationship
     * @param value Sample object this relationship points to
     * @throws StingException if key is already used as a property
     */
    public void setRelationship(String key, Sample value) {
        if (properties.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        relationships.put(key, value);
    }

    /** @return the sample's mother, or null if not set */
    public Sample getMother() {
        return getRelationship("mother");
    }

    /** @return the sample's father, or null if not set */
    public Sample getFather() {
        return getRelationship("father");
    }

    /** @return the value of the "gender" property (of type Gender), or null */
    public Gender getGender() {
        return (Gender) properties.get("gender");
    }

    public String getPopulation() {
        return (String) properties.get("population");
    }

    public String getFamilyId() {
        return (String) properties.get("familyId");
    }

    /**
     * @return True if sample is male, false if female, unknown, or null
     */
    public boolean isMale() {
        return properties.get("gender") == Gender.MALE;
    }

    /**
     * @return True if sample is female, false if male, unknown or null
     * Fixed: this previously compared against Gender.MALE and therefore always
     * returned the same result as isMale().
     */
    public boolean isFemale() {
        return properties.get("gender") == Gender.FEMALE;
    }

    /**
     * @param key property key
     * @return true if sample has this property (even if its value is null)
     */
    public boolean hasProperty(String key) {
        return properties.containsKey(key);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Sample sample = (Sample) o;

        if (hasSAMFileEntry != sample.hasSAMFileEntry) return false;
        if (hasSampleFileEntry != sample.hasSampleFileEntry) return false;
        if (id != null ? !id.equals(sample.id) : sample.id != null) return false;
        if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
        if (relationships != null ? !relationships.equals(sample.relationships) : sample.relationships != null)
            return false;

        return true;
    }

    @Override
    public int hashCode() {
        // "".hashCode() is 0; used as the hash for the null-id placeholder sample.
        return id != null ? id.hashCode() : "".hashCode();
    }
}

View File

@ -1,31 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 13, 2010
* Time: 5:13:46 PM
*/
/**
 * Bean pairing a primary ("main") sample id with the alternate ids that refer
 * to the same sample in other datasets. Instantiated by the YAML sample-file
 * parser (presumably via SnakeYAML's bean mapping — see SampleDataSource).
 */
public class SampleAlias {

    String mainId;     // the primary sample id
    String[] otherIds; // alias ids that resolve to mainId

    /** @return the primary sample id */
    public String getMainId() {
        return mainId;
    }

    /** @param mainId the primary sample id */
    public void setMainId(String mainId) {
        this.mainId = mainId;
    }

    /** @return the alias ids for this sample */
    public String[] getOtherIds() {
        return otherIds;
    }

    /** @param otherIds the alias ids for this sample */
    public void setOtherIds(String[] otherIds) {
        this.otherIds = otherIds;
    }
}

View File

@ -1,590 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.yaml.snakeyaml.TypeDescription;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.Constructor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:30:09 PM
*
* This class stores and manages sample metadata. This data is encoded in a sample file, which can be included
* in the GATK by the "--samples" argument. This class reads and parses those files.
*
* Although there are a set of public methods for accessing sample data, they aren't used by walkers - they are really
* only used by GenomeAnalysisEngine. An instance of GenomeAnalysisEngine has one SampleDataSource. When a walker
* wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource.
*
*/
public class SampleDataSource {

    /**
     * SAMFileHeader that has been created for this analysis.
     */
    private SAMFileHeader header;

    /**
     * Primary storage for sample data.  Samples are keyed by their unique ID string,
     * so lookup by ID is constant time.
     */
    private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

    /**
     * Samples can have "aliases", because sometimes the same sample is referenced by different IDs in different
     * datasets. If this is the case, one ID is the "primary ID" and others are "aliases".
     *
     * This maps ID => primary ID for all sample ID strings - both primary IDs and aliases.
     *
     * NOTE(review): addFile() validates aliases via validateAliases() but never inserts them into
     * this map, so aliasFilter() is currently an identity mapping - confirm whether alias
     * resolution was intended to be wired up here.
     */
    private HashMap<String, String> sampleAliases = new HashMap<String, String>();

    /**
     * Property keys that are always allowed while loading sample files, even when the file
     * declares an "allowedProperties" restriction.
     */
    public static final String[] specialProperties = new String[] {"familyId", "population", "gender"};

    /**
     * Relationship keys that are always allowed while loading sample files, even when the file
     * declares an "allowedRelationships" restriction.
     */
    public static final String[] specialRelationships = new String[] {"mother", "father"};

    /**
     * Constructor takes both a SAM header and sample files because the two must be integrated.
     * @param header SAMFileHeader that has been created for this analysis
     * @param sampleFiles Sample files that were included on the command line; may be null
     */
    public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
        this();
        this.header = header;
        // create an empty sample object for each sample referenced in the SAM header
        addSamplesFromSAMHeader(header);
        // add files consecutively; later files may extend samples created by earlier ones
        if (sampleFiles != null) {
            for (File file : sampleFiles) {
                addFile(file);
            }
        }
    }

    /**
     * Creates a data source holding only the "null" sample, which acts as a catch-all
     * for records that carry no sample information.
     */
    public SampleDataSource() {
        samples.put(null, new Sample(null));
    }

    /**
     * Hallucinates sample objects for all the samples in the SAM file and stores them.
     * Samples already known to this data source are left untouched.
     * @param header SAM header to pull sample names from
     */
    public void addSamplesFromSAMHeader(SAMFileHeader header) {
        for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
            if (!hasSample(sampleName)) {
                Sample newSample = new Sample(sampleName);
                newSample.setSAMFileEntry(true);
                samples.put(sampleName, newSample);
            }
        }
    }

    /**
     * Parse one sample file and integrate it with samples that are already there.
     * Fails quickly if any errors are found in the file.
     * @param sampleFile YAML sample file to load
     */
    public void addFile(File sampleFile) {
        BufferedReader reader;
        try {
            reader = new BufferedReader(new FileReader(sampleFile));
        }
        catch (IOException e) {
            throw new StingException("Could not open sample file " + sampleFile.getAbsolutePath(), e);
        }

        // SampleFileParser stores an object representation of a sample file - this is what we'll parse
        SampleFileParser parser;
        try {
            // set up YAML reader - a "Constructor" creates java objects from YAML and a "Yaml" loads the stream
            Constructor con = new Constructor(SampleFileParser.class);
            TypeDescription desc = new TypeDescription(SampleFileParser.class);
            desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
            desc.putListPropertyType("sampleAliases", SampleAlias.class);
            con.addTypeDescription(desc);
            Yaml yaml = new Yaml(con);
            try {
                parser = (SampleFileParser) yaml.load(reader);
            }
            catch (Exception e) {
                throw new StingException("There was a syntactic error with the YAML in sample file " + sampleFile.getAbsolutePath(), e);
            }
        }
        finally {
            // always release the file handle, even when parsing fails
            try {
                reader.close();
            }
            catch (IOException e) {
                // best effort - the data was already read (or the parse already failed)
            }
        }

        // check to see which validation options were built into the file
        boolean restrictProperties = parser.getAllowedProperties() != null;
        boolean restrictRelationships = parser.getAllowedRelationships() != null;
        boolean restrictPropertyValues = parser.getPropertyDefinitions() != null;

        // propertyValues stores the values that are allowed for a given property
        HashMap<String, HashSet> propertyValues = null;
        if (restrictPropertyValues) {
            propertyValues = new HashMap<String, HashSet>();
            for (PropertyDefinition def : parser.getPropertyDefinitions()) {
                HashSet<String> set = new HashSet<String>();
                for (String value : def.getValues()) {
                    set.add(value);
                }
                propertyValues.put(def.getProperty(), set);
            }
        }

        // make sure the aliases are valid
        validateAliases(parser);

        // loop through each sample in the file - a SampleParser stores an object that will become a Sample
        for (SampleParser sampleParser : parser.getSamples()) {
            try {
                // step 1: add the sample if it doesn't already exist
                Sample sample = getSampleById(sampleParser.getId());
                if (sample == null) {
                    sample = new Sample(sampleParser.getId());
                }
                addSample(sample);
                sample.setSampleFileEntry(true);

                // step 2: add the properties
                if (sampleParser.getProperties() != null) {
                    for (String property : sampleParser.getProperties().keySet()) {
                        // check that the property is allowed
                        if (restrictProperties) {
                            if (!isPropertyValid(property, parser.getAllowedProperties())) {
                                throw new StingException(property + " is an invalid property. It is not included in the list " +
                                        "of allowed properties.");
                            }
                        }
                        // next check that the value is allowed
                        if (restrictPropertyValues) {
                            if (!isValueAllowed(property, sampleParser.getProperties().get(property), propertyValues)) {
                                throw new StingException("The value of property '" + property + "' is invalid. " +
                                        "It is not included in the list of allowed values for this property.");
                            }
                        }
                        // next check that there isn't already a conflicting property there;
                        // compare with equals() - reference comparison would falsely flag equal values
                        if (sample.getProperty(property) != null &&
                                !sample.getProperty(property).equals(sampleParser.getProperties().get(property)))
                        {
                            throw new StingException(property + " is a conflicting property!");
                        }
                        // checks are passed - now add the property!
                        saveProperty(sample, property, sampleParser.getProperties().get(property));
                    }
                }

                // step 3: add the relationships
                if (sampleParser.getRelationships() != null) {
                    for (String relationship : sampleParser.getRelationships().keySet()) {
                        String relativeId = sampleParser.getRelationships().get(relationship);
                        if (relativeId == null) {
                            throw new StingException("The relationship cannot be null");
                        }
                        // first check that it's not invalid
                        if (restrictRelationships) {
                            if (!isRelationshipValid(relationship, parser.getAllowedRelationships())) {
                                throw new StingException(relationship + " is an invalid relationship");
                            }
                        }
                        // next check that there isn't already a conflicting relationship there;
                        // the stored relative's ID must match the one declared in this file
                        if (sample.getRelationship(relationship) != null) {
                            if (!relativeId.equals(sample.getRelationship(relationship).getId())) {
                                throw new StingException(relationship + " is a conflicting relationship!");
                            }
                            // the relationship is already set - and consistent with what we're reading now -
                            // so there is no need to continue
                            else {
                                continue;
                            }
                        }
                        // checks are passed - now save the relationship
                        saveRelationship(sample, relationship, relativeId);
                    }
                }
            } catch (Exception e) {
                throw new StingException("An error occurred while loading this sample from the sample file: " +
                        sampleParser.getId(), e);
            }
        }
    }

    /**
     * Checks whether a property value is permitted by the file's property definitions.
     * @param key property key
     * @param value value read from the YAML parser
     * @param valuesList map of property key => set of allowed string values
     * @return true if the value is allowed for this property
     */
    private boolean isValueAllowed(String key, Object value, HashMap<String, HashSet> valuesList) {
        // if the property values weren't specified for this property, then any value is okay
        if (!valuesList.containsKey(key)) {
            return true;
        }
        // a null value can never match an enumerated value
        else if (value == null)
            return false;
        // if this property has enumerated values, it must be a string
        else if (value.getClass() != String.class)
            return false;
        // is the value specified or not?
        else if (!valuesList.get(key).contains(value))
            return false;
        return true;
    }

    /**
     * Makes sure that the aliases are valid.
     * Checks that 1) no string is used as both a main ID and an alias;
     * 2) no alias is used more than once
     * @param parser parsed representation of the sample file
     */
    private void validateAliases(SampleFileParser parser) {
        // no aliases? nothing to validate
        if (parser.getSampleAliases() == null)
            return;

        HashSet<String> mainIds = new HashSet<String>();
        HashSet<String> otherIds = new HashSet<String>();

        for (SampleAlias sampleAlias : parser.getSampleAliases()) {
            mainIds.add(sampleAlias.getMainId());
            for (String otherId : sampleAlias.getOtherIds()) {
                if (mainIds.contains(otherId))
                    throw new StingException(String.format("The aliases in your sample file are invalid - the alias %s cannot " +
                            "be both a main ID and an other ID", otherId));
                // HashSet.add returns false when the element was already present
                if (!otherIds.add(otherId))
                    throw new StingException(String.format("The aliases in your sample file are invalid - %s is listed as an " +
                            "alias more than once.", otherId));
            }
        }
    }

    /**
     * Checks a property key against the special properties and the file's allowed list.
     * @param property property key to check
     * @param allowedProperties allowed property keys declared in the sample file
     * @return true if the property may be used
     */
    private boolean isPropertyValid(String property, String[] allowedProperties) {
        // is it a special property that is always allowed?
        for (String allowedProperty : specialProperties) {
            if (property.equals(allowedProperty))
                return true;
        }
        // is it in the allowed properties list?
        for (String allowedProperty : allowedProperties) {
            if (property.equals(allowedProperty))
                return true;
        }
        return false;
    }

    /**
     * Checks a relationship key against the special relationships and the file's allowed list.
     * @param relationship relationship key to check
     * @param allowedRelationships allowed relationship keys declared in the sample file
     * @return true if the relationship may be used
     */
    private boolean isRelationshipValid(String relationship, String[] allowedRelationships) {
        // is it a special relationship that is always allowed?
        for (String allowedRelationship : specialRelationships) {
            if (relationship.equals(allowedRelationship))
                return true;
        }
        // is it in the allowed relationships list?
        for (String allowedRelationship : allowedRelationships) {
            if (relationship.equals(allowedRelationship))
                return true;
        }
        return false;
    }

    /**
     * Saves a property on a sample, converting it to the correct type first.
     * @param sample sample to store the property on
     * @param key property key
     * @param value property value, as read from the YAML parser; may be null
     */
    private void saveProperty(Sample sample, String key, Object value) {
        // convert gender to the right type, if it was stored as a String;
        // the null check must come first to avoid an NPE on the String cast
        if (key.equals("gender") && value != null) {
            if (((String) value).toLowerCase().equals("male")) {
                value = Sample.Gender.MALE;
            }
            else if (((String) value).toLowerCase().equals("female")) {
                value = Sample.Gender.FEMALE;
            }
            else if (((String) value).toLowerCase().equals("unknown")) {
                value = Sample.Gender.UNKNOWN;
            }
            else {
                throw new StingException("'gender' property must be male, female, or unknown.");
            }
        }
        try {
            sample.setProperty(key, value);
        }
        catch (Exception e) {
            throw new StingException("Could not save property " + key, e);
        }
    }

    /**
     * Saves a relationship on a sample, storing a reference to the relative's Sample object.
     * Creates the relative's Sample object if it doesn't exist yet.
     * @param sample sample to store the relationship on
     * @param key relationship key
     * @param relativeId sample ID string of the relative
     */
    private void saveRelationship(Sample sample, String key, String relativeId) {
        // get the reference that we'll store as the value
        Sample relative = getSampleById(relativeId);

        // create a sample object for the relative, if necessary
        if (relative == null) {
            relative = new Sample(relativeId);
            addSample(relative);
        }

        sample.setRelationship(key, relative);
    }

    /**
     * Filter a sample name in case it is an alias.
     * @param sampleId to be filtered
     * @return ID of the sample that stores data for this alias
     */
    private String aliasFilter(String sampleId) {
        if (!sampleAliases.containsKey(sampleId))
            return sampleId;
        else
            return sampleAliases.get(sampleId);
    }

    /**
     * Add a sample to the collection, keyed by its ID.
     * @param sample to be added
     */
    private void addSample(Sample sample) {
        samples.put(sample.getId(), sample);
    }

    /**
     * Check if a sample with this ID exists.
     * Note that this will return true if the name passed in is an alias.
     * @param id ID of sample to be checked
     * @return true if sample exists; false if not
     */
    public boolean hasSample(String id) {
        return samples.containsKey(aliasFilter(id));
    }

    /**
     * Get a sample by its ID.
     * If an alias is passed in, returns the main sample object.
     * @param id sample ID or alias
     * @return sample object with this ID, or null if none exists
     */
    public Sample getSampleById(String id) {
        return samples.get(aliasFilter(id));
    }

    /**
     * Get the sample for a given read group.
     * Must first look up the ID for the read group.
     * @param readGroup of sample; may be null
     * @return sample object with the ID from the read group
     */
    public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
        // reads without a read group map onto the catch-all null sample
        if (readGroup == null)
            return getSampleById(null);
        String nameFromReadGroup = readGroup.getSample();
        return getSampleById(nameFromReadGroup);
    }

    /**
     * Get the sample for a given read.
     * Must first look up the read group, and then the sample ID for that read group.
     * @param read of sample
     * @return sample object of this read
     */
    public Sample getSampleByRead(SAMRecord read) {
        return getSampleByReadGroup(read.getReadGroup());
    }

    /**
     * Get the number of sample objects.
     * @return size of the samples map
     */
    public int sampleCount() {
        return samples.size();
    }

    /**
     * Return all samples with a given family ID.
     * Note that this isn't terribly efficient (linear) - it may be worth adding a family ID data structure for this.
     * @param familyId family ID to look up
     * @return set of family members; possibly empty
     */
    public Set<Sample> getFamily(String familyId) {
        HashSet<Sample> familyMembers = new HashSet<Sample>();
        for (Sample sample : samples.values()) {
            if (sample.getFamilyId() != null) {
                if (sample.getFamilyId().equals(familyId))
                    familyMembers.add(sample);
            }
        }
        return familyMembers;
    }

    /**
     * Returns all children of a given sample.
     * See the note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient.
     * @param sample parent sample
     * @return set of children; possibly empty
     */
    public Set<Sample> getChildren(Sample sample) {
        HashSet<Sample> children = new HashSet<Sample>();
        // reference comparison is intentional: relatives are stored as references into the samples map
        for (Sample familyMember : getFamily(sample.getFamilyId())) {
            if (familyMember.getMother() == sample || familyMember.getFather() == sample) {
                children.add(familyMember);
            }
        }
        return children;
    }

    /**
     * @return a fresh set containing every sample in this data source
     */
    public Set<Sample> getSamples() {
        HashSet<Sample> set = new HashSet<Sample>();
        set.addAll(samples.values());
        return set;
    }

    /**
     * Takes a collection of sample names and returns their corresponding sample objects.
     * Note that, since a set is returned, if you pass in a list with duplicate names there will not be any duplicates in the returned set.
     * @param sampleNameList Set of sample names
     * @return Corresponding set of samples
     */
    public Set<Sample> getSamples(Collection<String> sampleNameList) {
        HashSet<Sample> samples = new HashSet<Sample>();
        for (String name : sampleNameList) {
            try {
                samples.add(getSampleById(name));
            }
            catch (Exception e) {
                throw new StingException("Could not get sample with the following ID: " + name, e);
            }
        }
        return samples;
    }

    /**
     * Returns a set of samples that have any value (which could be null) for a given property.
     * @param key Property key
     * @return Set of samples with the property
     */
    public Set<Sample> getSamplesWithProperty(String key) {
        HashSet<Sample> toReturn = new HashSet<Sample>();
        for (Sample s : samples.values()) {
            if (s.hasProperty(key))
                toReturn.add(s);
        }
        return toReturn;
    }

    /**
     * Returns a set of samples that have a property with a certain value.
     * Value must be a string for now - could add a similar method for matching any objects in the future.
     *
     * @param key Property key
     * @param value String property value
     * @return Set of samples that match key and value
     */
    public Set<Sample> getSamplesWithProperty(String key, String value) {
        // build a new set rather than removing from the set being iterated,
        // which would throw ConcurrentModificationException
        Set<Sample> toReturn = new HashSet<Sample>();
        for (Sample s : getSamplesWithProperty(key)) {
            if (value.equals(s.getProperty(key)))
                toReturn.add(s);
        }
        return toReturn;
    }

    /**
     * Gets the sample with the given ID, creating and registering it first if necessary.
     * @param id sample ID
     * @return existing or newly-created sample
     */
    public Sample getOrCreateSample(String id) {
        Sample sample = getSampleById(id);
        if (sample == null) {
            sample = new Sample(id);
            addSample(sample);
        }
        return sample;
    }

    /**
     * Returns all samples that were referenced in the SAM file.
     */
    public Set<Sample> getSAMFileSamples() {
        Set<Sample> toReturn = new HashSet<Sample>();
        for (Sample sample : samples.values()) {
            if (sample.hasSAMFileEntry())
                toReturn.add(sample);
        }
        return toReturn;
    }

    /**
     * Returns a set of sample objects for the sample names in a variant context.
     *
     * @param context Any variant context
     * @return a set of the sample objects
     */
    public Set<Sample> getSamplesByVariantContext(VariantContext context) {
        Set<Sample> samples = new HashSet<Sample>();
        for (String sampleName : context.getSampleNames()) {
            samples.add(getOrCreateSample(sampleName));
        }
        return samples;
    }

    /**
     * Return a subcontext restricted to samples with a given property key/value.
     * Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering.
     * @param context VariantContext to filter
     * @param key property key
     * @param value property value (must be string)
     * @return subcontext
     */
    public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
        Set<String> samplesWithProperty = new HashSet<String>();
        for (String sampleName : context.getSampleNames()) {
            // use getSampleById so alias resolution is applied consistently with the rest of the class
            Sample s = getSampleById(sampleName);
            if (s != null && s.hasProperty(key) && s.getProperty(key).equals(value))
                samplesWithProperty.add(sampleName);
        }
        Map<String, Genotype> genotypes = context.getGenotypes(samplesWithProperty);
        return context.subContextFromGenotypes(genotypes.values());
    }

    /**
     * Convenience factory for a data source backed by an empty SAM header and no sample files.
     */
    public static SampleDataSource createEmptyDataSource() {
        SAMFileHeader header = new SAMFileHeader();
        return new SampleDataSource(header, null);
    }
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 12, 2010
* Time: 1:30:44 PM
*/
/**
 * Bean used by the YAML loader to hold the raw contents of a sample metadata file:
 * the list of samples plus the optional validation sections (allowed properties,
 * allowed relationships, property value definitions, and sample aliases).
 */
public class SampleFileParser {

    // raw sections of the sample file, populated reflectively by SnakeYAML via the setters below
    private SampleAlias[] sampleAliases;
    private String[] allowedProperties;
    private String[] allowedRelationships;
    private PropertyDefinition[] propertyDefinitions;
    private SampleParser[] samples;

    /** No-arg constructor required by the YAML bean loader. */
    public SampleFileParser() {
    }

    /** @return the parsed sample entries from the file */
    public SampleParser[] getSamples() {
        return samples;
    }

    public void setSamples(SampleParser[] samples) {
        this.samples = samples;
    }

    /** @return alias declarations mapping alternate IDs onto main sample IDs, or null if absent */
    public SampleAlias[] getSampleAliases() {
        return sampleAliases;
    }

    public void setSampleAliases(SampleAlias[] sampleAliases) {
        this.sampleAliases = sampleAliases;
    }

    /** @return the property keys this file permits, or null when properties are unrestricted */
    public String[] getAllowedProperties() {
        return allowedProperties;
    }

    public void setAllowedProperties(String[] allowedProperties) {
        this.allowedProperties = allowedProperties;
    }

    /** @return the relationship keys this file permits, or null when relationships are unrestricted */
    public String[] getAllowedRelationships() {
        return allowedRelationships;
    }

    public void setAllowedRelationships(String[] allowedRelationships) {
        this.allowedRelationships = allowedRelationships;
    }

    /** @return per-property definitions of allowed values, or null when values are unrestricted */
    public PropertyDefinition[] getPropertyDefinitions() {
        return propertyDefinitions;
    }

    public void setPropertyDefinitions(PropertyDefinition[] propertyDefinitions) {
        this.propertyDefinitions = propertyDefinitions;
    }
}

View File

@ -1,43 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import java.util.HashMap;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 13, 2010
* Time: 2:09:43 PM
*/
/**
 * Bean used by the YAML loader to hold one raw sample entry from a sample metadata file:
 * its unique ID, an arbitrary key/value property map, and a map of relationship
 * names to the IDs of related samples.
 */
public class SampleParser {

    // sample's unique identifier
    private String id;
    // arbitrary property key => value pairs declared for the sample
    private HashMap<String, Object> properties;
    // relationship name (e.g. "mother") => related sample's ID
    private HashMap<String, String> relationships;

    /** @return this sample's unique ID */
    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    /** @return relationship name => related sample ID, or null if none were declared */
    public HashMap<String, String> getRelationships() {
        return relationships;
    }

    public void setRelationships(HashMap<String, String> relationships) {
        this.relationships = relationships;
    }

    /** @return property key => value map, or null if none were declared */
    public HashMap<String, Object> getProperties() {
        return properties;
    }

    public void setProperties(HashMap<String, Object> properties) {
        this.properties = properties;
    }
}

View File

@ -84,12 +84,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
*/
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
super(engine, walker, reads, reference, rods);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
if (engine.getArguments().processingTrackerFile != null) {
throw new UserException.BadArgumentValue("-C", "Distributed GATK calculations currently not supported in multi-threaded mode. Complain to Mark depristo@broadinstitute.org to implement and test this code path");
}
}
public Object execute( Walker walker, ShardStrategy shardStrategy ) {

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.SampleUtils;
import java.util.Collection;
@ -56,7 +57,8 @@ public class LinearMicroScheduler extends MicroScheduler {
traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());

View File

@ -62,7 +62,10 @@ public class ShardTraverser implements Callable {
Object accumulator = walker.reduceInit();
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(), microScheduler.engine.getSampleMetadata()); // todo: microScheduler.engine is protected - is it okay to user it here?
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),
microScheduler.getReadIterator(shard),
shard.getGenomeLocs(),
microScheduler.engine.getSampleDB().getSampleNames()); // todo: microScheduler.engine is protected - is it okay to user it here?
ShardDataProvider dataProvider = null;
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {

View File

@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
@ -12,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
@ -63,17 +63,20 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
* the given intervals.
* @param iterator The data source for this window.
* @param intervals The set of intervals over which to traverse.
* @param sampleData SampleDataSource that we can reference reads with
* @param sampleNames The complete set of sample names in the reads in shard
*/
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, SampleDataSource sampleData ) {
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, Collection<String> sampleNames) {
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
}
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
public Iterator<WindowMakerIterator> iterator() {
return this;
}

View File

@ -35,12 +35,11 @@ import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReservoirDownsampler;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -52,9 +51,6 @@ import java.util.*;
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
public class LocusIteratorByState extends LocusIterator {
// private static long discarded_bases = 0L;
// private static long observed_bases = 0L;
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
@ -69,7 +65,7 @@ public class LocusIteratorByState extends LocusIterator {
* Used to create new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
private final ArrayList<Sample> samples;
private final ArrayList<String> samples;
private final ReadStateManager readStates;
static private class SAMRecordState {
@ -278,15 +274,30 @@ public class LocusIteratorByState extends LocusIterator {
//
// -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, SampleDataSource sampleData ) {
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples ) {
this.readInfo = readInformation;
this.genomeLocParser = genomeLocParser;
this.samples = new ArrayList<String>(samples);
this.readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
// get the list of samples
this.samples = new ArrayList<Sample>(sampleData.getSamples());
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
if ( this.samples.isEmpty() && samIterator.hasNext() ) {
// actually we cannot process BAMs without read groups unless we tolerate empty
// sample lists. In the empty case we need to add the null element to the samples
this.samples.add(null);
//throw new IllegalArgumentException("samples list must not be empty");
}
}
/**
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
* for the system.
*/
public final static Collection<String> sampleListForSAMWithoutReadGroups() {
List<String> samples = new ArrayList<String>();
samples.add(null);
return samples;
}
public Iterator<AlignmentContext> iterator() {
@ -303,19 +314,6 @@ public class LocusIteratorByState extends LocusIterator {
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
}
public void printState() {
for(Sample sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
while(iterator.hasNext()) {
SAMRecordState state = iterator.next();
logger.debug(String.format("printState():"));
SAMRecord read = state.getRead();
int offset = state.getReadOffset();
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
}
}
}
private GenomeLoc getLocation() {
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
}
@ -355,14 +353,14 @@ public class LocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
Map<Sample,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<Sample,ReadBackedExtendedEventPileupImpl>();
Map<String,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileupImpl>();
// get current location on the reference and decrement it by 1: the indels we just stepped over
// are associated with the *previous* reference base
GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1);
boolean hasBeenSampled = false;
for(Sample sample: samples) {
for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample));
hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample);
@ -426,10 +424,10 @@ public class LocusIteratorByState extends LocusIterator {
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
} else {
GenomeLoc location = getLocation();
Map<Sample,ReadBackedPileupImpl> fullPileup = new HashMap<Sample,ReadBackedPileupImpl>();
Map<String,ReadBackedPileupImpl> fullPileup = new HashMap<String,ReadBackedPileupImpl>();
boolean hasBeenSampled = false;
for(Sample sample: samples) {
for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
@ -495,7 +493,7 @@ public class LocusIteratorByState extends LocusIterator {
}
private void updateReadStates() {
for(Sample sample: samples) {
for(final String sample: samples) {
Iterator<SAMRecordState> it = readStates.iterator(sample);
while ( it.hasNext() ) {
SAMRecordState state = it.next();
@ -522,7 +520,7 @@ public class LocusIteratorByState extends LocusIterator {
private final PeekableIterator<SAMRecord> iterator;
private final DownsamplingMethod downsamplingMethod;
private final SamplePartitioner samplePartitioner;
private final Map<Sample,PerSampleReadStateManager> readStatesBySample = new HashMap<Sample,PerSampleReadStateManager>();
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
private final int targetCoverage;
private int totalReadStates = 0;
@ -540,9 +538,9 @@ public class LocusIteratorByState extends LocusIterator {
}
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
for(Sample sample: samples) {
for(final String sample: samples) {
readStatesBySample.put(sample,new PerSampleReadStateManager());
readSelectors.put(sample.getId(),downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
readSelectors.put(sample,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
}
samplePartitioner = new SamplePartitioner(readSelectors);
@ -554,7 +552,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample.
* @return Iterator over the reads associated with that sample.
*/
public Iterator<SAMRecordState> iterator(final Sample sample) {
public Iterator<SAMRecordState> iterator(final String sample) {
return new Iterator<SAMRecordState>() {
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
@ -590,7 +588,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample.
* @return Total number of reads in the given sample.
*/
public int size(final Sample sample) {
public int size(final String sample) {
return readStatesBySample.get(sample).size();
}
@ -600,12 +598,12 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample Sample, downsampled independently.
* @return Integer stop of the furthest undownsampled region.
*/
public int getDownsamplingExtent(final Sample sample) {
public int getDownsamplingExtent(final String sample) {
return readStatesBySample.get(sample).getDownsamplingExtent();
}
public SAMRecordState getFirst() {
for(Sample sample: samples) {
for(final String sample: samples) {
PerSampleReadStateManager reads = readStatesBySample.get(sample);
if(!reads.isEmpty())
return reads.peek();
@ -639,8 +637,8 @@ public class LocusIteratorByState extends LocusIterator {
}
samplePartitioner.complete();
for(Sample sample: samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample.getId());
for(final String sample: samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
@ -1072,6 +1070,3 @@ class SamplePartitioner implements ReadSelector {
}
}

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
* Categorical sample trait for association and analysis
*
* Samples can have unknown status, be affected or unaffected by the
* categorical trait, or they can be marked as actually having an
* other trait value (stored in an associated value in the Sample class)
*
* @author Mark DePristo
* @since Sept. 2011
*/
public enum Affection {
/** Status is unknown */
UNKNOWN,
/** Suffers from the disease */
AFFECTED,
/** Unaffected by the disease */
UNAFFECTED,
/** An "other" trait: value of the trait is stored elsewhere and is an arbitrary string */
OTHER
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
 * ENUM of possible human genders: male, female, or unknown
 */
public enum Gender {
    /** Male (PED sex code 1) */
    MALE,
    /** Female (PED sex code 2) */
    FEMALE,
    /** Sex not provided or coded with any value other than 1 or 2 */
    UNKNOWN
}

View File

@ -0,0 +1,310 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*;
import java.util.*;
/**
* Reads PED file-formatted tabular text files
*
* See http://www.broadinstitute.org/mpg/tagger/faq.html
* See http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
*
* The "ped" file format refers to the widely-used format for linkage pedigree data.
* Each line describes a single (diploid) individual in the following format:
*
* family_ID individual_ID father_ID mother_ID gender phenotype genotype_1 genotype_2 ...
*
* If your data lacks pedigree information (for example, unrelated case/control individuals),
* set the father_ID and mother_ID to 0. sex denotes the individual's gender with 1=male and 2=female.
* phenotype refers to the affected status (for association studies) where 0=unknown, 1=unaffected, 2=affected.
* Finally, each genotype is written as two (=diploid) integer numbers (separated by whitespace),
* where 1=A, 2=C, 3=G, 4=T. No header lines are allowed and all columns must be separated by whitespace.
* Check out the information at the PLINK website on the "ped" file format.
*
* The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:
* Family ID
* Individual ID
* Paternal ID
* Maternal ID
* Sex (1=male; 2=female; other=unknown)
* Phenotype
*
* The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: PLINK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).
* Note that the GATK actually supports arbitrary values for quantitative trait -- not just doubles --
* and are actually representing these values as strings instead of doubles
*
* NOTE Quantitative traits with decimal points must be coded with a period/full-stop character and
* not a comma, i.e. 2.394 not 2,394
*
* If an individual's sex is unknown, then any character other than 1 or 2 can be used.
* When new files are created (PED, FAM, or other which contain sex) then the original coding will be
* preserved. However, these individuals will be dropped from any analyses (i.e. phenotype set to missing also)
* and an error message will arise if an analysis that uses family information is requested and an
* individual of 'unknown' sex is specified as a father or mother.
*
*
* HINT You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.
*
* Affection status, by default, should be coded:
* -9 missing
* 0 missing
* 1 unaffected
* 2 affected
*
* If your file is coded 0/1 to represent unaffected/affected, then use the --1 flag:
* plink --file mydata --1 which will specify a disease phenotype coded:
*
* -9 missing
* 0 unaffected
* 1 affected
*
* The missing phenotype value for quantitative traits is, by default, -9 (this can also be used for
* disease traits as well as 0). It can be reset by including the --missing-phenotype option:
*
* Genotypes (column 7 onwards) should also be white-space delimited; they can be any character
* (e.g. 1,2,3,4 or A,C,G,T or anything else) except 0 which is, by default, the missing genotype
* character. All markers should be biallelic. All SNPs (whether haploid or not) must have two
* alleles specified. Either Both alleles should be missing (i.e. 0) or neither.
*
* No header row should be given. For example, here are two individuals typed for 3 SNPs (one row = one person):
*
* FAM001 1 0 0 1 2 A A G G A C
* FAM001 2 0 0 1 2 A A A G 0 0
* ...
*
* Note that the GATK does not support genotypes in a PED file.
*
* @author Mark DePristo
* @since 2011
*/
public class PedReader {
    private static Logger logger = Logger.getLogger(PedReader.class);

    /**
     * The set of phenotype strings that denote a categorical (affection-status) trait.
     * Any other observed phenotype value switches the whole file into quantitative-trait mode.
     * (Renamed from the misspelled CATAGORICAL_TRAIT_VALUES; the field is private, so no
     * external callers are affected.)
     */
    final static private Set<String> CATEGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
    /** Lines beginning with this marker are treated as comments and skipped */
    final static private String commentMarker = "#";

    /**
     * An enum that specifies which, if any, of the standard PED fields are
     * missing from the input records.  For example, suppose we have the full record:
     *
     *   "fam1 kid dad mom 1 2"
     *
     * indicating a male affected child.  This can be parsed with the -ped x.ped argument
     * to the GATK.  Suppose we only have:
     *
     *   "fam1 kid 1"
     *
     * we can parse the reduced version of this record with -ped:NO_PARENTS,NO_PHENOTYPE x.ped
     */
    public enum MissingPedField {
        /**
         * The PED records do not have the first (FAMILY_ID) argument.  The family id
         * will be set to null / empty.
         */
        NO_FAMILY_ID,

        /**
         * The PED records do not have either the paternal or maternal IDs, so
         * the corresponding IDs are set to null.
         */
        NO_PARENTS,

        /**
         * The PED records do not have the GENDER field, so the sex of each
         * sample will be set to UNKNOWN.
         */
        NO_SEX,

        /**
         * The PED records do not have the PHENOTYPE field, so the phenotype
         * of each sample will be set to UNKNOWN.
         */
        NO_PHENOTYPE
    }

    /** The six standard PED columns, in order */
    protected enum Field {
        FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
    }

    // phenotype codes (see class javadoc / PLINK documentation)
    private final static String MISSING_VALUE1 = "-9";
    private final static String MISSING_VALUE2 = "0";
    private final static String PHENOTYPE_UNAFFECTED = "1";
    private final static String PHENOTYPE_AFFECTED = "2";

    // sex codes: 1=male, 2=female, anything else=unknown
    private final static String SEX_MALE = "1";
    private final static String SEX_FEMALE = "2";

    public PedReader() { }

    /**
     * Parses a PED file on disk, adding each parsed record to sampleDB.
     *
     * @param source        the PED file to read
     * @param missingFields which standard PED columns are absent from this file
     * @param sampleDB      database to register parsed (and implied parent) samples in
     * @return the list of samples parsed or implicitly created, in file order
     * @throws FileNotFoundException if source cannot be opened
     */
    public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) throws FileNotFoundException {
        logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
        return parse(new FileReader(source), missingFields, sampleDB);
    }

    /**
     * Parses PED records supplied directly as a string, with ';' standing in for newlines.
     *
     * @param source        PED records, ';'-separated
     * @param missingFields which standard PED columns are absent
     * @param sampleDB      database to register parsed samples in
     * @return the list of samples parsed or implicitly created
     */
    public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields);
        // each ';' marks a record boundary, so expand it to a platform newline before parsing
        return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB);
    }

    /**
     * Core parser: reads whitespace-delimited PED records from reader.
     *
     * Makes two passes over the lines: the first determines whether the phenotype column is
     * quantitative (any value outside {-9, 0, 1, 2}) or categorical, the second builds the
     * Sample objects.  Parents referenced but not defined in the file are created implicitly.
     *
     * @param reader        source of PED lines
     * @param missingFields which standard PED columns are absent
     * @param sampleDB      database to register parsed samples in
     * @return the list of samples parsed or implicitly created
     */
    public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        final List<String> lines = new XReadLines(reader).readLines();

        // What are the record offsets?  Each present column shifts the later ones right.
        final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
        final int samplePos = familyPos + 1;
        final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
        final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
        final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
        final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
        final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;

        // go through once and determine properties
        // NOTE(review): lineNo counts only data lines, so reported line numbers drift past
        // comments/blank lines — kept as-is to preserve existing messages
        int lineNo = 1;
        boolean isQT = false;
        final List<String[]> splits = new ArrayList<String[]>(lines.size());
        for ( final String line : lines ) {
            if ( line.startsWith(commentMarker)) continue;
            if ( line.trim().equals("") ) continue;

            final String[] parts = line.split("\\s+");

            if ( parts.length != nExpectedFields )
                // NOTE(review): reader.toString() is not a file name; message quality only
                throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");

            if ( phenotypePos != -1 ) {
                // one non-categorical value anywhere flips the whole file to quantitative-trait mode
                isQT = isQT || ! CATEGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
            }

            splits.add(parts);
            lineNo++;
        }
        logger.info("Phenotype is other? " + isQT);

        // now go through and parse each record
        lineNo = 1;
        final List<Sample> samples = new ArrayList<Sample>(splits.size());
        for ( final String[] parts : splits ) {
            String familyID = null, individualID, paternalID = null, maternalID = null;
            Gender sex = Gender.UNKNOWN;
            String quantitativePhenotype = Sample.UNSET_QT;
            Affection affection = Affection.UNKNOWN;

            if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
            individualID = parts[samplePos];
            if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
            if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);

            if ( sexPos != -1 ) {
                if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
                else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
                else sex = Gender.UNKNOWN;
            }

            if ( phenotypePos != -1 ) {
                if ( isQT ) {
                    // quantitative trait: -9 is the only missing marker; everything else is a value
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) )
                        affection = Affection.UNKNOWN;
                    else {
                        affection = Affection.OTHER;
                        quantitativePhenotype = parts[phenotypePos];
                    }
                } else {
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
                    else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
                }
            }

            final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype);
            samples.add(s);
            sampleDB.addSample(s);
            lineNo++;
        }

        // create implicit Sample records for parents that were referenced but never defined,
        // iterating over a snapshot so adding to samples is safe
        for ( final Sample sample : new ArrayList<Sample>(samples) ) {
            Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
            if ( dad != null ) samples.add(dad);

            Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
            if ( mom != null ) samples.add(mom);
        }

        return samples;
    }

    /**
     * Maps the PED missing-value codes ("-9" and "0") to null; any other string is a real ID.
     */
    private final static String maybeMissing(final String string) {
        if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) )
            return null;
        else
            return string;
    }

    /**
     * If id names a sample not yet present in sampleDB, creates and registers a bare
     * Sample of the given gender/family for it.
     *
     * @return the newly created sample, or null if id was null or already known
     */
    private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
        if ( id != null && sampleDB.getSample(id) == null ) {
            Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
            sampleDB.addSample(s);
            return s;
        } else
            return null;
    }

    /**
     * Parses a list of tags from the command line, assuming it comes from the GATK Engine
     * tags, and returns the corresponding EnumSet.
     *
     * @param arg the actual engine arg, used for the UserException if there's an error
     * @param tags a list of string tags that should be converted to the MissingPedField value
     * @return the set of MissingPedField values named by tags
     */
    public static final EnumSet<MissingPedField> parseMissingFieldTags(final Object arg, final List<String> tags) {
        final EnumSet<MissingPedField> missingFields = EnumSet.noneOf(MissingPedField.class);

        for ( final String tag : tags ) {
            try {
                missingFields.add(MissingPedField.valueOf(tag));
            } catch ( IllegalArgumentException e ) {
                // bug fix: MissingPedField.values() alone stringifies to the array's identity
                // hash (e.g. "[Lorg...;@1a2b3c"); Arrays.toString renders the actual names
                throw new UserException.BadArgumentValue(arg.toString(), "Unknown tag " + tag + " allowed values are " + Arrays.toString(MissingPedField.values()));
            }
        }

        return missingFields;
    }
}

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
 * How strictly the engine should cross-check pedigree data against the
 * samples present in the input VCF/BAM files.
 */
public enum PedigreeValidationType {
    /**
     * If a pedigree file is provided, require that all samples in the VCF or BAM files
     * have a corresponding entry in the pedigree file(s).
     */
    STRICT,

    /**
     * Do not enforce any overlap between the VCF/BAM samples and the pedigree data
     * */
    SILENT
}

View File

@ -0,0 +1,222 @@
package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.HashMap;
import java.util.Map;
/**
 * An immutable record describing a single individual: its ID, optional pedigree links
 * (family / father / mother IDs), gender, affection status, and an optional free-form
 * "other" phenotype value.  Parent lookups are resolved through the owning SampleDB.
 */
public class Sample implements Comparable<Sample> { // implements java.io.Serializable {
    final private String familyID, paternalID, maternalID;
    final private Gender gender;
    // arbitrary string phenotype, only meaningful when affection == Affection.OTHER
    final private String otherPhenotype;
    final private Affection affection;
    final private String ID;
    // the database this sample belongs to; used to resolve parent IDs to Sample objects.
    // May be null for some constructors -- getMother()/getFather() then NPE; TODO confirm intended
    final private SampleDB infoDB;
    // reserved for future non-standard per-sample properties; currently only
    // participates in toString() and equals()
    final private Map<String, Object> properties = new HashMap<String, Object>();

    /** Sentinel (null) meaning "no quantitative/other phenotype set" */
    public final static String UNSET_QT = null;

    public Sample(final String ID, final SampleDB infoDB,
                  final String familyID, final String paternalID, final String maternalID,
                  final Gender gender, final Affection affection, final String otherPhenotype) {
        this.familyID = familyID;
        this.paternalID = paternalID;
        this.maternalID = maternalID;
        this.gender = gender;
        this.otherPhenotype = otherPhenotype;
        this.affection = affection;
        this.ID = ID;
        this.infoDB = infoDB;
    }

    protected Sample(final String ID,
                     final String familyID, final String paternalID, final String maternalID,
                     final Gender gender, final Affection affection, final String otherPhenotype) {
        this(ID, null, familyID, paternalID, maternalID, gender, affection, otherPhenotype);
    }

    protected Sample(final String ID,
                     final String familyID, final String paternalID, final String maternalID,
                     final Gender gender, final Affection affection) {
        this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
    }

    public Sample(final String ID, final SampleDB infoDB,
                  final String familyID, final String paternalID, final String maternalID, final Gender gender) {
        this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
    }

    public Sample(final String ID, final SampleDB infoDB, final Affection affection, final String otherPhenotype) {
        this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, otherPhenotype);
    }

    public Sample(String id, SampleDB infoDB) {
        this(id, infoDB, null, null, null,
                Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
    }

    // -------------------------------------------------------------------------------------
    //
    // standard property getters
    //
    // -------------------------------------------------------------------------------------

    /** @return this sample's unique ID (never null) */
    public String getID() {
        return ID;
    }

    /** @return the family ID, or null if unknown */
    public String getFamilyID() {
        return familyID;
    }

    /** @return the father's sample ID, or null if unknown */
    public String getPaternalID() {
        return paternalID;
    }

    /** @return the mother's sample ID, or null if unknown */
    public String getMaternalID() {
        return maternalID;
    }

    /** @return the categorical affection status (never null) */
    public Affection getAffection() {
        return affection;
    }

    /** @return true when this sample carries an "other" (quantitative/free-form) phenotype */
    public boolean hasOtherPhenotype() {
        return affection == Affection.OTHER;
    }

    /** @return the free-form phenotype string, or UNSET_QT (null) when none was set */
    public String getOtherPhenotype() {
        return otherPhenotype;
    }

    /**
     * Get the sample's mother
     * @return sample object with relationship mother, if exists, or null
     */
    public Sample getMother() {
        return infoDB.getSample(maternalID);
    }

    /**
     * Get the sample's father
     * @return sample object with relationship father, if exists, or null
     */
    public Sample getFather() {
        return infoDB.getSample(paternalID);
    }

    /**
     * Get gender of the sample
     * @return property of key "gender" - must be of type Gender
     */
    public Gender getGender() {
        return gender;
    }

    /** Orders samples lexicographically by ID, consistent with equals only on the ID component */
    @Override
    public int compareTo(final Sample sample) {
        return ID.compareTo(sample.getID());
    }

    @Override
    public String toString() {
        return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
                getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
                getOtherPhenotype(), properties);
    }

    /** Hashes only the ID, matching the map-by-ID usage in SampleDB */
    @Override
    public int hashCode() {
        return ID.hashCode();
    }

    @Override
    public boolean equals(final Object o) {
        if(o == null)
            return false;
        if(o instanceof Sample) {
            Sample otherSample = (Sample)o;
            return ID.equals(otherSample.ID) &&
                    equalOrNull(familyID, otherSample.familyID) &&
                    equalOrNull(paternalID, otherSample.paternalID) &&
                    equalOrNull(maternalID, otherSample.maternalID) &&
                    equalOrNull(gender, otherSample.gender) &&
                    equalOrNull(otherPhenotype, otherSample.otherPhenotype) &&
                    equalOrNull(affection, otherSample.affection) &&
                    equalOrNull(properties, otherSample.properties);
        }
        return false;
    }

    /** Null-tolerant equality: two nulls are equal, null never equals a non-null */
    private final static boolean equalOrNull(final Object o1, final Object o2) {
        if ( o1 == null )
            return o2 == null;
        else
            return o2 == null ? false : o1.equals(o2);
    }

    /**
     * Merges one field of two Sample records.  A null / empty value yields to a real one;
     * two real values must agree, otherwise the records are inconsistent and we abort.
     *
     * @throws UserException if o1 and o2 both carry real values that differ
     */
    private final static <T> T mergeValues(final String name, final String field, final T o1, final T o2, final T emptyValue) {
        if ( o1 == null || o1.equals(emptyValue) ) {
            // o1 carries no information, so o2 wins (possibly null as well)
            return o2;
        } else {
            if ( o2 == null || o2.equals(emptyValue) )
                return o1; // keep o1, since it's a real value
            else {
                // both o1 and o2 have a real value; they must agree BY VALUE.
                // bug fix: this used identity (o1 == o2), which wrongly rejected equal
                // but distinct objects (e.g. equal family-ID strings from two parsed lines)
                if ( o1.equals(o2) )
                    return o1;
                else
                    throw new UserException("Inconsistent values detected for " + name + " for field " + field + " value1 " + o1 + " value2 " + o2);
            }
        }
    }

    /**
     * Field-wise merge of two records for the same individual (e.g. one from a BAM header,
     * one from a PED file).  Returns next unchanged when the two are fully equal.
     *
     * @throws UserException if any field holds two conflicting real values
     */
    public final static Sample mergeSamples(final Sample prev, final Sample next) {
        if ( prev.equals(next) )
            return next;
        else {
            return new Sample(prev.getID(), prev.infoDB,
                    mergeValues(prev.getID(), "Family_ID", prev.getFamilyID(), next.getFamilyID(), null),
                    mergeValues(prev.getID(), "Paternal_ID", prev.getPaternalID(), next.getPaternalID(), null),
                    // bug fix: error-message label was misspelled "Material_ID"
                    mergeValues(prev.getID(), "Maternal_ID", prev.getMaternalID(), next.getMaternalID(), null),
                    mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN),
                    mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN),
                    mergeValues(prev.getID(), "OtherPhenotype", prev.getOtherPhenotype(), next.getOtherPhenotype(), UNSET_QT));
        }
    }
}

View File

@ -0,0 +1,183 @@
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.*;
/**
 * In-memory database of Sample records, keyed by their unique sample ID, with
 * helpers for looking samples up from SAM records/read groups/genotypes and for
 * simple pedigree queries (families, children).
 */
public class SampleDB {
    /**
     * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
     * this is stored as a HashMap.
     */
    private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

    /**
     * Constructor takes both a SAM header and sample files because the two must be integrated.
     */
    public SampleDB() {
    }

    /**
     * Protected function to add a single sample to the database.  If a sample with the
     * same ID is already present the two records are merged field-by-field (which may
     * throw if they conflict) and the merged record replaces the old one.
     *
     * @param sample to be added
     * @return this, for chaining
     */
    protected SampleDB addSample(Sample sample) {
        Sample prev = samples.get(sample.getID());
        if ( prev != null )
            sample = Sample.mergeSamples(prev, sample);
        samples.put(sample.getID(), sample);
        return this;
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for getting a sample from the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get a sample by its ID
     * If an alias is passed in, return the main sample object
     * @param id sample ID (may be null, in which case null is returned)
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(String id) {
        return samples.get(id);
    }

    /**
     * Look up the sample associated with a read's read group.
     *
     * @param read the read to resolve
     * @return sample Object with this ID, or null if the read has no read group
     *         or the sample does not exist
     */
    public Sample getSample(final SAMRecord read) {
        // robustness fix: reads without a read group previously caused an NPE here
        final SAMReadGroupRecord rg = read.getReadGroup();
        return rg == null ? null : getSample(rg);
    }

    /**
     * Look up the sample named by a read group's SM tag.
     *
     * @param rg the read group
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final SAMReadGroupRecord rg) {
        return getSample(rg.getSample());
    }

    /**
     * @param g Genotype
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final Genotype g) {
        return getSample(g.getSampleName());
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for accessing samples in the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get number of sample objects
     * @return size of samples map
     */
    public int sampleCount() {
        return samples.size();
    }

    /** @return a fresh mutable set containing all samples in the DB */
    public Set<Sample> getSamples() {
        return new HashSet<Sample>(samples.values());
    }

    /** @return an unmodifiable view of all sample IDs in the DB */
    public Collection<String> getSampleNames() {
        return Collections.unmodifiableCollection(samples.keySet());
    }

    /**
     * Takes a collection of sample names and returns their corresponding sample objects
     * Note that, since a set is returned, if you pass in a list with duplicates names there will not be any duplicates in the returned set
     * @param sampleNameList Set of sample names
     * @return Corresponding set of samples
     */
    public Set<Sample> getSamples(Collection<String> sampleNameList) {
        final HashSet<Sample> result = new HashSet<Sample>();
        for (final String name : sampleNameList) {
            // note: getSample never throws; a previous dead try/catch was removed.
            // Unknown names contribute null, preserving the historical behavior.
            result.add(getSample(name));
        }
        return result;
    }

    // --------------------------------------------------------------------------------
    //
    // Higher level pedigree functions
    //
    // --------------------------------------------------------------------------------

    /**
     * Returns a sorted set of the family IDs in all samples (excluding null ids)
     * @return sorted set of family IDs
     */
    public final Set<String> getFamilyIDs() {
        return getFamilies().keySet();
    }

    /**
     * Returns a map from family ID -> set of family members for all samples with
     * non-null family ids
     *
     * @return map from family ID to its members, sorted by family ID
     */
    public final Map<String, Set<Sample>> getFamilies() {
        final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();

        for ( final Sample sample : samples.values() ) {
            final String famID = sample.getFamilyID();
            if ( famID != null ) {
                if ( ! families.containsKey(famID) )
                    families.put(famID, new TreeSet<Sample>());
                families.get(famID).add(sample);
            }
        }

        return families;
    }

    /**
     * Return all samples with a given family ID
     * @param familyId the family to look up
     * @return members of the family, or null if no sample carries that family ID
     */
    public Set<Sample> getFamily(String familyId) {
        return getFamilies().get(familyId);
    }

    /**
     * Returns all children of a given sample
     * See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
     * @param sample the putative parent
     * @return the (possibly empty) set of samples whose mother or father is sample
     */
    public Set<Sample> getChildren(Sample sample) {
        final HashSet<Sample> children = new HashSet<Sample>();
        // robustness fix: a null/unknown family ID previously caused an NPE when
        // iterating over the (null) family set
        final Set<Sample> family = getFamily(sample.getFamilyID());
        if ( family == null )
            return children;
        for ( final Sample familyMember : family ) {
            // bug fix: parent matching used identity (==), which breaks once addSample()
            // replaces a sample instance via mergeSamples(); compare by value instead
            if ( sample.equals(familyMember.getMother()) || sample.equals(familyMember.getFather()) ) {
                children.add(familyMember);
            }
        }
        return children;
    }
}

View File

@ -0,0 +1,153 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**
*
*/
public class SampleDBBuilder {
PedigreeValidationType validationStrictness;
final SampleDB sampleDB = new SampleDB();
final GenomeAnalysisEngine engine;
Set<Sample> samplesFromDataSources = new HashSet<Sample>();
Set<Sample> samplesFromPedigrees = new HashSet<Sample>();
/** for testing only */
protected SampleDBBuilder(PedigreeValidationType validationStrictness) {
engine = null;
this.validationStrictness = validationStrictness;
}
/**
* Constructor takes both a SAM header and sample files because the two must be integrated.
*/
public SampleDBBuilder(GenomeAnalysisEngine engine, PedigreeValidationType validationStrictness) {
this.engine = engine;
this.validationStrictness = validationStrictness;
}
/**
* Hallucinates sample objects for all the samples in the SAM file and stores them
*/
public SampleDBBuilder addSamplesFromSAMHeader(final SAMFileHeader header) {
addSamplesFromSampleNames(SampleUtils.getSAMFileSamples(header));
return this;
}
public SampleDBBuilder addSamplesFromSampleNames(final Collection<String> sampleNames) {
for (final String sampleName : sampleNames) {
if (sampleDB.getSample(sampleName) == null) {
final Sample newSample = new Sample(sampleName, sampleDB);
sampleDB.addSample(newSample);
samplesFromDataSources.add(newSample); // keep track of data source samples
}
}
return this;
}
public SampleDBBuilder addSamplesFromPedigreeFiles(final List<File> pedigreeFiles) {
for (final File pedFile : pedigreeFiles) {
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedFile);
samplesFromPedigrees.addAll(samples);
}
return this;
}
public SampleDBBuilder addSamplesFromPedigreeStrings(final List<String> pedigreeStrings) {
for (final String pedString : pedigreeStrings) {
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedString);
samplesFromPedigrees.addAll(samples);
}
return this;
}
/**
* Parse one sample file and integrate it with samples that are already there
* Fail quickly if we find any errors in the file
*/
private Collection<Sample> addSamplesFromPedigreeArgument(File sampleFile) {
final PedReader reader = new PedReader();
try {
return reader.parse(sampleFile, getMissingFields(sampleFile), sampleDB);
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(sampleFile, e);
}
}
private Collection<Sample> addSamplesFromPedigreeArgument(final String string) {
final PedReader reader = new PedReader();
return reader.parse(string, getMissingFields(string), sampleDB);
}
/**
 * Runs validation (per the configured strictness) and then hands back
 * the completed sample database.
 *
 * @return the validated SampleDB
 */
public SampleDB getFinalSampleDB() {
    validate();
    return sampleDB;
}
/**
 * Determines which PED fields were declared missing for the given engine
 * argument, based on the argument's positional tags. When no engine is
 * available, no fields are considered missing.
 *
 * @param engineArg the command-line argument object whose tags are inspected
 * @return the (possibly empty) set of missing PED fields
 */
public EnumSet<PedReader.MissingPedField> getMissingFields(final Object engineArg) {
    if ( engine != null ) {
        final List<String> positionalTags = engine.getTags(engineArg).getPositionalTags();
        return PedReader.parseMissingFieldTags(engineArg, positionalTags);
    }
    return EnumSet.noneOf(PedReader.MissingPedField.class);
}
// --------------------------------------------------------------------------------
//
// Validation
//
// --------------------------------------------------------------------------------
/**
 * Verifies that every sample seen in the data sources is annotated in the
 * pedigrees. The check is skipped entirely under SILENT validation, and is
 * only meaningful when both sample sets are non-empty.
 *
 * @throws UserException if a data-source sample is absent from the pedigrees
 */
protected final void validate() {
    if ( validationStrictness == PedigreeValidationType.SILENT )
        return;

    // check that samples in data sources are all annotated, if anything is annotated
    if ( samplesFromPedigrees.isEmpty() || samplesFromDataSources.isEmpty() )
        return;

    final Set<String> pedigreeNames = new HashSet<String>();
    for ( final Sample pedSample : samplesFromPedigrees )
        pedigreeNames.add(pedSample.getID());

    for ( final Sample dataSample : samplesFromDataSources ) {
        if ( ! pedigreeNames.contains(dataSample.getID()) )
            throw new UserException("Sample " + dataSample.getID() + " found in data sources but not in pedigree files with STRICT pedigree validation");
    }
}
}

View File

@ -30,11 +30,12 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
import java.util.List;
@ -87,6 +88,14 @@ public abstract class Walker<MapType, ReduceType> {
return getToolkit().getMasterSequenceDictionary();
}
/**
 * @return the sample database held by the GATK engine
 */
protected SampleDB getSampleDB() {
    final SampleDB db = getToolkit().getSampleDB();
    return db;
}
/**
 * Looks up a sample by its ID in the engine's sample database.
 *
 * @param id the sample identifier
 * @return the matching Sample, or null when the ID is unknown
 */
protected Sample getSample(final String id) {
    final SampleDB db = getToolkit().getSampleDB();
    return db.getSample(id);
}
/**
* (conceptual static) method that states whether you want to see reads piling up at a locus
* that contain a deletion at the locus.

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Gender;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -247,7 +248,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes();
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
for ( String sample : samples ) {
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getToolkit().getSampleById(sample).isMale();
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;
Genotype genotype;
boolean isValidation;

View File

@ -227,9 +227,8 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
@Override
public void initialize() {
if ( getToolkit().getSamples().size() != 2 ) {
// unbelievably there are actually two samples even when there's just one in the header. God I hate this Samples system
throw new UserException.BadArgumentValue("-I", "CallableLoci only works for a single sample, but multiple samples were found in the provided BAM files: " + getToolkit().getSamples());
if ( getSampleDB().getSamples().size() != 1 ) {
throw new UserException.BadArgumentValue("-I", "CallableLoci only works for a single sample, but multiple samples were found in the provided BAM files: " + getSampleDB().getSamples());
}
try {

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
@ -281,20 +282,14 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
private HashSet<String> getSamplesFromToolKit(DoCOutputType.Partition type) {
HashSet<String> partition = new HashSet<String>();
if ( type == DoCOutputType.Partition.sample ) {
for ( Set<String> sampleSet : getToolkit().getSamplesByReaders() ) {
for ( String s : sampleSet ) {
partition.add(s);
}
}
partition.addAll(SampleUtils.getSAMFileSamples(getToolkit()));
} else if ( type == DoCOutputType.Partition.readgroup ) {
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
partition.add(rg.getSample()+"_rg_"+rg.getReadGroupId());
}
} else if ( type == DoCOutputType.Partition.library ) {
for ( Set<String> libraries : getToolkit().getLibrariesByReaders() ) {
for ( String l : libraries ) {
partition.add(l);
}
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
partition.add(rg.getLibrary());
}
} else if ( type == DoCOutputType.Partition.center ) {
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {

View File

@ -248,6 +248,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
@Argument(fullName="nWayOut", shortName="nWayOut", required=false, doc="Generate one output file for each input (-I) bam file")
protected String N_WAY_OUT = null;
@Hidden
@Argument(fullName="generate_nWayOut_md5s",doc="Generate md5sums for BAMs")
protected boolean generateMD5s = false;
// DEBUGGING OPTIONS FOLLOW
@ -401,9 +404,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// if ( args.containsKey("disable_bam_indexing") ) { System.out.println("NO INDEXING!!"); System.exit(1); createIndex = false; }
if ( N_WAY_OUT.toUpperCase().endsWith(".MAP") ) {
writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),SAMFileHeader.SortOrder.coordinate,true, createIndex);
writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s);
} else {
writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, createIndex);
writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s);
}
} else {

View File

@ -32,16 +32,11 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;

View File

@ -36,12 +36,8 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@ -51,6 +47,9 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.CircularArray;
import org.broadinstitute.sting.utils.collections.PrimitivePair;
@ -392,7 +391,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
normalSamples = getToolkit().getSamplesByReaders().get(0);
normalSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeaders().get(0));
try {
// we already checked that bedOutput and output_file are not set simultaneously

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
@ -1095,14 +1094,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
// filter the read-base pileup based on min base and mapping qualities:
pileup = pileup.getBaseAndMappingFilteredPileup(MIN_BASE_QUALITY_SCORE, MIN_MAPPING_QUALITY_SCORE);
if (pileup != null) {
for (Sample sample : pileup.getSamples()) {
for (final String sample : pileup.getSamples()) {
ReadBackedPileup samplePileup = pileup.getPileupForSample(sample);
ReadBasesAtPosition readBases = new ReadBasesAtPosition();
for (PileupElement p : samplePileup) {
if (!p.isDeletion()) // IGNORE deletions for now
readBases.putReadBase(p);
}
sampleReadBases.put(sample.getId(), readBases);
sampleReadBases.put(sample, readBases);
}
}
}

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Gender;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
/**
 * Walks over the input data set, counting the reads whose sample is annotated
 * as male in the sample database. A minimal example of a read-backed analysis
 * that consults per-sample pedigree metadata.
 */
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountMalesWalker extends ReadWalker<Integer, Integer> {
    /** Emits 1 when the read's sample is male, 0 otherwise. */
    public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
        final Sample sample = getSampleDB().getSample(read);
        if ( sample.getGender() == Gender.MALE )
            return 1;
        return 0;
    }

    /** The running count starts at zero. */
    public Integer reduceInit() { return 0; }

    /** Folds each read's contribution into the running total. */
    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }
}

View File

@ -130,6 +130,10 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
nVariantLoci++;
nMixed++;
break;
case SYMBOLIC:
// ignore symbolic alleles, but don't fail
// todo - consistent way of treating symbolic alleles throughout codebase?
break;
default:
throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
}

View File

@ -133,7 +133,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
/**
* By default, this tool throws a UserException when it encounters a field without a value in some record. This
* is generally useful when you mistype -F CHRMO, so that you get a friendly warning about CHRMO not being
* is generally useful when you mistype -F CHROM, so that you get a friendly warning about CHRMO not being
* found before the tool runs through 40M 1000G records. However, in some cases you genuinely want to allow such
* fields (e.g., AC not being calculated for filtered records, if included). When provided, this argument
* will cause VariantsToTable to write out NA values for missing fields instead of throwing an error.

View File

@ -1,13 +1,10 @@
package org.broadinstitute.sting.utils;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
@ -19,9 +16,6 @@ import java.util.regex.Pattern;
* Time: 12:38 PM
*/
public class MendelianViolation {
String sampleMom;
String sampleDad;
String sampleChild;
@ -34,22 +28,15 @@ public class MendelianViolation {
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
public String getSampleMom() {
return sampleMom;
}
public String getSampleDad() {
return sampleDad;
}
public String getSampleChild() {
return sampleChild;
}
public double getMinGenotypeQuality() {
return minGenotypeQuality;
}
@ -90,37 +77,12 @@ public class MendelianViolation {
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
*/
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
sampleMom = sample.getMother().getId();
sampleDad = sample.getFather().getId();
sampleChild = sample.getId();
sampleMom = sample.getMother().getID();
sampleDad = sample.getFather().getID();
sampleChild = sample.getID();
minGenotypeQuality = minGenotypeQualityP;
}
/**
* The most common constructor to be used when give a YAML file with the relationships to the engine with the -SM option.
* @param engine - The GATK engine, use getToolkit(). That's where the sample information is stored.
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
*/
public MendelianViolation(GenomeAnalysisEngine engine, double minGenotypeQualityP) {
boolean gotSampleInformation = false;
Collection<Sample> samples = engine.getSamples();
// Iterate through all samples in the sample_metadata file but we really can only take one.
for (Sample sample : samples) {
if (sample.getMother() != null && sample.getFather() != null) {
sampleMom = sample.getMother().getId();
sampleDad = sample.getFather().getId();
sampleChild = sample.getId();
minGenotypeQuality = minGenotypeQualityP;
gotSampleInformation = true;
break; // we can only deal with one trio information
}
}
if (!gotSampleInformation)
throw new UserException("YAML file has no sample with relationship information (mother/father)");
}
/**
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
@ -158,7 +120,7 @@ public class MendelianViolation {
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
*
*/
public boolean isViolation (VariantContext vc)
public boolean isViolation(VariantContext vc)
{
return setAlleles(vc) && isViolation();
}
@ -172,42 +134,4 @@ public class MendelianViolation {
return false;
return true;
}
/**
* @return the likelihood ratio for a mendelian violation
*/
public double violationLikelihoodRatio(VariantContext vc) {
double[] logLikAssignments = new double[27];
// the matrix to set up is
// MOM DAD CHILD
// |- AA
// AA AA | AB
// |- BB
// |- AA
// AA AB | AB
// |- BB
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
int offset = 0;
for ( int oMom = 0; oMom < 3; oMom++ ) {
for ( int oDad = 0; oDad < 3; oDad++ ) {
for ( int oChild = 0; oChild < 3; oChild ++ ) {
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
}
}
}
double[] mvLiks = new double[12];
double[] nonMVLiks = new double[15];
for ( int i = 0; i < 12; i ++ ) {
mvLiks[i] = logLikAssignments[mvOffsets[i]];
}
for ( int i = 0; i < 15; i++) {
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
}
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
}
}

View File

@ -69,6 +69,18 @@ public class SampleUtils {
return samples;
}
/**
 * Same as {@link #getSAMFileSamples(net.sf.samtools.SAMFileHeader)} but pulls the
 * sample names from the merged header of all SAM files loaded by the engine.
 *
 * @param engine the GATK engine whose SAM file header is inspected
 * @return the set of sample names present in the engine's SAM header
 */
public static final Set<String> getSAMFileSamples(GenomeAnalysisEngine engine) {
    return getSAMFileSamples(engine.getSAMFileHeader());
}
/**
* Gets all of the unique sample names from all VCF rods input by the user
*

View File

@ -4,7 +4,6 @@ import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -215,6 +215,10 @@ public class UserException extends ReviewedStingException {
super(String.format("File %s is malformed: %s caused by %s", f.getAbsolutePath(), message, e.getMessage()));
}
/** Reports a malformed input identified by name rather than by a File object. */
public MalformedFile(String name, String message) {
super(String.format("File associated with name %s is malformed: %s", name, message));
}
/** Reports a malformed input identified by name, including the triggering exception's message. */
public MalformedFile(String name, String message, Exception e) {
super(String.format("File associated with name %s is malformed: %s caused by %s", name, message, e.getMessage()));
}

View File

@ -26,11 +26,9 @@ package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.*;
@ -114,10 +112,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
calculateCachedData();
}
protected AbstractReadBackedPileup(GenomeLoc loc, Map<Sample,? extends AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
protected AbstractReadBackedPileup(GenomeLoc loc, Map<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
this.loc = loc;
PerSamplePileupElementTracker<PE> tracker = new PerSamplePileupElementTracker<PE>();
for(Map.Entry<Sample,? extends AbstractReadBackedPileup<RBP,PE>> pileupEntry: pileupsBySample.entrySet()) {
for(Map.Entry<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupEntry: pileupsBySample.entrySet()) {
tracker.addElements(pileupEntry.getKey(),pileupEntry.getValue().pileupElementTracker);
addPileupToCumulativeStats(pileupEntry.getValue());
}
@ -213,7 +211,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutDeletions();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -251,7 +249,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getOverlappingFragmentFilteredPileup();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -305,7 +303,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutMappingQualityZeroReads();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -334,7 +332,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPositiveStrandPileup();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -363,7 +361,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getNegativeStrandPileup();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -393,7 +391,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getFilteredPileup(filter);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -425,7 +423,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ,minMapQ);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -492,7 +490,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForReadGroup(targetReadGroupId);
if(pileup != null)
@ -523,7 +521,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForLane(laneID);
if(pileup != null)
@ -550,14 +548,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
public Collection<String> getSampleNames() {
public Collection<String> getSamples() {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
Collection<String> sampleNames = new HashSet<String>();
for (Sample sample : tracker.getSamples()) {
sampleNames.add(sample.getId());
}
return sampleNames;
return new HashSet<String>(tracker.getSamples());
}
else {
Collection<String> sampleNames = new HashSet<String>();
@ -570,16 +564,6 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
@Override
public Collection<Sample> getSamples() {
if(!(pileupElementTracker instanceof PerSamplePileupElementTracker)) {
throw new StingException("Must be an instance of PerSampleElementTracker");
}
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
return tracker.getSamples();
}
/**
* Returns a pileup randomly downsampled to the desiredCoverage.
*
@ -604,7 +588,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
int current = 0;
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
@ -639,7 +623,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
@Override
public RBP getPileupForSampleNames(Collection<String> sampleNames) {
public RBP getPileupForSamples(Collection<String> sampleNames) {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PileupElementTracker<PE> filteredElements = tracker.getElements(sampleNames);
@ -665,7 +649,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
@Override
public RBP getPileupForSampleName(String sampleName) {
public RBP getPileupForSample(String sampleName) {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PileupElementTracker<PE> filteredElements = tracker.getElements(sampleName);
@ -688,30 +672,6 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
@Override
public RBP getPileupForSample(Sample sample) {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PileupElementTracker<PE> filteredElements = tracker.getElements(sample);
return filteredElements != null ? (RBP)createNewPileup(loc,filteredElements) : null;
}
else {
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for(PE p: pileupElementTracker) {
SAMRecord read = p.getRead();
if(sample != null) {
if(read.getReadGroup() != null && sample.getId().equals(read.getReadGroup().getSample()))
filteredTracker.add(p);
}
else {
if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
filteredTracker.add(p);
}
}
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
}
}
// --------------------------------------------------------
//
// iterators
@ -801,7 +761,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
int[] countsBySample = createNewPileup(loc,tracker.getElements(sample)).getBaseCounts();
for(int i = 0; i < counts.length; i++)
counts[i] += countsBySample[i];

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import java.util.Comparator;
import java.util.Iterator;
@ -42,7 +41,7 @@ class MergingPileupElementIterator<PE extends PileupElement> implements Iterator
public MergingPileupElementIterator(PerSamplePileupElementTracker<PE> tracker) {
perSampleIterators = new PriorityQueue<PeekableIterator<PE>>(Math.max(1,tracker.getSamples().size()),new PileupElementIteratorComparator());
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> trackerPerSample = tracker.getElements(sample);
if(trackerPerSample.size() != 0)
perSampleIterators.add(new PeekableIterator<PE>(trackerPerSample.iterator()));

View File

@ -24,8 +24,6 @@
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import java.util.*;
/**
@ -60,52 +58,35 @@ class UnifiedPileupElementTracker<PE extends PileupElement> extends PileupElemen
}
class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
private final Map<Sample,PileupElementTracker<PE>> pileup;
private final Map<String, Sample> sampleNames = new HashMap<String, Sample>();
private final Map<String,PileupElementTracker<PE>> pileup;
private int size = 0;
public PerSamplePileupElementTracker() {
pileup = new HashMap<Sample,PileupElementTracker<PE>>();
}
public PerSamplePileupElementTracker(Map<Sample,AbstractReadBackedPileup<?,PE>> pileupsBySample) {
pileup = new HashMap<Sample,PileupElementTracker<PE>>();
for(Map.Entry<Sample,AbstractReadBackedPileup<?,PE>> entry: pileupsBySample.entrySet()) {
Sample sample = entry.getKey();
AbstractReadBackedPileup<?,PE> pileupBySample = entry.getValue();
pileup.put(sample,pileupBySample.pileupElementTracker);
sampleNames.put(sample.getId(), sample);
}
pileup = new HashMap<String,PileupElementTracker<PE>>();
}
/**
* Gets a list of all the samples stored in this pileup.
* @return List of samples in this pileup.
*/
public Collection<Sample> getSamples() {
public Collection<String> getSamples() {
return pileup.keySet();
}
public PileupElementTracker<PE> getElements(final Sample sample) {
public PileupElementTracker<PE> getElements(final String sample) {
return pileup.get(sample);
}
public PileupElementTracker<PE> getElements(final String sampleName) {
return pileup.get(sampleNames.get(sampleName));
}
public PileupElementTracker<PE> getElements(final Collection<String> selectSampleNames) {
PerSamplePileupElementTracker<PE> result = new PerSamplePileupElementTracker<PE>();
for (String sample : selectSampleNames) {
Sample sampleObject = sampleNames.get(sample);
result.addElements(sampleObject, pileup.get(sampleObject));
for (final String sample : selectSampleNames) {
result.addElements(sample, pileup.get(sample));
}
return result;
}
public void addElements(final Sample sample, PileupElementTracker<PE> elements) {
public void addElements(final String sample, PileupElementTracker<PE> elements) {
pileup.put(sample,elements);
sampleNames.put(sample.getId(), sample);
size += elements.size();
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
@ -121,20 +120,7 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
* Gets a list of all the samples stored in this pileup.
* @return List of samples in this pileup.
*/
public Collection<String> getSampleNames();
/**
* Gets a list of all the samples stored in this pileup.
* @return List of samples in this pileup.
*/
public Collection<Sample> getSamples();
/**
* Gets the particular subset of this pileup with the given sample name.
* @param sample Name of the sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedExtendedEventPileup getPileupForSample(Sample sample);
public Collection<String> getSamples();
public Iterable<ExtendedEventPileupElement> toExtendedIterable();

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -56,7 +55,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
}
// this is the good new one
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<Sample,? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
super(loc,pileupElementsBySample);
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation;
@ -137,18 +136,11 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
*/
public ReadBackedPileup getPileupForLane(String laneID);
/**
* Gets a collection of all the samples stored in this pileup.
* @return Collection of samples in this pileup.
*/
public Collection<Sample> getSamples();
/**
* Gets a collection of *names* of all the samples stored in this pileup.
* @return Collection of names
*/
public Collection<String> getSampleNames();
public Collection<String> getSamples();
/**
@ -156,7 +148,7 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
* @param sampleNames Name of the sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedPileup getPileupForSampleNames(Collection<String> sampleNames);
public ReadBackedPileup getPileupForSamples(Collection<String> sampleNames);
/**
@ -164,14 +156,7 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
* @param sampleName Name of the sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedPileup getPileupForSampleName(String sampleName);
/**
* Gets the particular subset of this pileup with the given sample.
* @param sample Sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedPileup getPileupForSample(Sample sample);
public ReadBackedPileup getPileupForSample(String sampleName);
/**
* Simple useful routine to count the number of deletion bases in this pileup

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
@ -48,7 +47,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
super(loc,pileupElements);
}
public ReadBackedPileupImpl(GenomeLoc loc, Map<Sample,ReadBackedPileupImpl> pileupElementsBySample) {
public ReadBackedPileupImpl(GenomeLoc loc, Map<String,ReadBackedPileupImpl> pileupElementsBySample) {
super(loc,pileupElementsBySample);
}

View File

@ -51,18 +51,18 @@ public class NWaySAMFileWriter implements SAMFileWriter {
private boolean presorted ;
GenomeAnalysisEngine toolkit;
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
this.presorted = presorted;
this.toolkit = toolkit;
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly);
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5);
}
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly ) {
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
this.presorted = presorted;
this.toolkit = toolkit;
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
setupByReader(toolkit,ext,order, presorted, indexOnTheFly);
setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5);
}
@ -73,8 +73,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
* @param toolkit
* @param in2out
*/
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
@ -88,7 +87,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
if ( writerMap.containsKey( rid ) )
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
addWriter(rid,outName, order, presorted, indexOnTheFly);
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5);
}
}
@ -101,7 +100,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
* @param toolkit
* @param ext
*/
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
@ -119,16 +118,19 @@ public class NWaySAMFileWriter implements SAMFileWriter {
if ( writerMap.containsKey( rid ) )
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
addWriter(rid,outName, order, presorted, indexOnTheFly);
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5);
}
}
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
File f = new File(outName);
SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
header.setSortOrder(order);
SAMFileWriter sw = new SAMFileWriterFactory().setCreateIndex(indexOnTheFly).makeSAMOrBAMWriter(header, presorted, f);
SAMFileWriterFactory factory = new SAMFileWriterFactory();
factory.setCreateIndex(indexOnTheFly);
factory.setCreateMd5File(generateMD5);
SAMFileWriter sw = factory.makeSAMOrBAMWriter(header, presorted, f);
writerMap.put(id,sw);
}

View File

@ -99,9 +99,9 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
*
* @param reader
*/
public XReadLines(final BufferedReader reader, final boolean trimWhitespace) {
public XReadLines(final Reader reader, final boolean trimWhitespace) {
try {
this.in = reader;
this.in = new BufferedReader(reader);
nextline = readNextLine();
this.trimWhitespace = trimWhitespace;
} catch(IOException e) {
@ -109,7 +109,7 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
}
}
public XReadLines(final BufferedReader reader) throws FileNotFoundException {
public XReadLines(final Reader reader) {
this(reader, true);
}

View File

@ -132,7 +132,7 @@ import java.util.*;
* vc.hasGenotypes()
* vc.isMonomorphic()
* vc.isPolymorphic()
* vc.getSampleNames().size()
* vc.getSamples().size()
*
* vc.getGenotypes()
* vc.getGenotypes().get("g1")

View File

@ -129,7 +129,7 @@ public class MD5DB {
System.out.printf("##### Skipping update, cannot write file %s%n", dbFile);
}
} else {
System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath());
//System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath());
}
}
@ -170,6 +170,18 @@ public class MD5DB {
return bytes;
}
public static class MD5Match {
final String md5;
final String failMessage;
boolean failed;
public MD5Match(final String md5, final String failMessage, final boolean failed) {
this.md5 = md5;
this.failMessage = failMessage;
this.failed = failed;
}
}
/**
* Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different.
* @param name Name of the test.
@ -178,18 +190,21 @@ public class MD5DB {
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
* @return The calculated MD5.
*/
public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize);
public static MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
final String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize);
String failMessage = null;
boolean failed = false;
if (parameterize || expectedMD5.equals("")) {
// Don't assert
} else if ( filemd5sum.equals(expectedMD5) ) {
System.out.println(String.format(" => %s PASSED", name));
System.out.println(String.format(" => %s PASSED (expected=%s)", name, expectedMD5));
} else {
Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum));
failed = true;
failMessage = String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum);
}
return filemd5sum;
return new MD5Match(filemd5sum, failMessage, failed);
}
@ -218,8 +233,8 @@ public class MD5DB {
System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b",
name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5)));
} else {
System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
System.out.flush();
//System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
//System.out.flush();
if ( ! expectedMD5.equals(filemd5sum) ) {
// we are going to fail for real in assertEquals (so we are counted by the testing framework).

View File

@ -52,7 +52,7 @@ public class WalkerTest extends BaseTest {
GenomeAnalysisEngine.resetRandomGenerator();
}
public String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) {
public MD5DB.MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) {
return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize());
}
@ -84,10 +84,23 @@ public class WalkerTest extends BaseTest {
public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s) {
List<String> md5s = new ArrayList<String>();
List<MD5DB.MD5Match> fails = new ArrayList<MD5DB.MD5Match>();
for (int i = 0; i < resultFiles.size(); i++) {
String md5 = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i));
maybeValidateSupplementaryFile(name, resultFiles.get(i));
md5s.add(i, md5);
MD5DB.MD5Match result = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i));
if ( ! result.failed ) {
maybeValidateSupplementaryFile(name, resultFiles.get(i));
md5s.add(result.md5);
} else {
fails.add(result);
}
}
if ( ! fails.isEmpty() ) {
for ( final MD5DB.MD5Match fail : fails ) {
logger.warn("Fail: " + fail.failMessage);
}
Assert.fail("Test failed: " + name);
}
return md5s;

View File

@ -8,11 +8,9 @@ import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.testng.annotations.BeforeClass;
@ -51,7 +49,7 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.<SAMReaderID>emptyList(),genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null);
@ -67,7 +65,7 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(shardBounds));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
@ -82,7 +80,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -96,7 +94,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -110,7 +108,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -124,7 +122,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -138,7 +136,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -152,7 +150,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -167,7 +165,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -186,7 +184,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -205,7 +203,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -226,7 +224,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -254,7 +252,7 @@ public abstract class LocusViewTemplate extends BaseTest {
read07, read08, read09, read10, read11, read12);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);

View File

@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import com.google.caliper.Param;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Tags;
@ -34,15 +33,12 @@ import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.util.Collections;
import java.util.Iterator;
@ -88,12 +84,9 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
(byte)0);
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
SampleDataSource sampleDataSource = new SampleDataSource();
sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
while(locusIteratorByState.hasNext()) {
locusIteratorByState.next().getLocation();
}

View File

@ -1,241 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.testng.annotations.Test;
import java.io.File;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Sep 9, 2010
* Time: 8:21:00 AM
*/
public class SampleDataSourceUnitTest extends BaseTest {
// this empty header used to instantiate sampledatasource objects
private static SAMFileHeader header = new SAMFileHeader();
// all the test sample files are located here
private String sampleFilesDir = validationDataLocation + "samples/";
// make sure samples are created from the SAM file correctly
@Test()
public void loadSAMSamplesTest() {
SampleDataSource s = new SampleDataSource(header, null);
}
// tests that a basic sample with relationships loads correctly
// Note that this is the only test for family relationships - we may want to expand this
@Test()
public void basicLoadSampleFileTest() {
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
Assert.assertTrue(s.sampleCount() == 5);
Sample sampleA = s.getSampleById("sampleA");
Sample sampleB = s.getSampleById("sampleB");
Assert.assertTrue(sampleB.getMother() == sampleA);
Assert.assertTrue(s.getChildren(sampleA).contains(sampleB));
Set<Sample> family = s.getFamily("family1");
Assert.assertTrue(family.size() == 2);
Assert.assertTrue(family.contains(sampleA));
Assert.assertTrue(family.contains(sampleB));
}
// but that file should fail if it has an extra character in it...
@Test(expectedExceptions=StingException.class)
public void loadInvalidSampleExtraCharText() {
File sampleFile = new File(sampleFilesDir + "invalidSyntaxExtraChar.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
}
// ...or a typo...
@Test(expectedExceptions=StingException.class)
public void loadInvalidSampleTypoText() {
File sampleFile = new File(sampleFilesDir + "invalidSyntaxTypo.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
}
// ...or an extra unrecognized array
@Test(expectedExceptions=StingException.class)
public void loadInvalidSampleExtraArrayText() {
File sampleFile = new File(sampleFilesDir + "invalidSyntaxExtraArray.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
}
// make sure aliases work
@Test(expectedExceptions=StingException.class)
public void sampleAliasText() {
File sampleFile = new File(sampleFilesDir + "basicSampleFileWithAlias.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
// this file has two samples, but one has an alias. let's make sure that checks out...
Assert.assertTrue(s.sampleCount() == 3);
Assert.assertTrue(s.getSampleById("sampleA") == s.getSampleById("sampleC"));
}
// error is thrown if property is included that's not in properties array
@Test(expectedExceptions=StingException.class)
public void unallowedPropertySampleTest() {
File sampleFile = new File(sampleFilesDir + "basicSampleFileUnallowedProperty.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
}
// Same rule as unallowedPropertySampleTest, but for an undeclared relationship.
@Test(expectedExceptions=StingException.class)
public void unallowedRelationshipSampleTest() {
    final File invalidFile = new File(sampleFilesDir + "basicSampleFileUnallowedRelationship.yaml");
    new SampleDataSource(header, makeFileList(invalidFile));
}
// Properties for one sample may be spread over two files; both must be visible after loading.
@Test()
public void twoSampleFilesTest() {
    ArrayList<File> sampleFiles = new ArrayList<File>();
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFile.yaml"));
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFileExt.yaml"));
    SampleDataSource dataSource = new SampleDataSource(header, sampleFiles);
    Sample sampleA = dataSource.getSampleById("sampleA");
    // propA comes from the first file, propC from the second; both should be merged onto sampleA
    Assert.assertTrue(sampleA.getProperty("propC").equals("valC"));
    Assert.assertTrue(sampleA.getProperty("propA").equals("valA"));
}
// Two files assigning contradictory property values to the same sample must be rejected.
@Test(expectedExceptions=StingException.class)
public void twoContradictorySampleFilesTest() {
    ArrayList<File> sampleFiles = new ArrayList<File>();
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFile.yaml"));
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFileInvalidExt.yaml"));
    new SampleDataSource(header, sampleFiles);
}
// Three sample files loaded together: samples and properties from all of them are merged.
@Test()
public void threeSamplesTest() {
    ArrayList<File> sampleFiles = new ArrayList<File>();
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFile.yaml"));
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFileExt.yaml"));
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFileExt2.yaml"));
    SampleDataSource dataSource = new SampleDataSource(header, sampleFiles);
    Assert.assertTrue(dataSource.sampleCount() == 6);
    Assert.assertTrue(dataSource.getSampleById("sampleE").getProperty("propC").equals("valC"));
    Assert.assertTrue(dataSource.getSampleById("sampleA").getProperty("propA").equals("valA"));
}
/**
 * testing getSamplesWithProperty
 * in this file there are 5 samples - 2 with population "CEU", 1 with population "ABC",
 * 1 with no population, and then the default null sample
 */
@Test()
public void getSamplesWithPropertyTest() {
    final File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
    SampleDataSource dataSource = new SampleDataSource(header, makeFileList(sampleFile));
    Assert.assertTrue(dataSource.sampleCount() == 5);
    Set<Sample> ceuSamples = dataSource.getSamplesWithProperty("population", "CEU");
    Assert.assertTrue(ceuSamples.size() == 2);
    // collect the IDs so membership can be checked independent of iteration order
    ArrayList<String> ceuIds = new ArrayList<String>();
    for (Sample ceuSample : ceuSamples)
        ceuIds.add(ceuSample.getId());
    Assert.assertTrue(ceuIds.contains("sampleA"));
    Assert.assertTrue(ceuIds.contains("sampleB"));
}
// make sure we can import data types other than Strings
@Test()
public void sampleTestPropertyType() {
    File sampleFile = new File(sampleFilesDir + "sampleFileOtherTypes.yaml");
    SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
    Sample sample = s.getSampleById("sampleA");
    // YAML scalars should be parsed into their natural Java types, not left as Strings
    Assert.assertTrue(sample.getProperty("a").getClass() == Integer.class);
    Assert.assertTrue(sample.getProperty("b").getClass() == String.class);
    Assert.assertTrue(sample.getProperty("c").getClass() == Double.class);
    // NOTE(review): the original asserted property "b" a second time here — redundant
    // (possibly a typo for another property in the file); the duplicate was removed.
}
/**
 * check that getSamplesByVariantContext works:
 * create a variant context with two sample names, and make sure the right samples are there
 */
@Test()
public void variantContextTest() {
    SampleDataSource s = new SampleDataSource(header, null);
    List<Allele> alleleCollection = new ArrayList<Allele>();
    Allele a1 = Allele.create("A", true);
    alleleCollection.add(a1);
    Set<Genotype> genotypeCollection = new HashSet<Genotype>();
    genotypeCollection.add(new Genotype("NA123", alleleCollection));
    genotypeCollection.add(new Genotype("NA456", alleleCollection));
    VariantContext v = new VariantContext("contextName", "chr1", 1, 1, alleleCollection, genotypeCollection);
    // make sure the set that's returned is the right size; use the interface type
    // instead of the original's raw (HashSet) cast
    Set<Sample> samples = s.getSamplesByVariantContext(v);
    Assert.assertTrue(samples.size() == 2);
    // make sure both samples are included
    ArrayList<String> sampleNames = new ArrayList<String>();
    for (Sample sample : samples)
        sampleNames.add(sample.getId());
    Assert.assertTrue(sampleNames.contains("NA123"));
    Assert.assertTrue(sampleNames.contains("NA456"));
}
/**
 * check that subContextFromSampleProperty works:
 * create a variant context with four sample names, and make sure it filters
 * correctly down to the two samples whose "population" property is "CEU".
 */
@Test()
public void subContextFromSamplePropertyTest() {
    File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
    SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
    Assert.assertTrue(s.sampleCount() == 5);
    List<Allele> alleleCollection = new ArrayList<Allele>();
    Allele a1 = Allele.create("A", true);
    alleleCollection.add(a1);
    Set<Genotype> genotypeCollection = new HashSet<Genotype>();
    genotypeCollection.add(new Genotype("NA123", alleleCollection));
    genotypeCollection.add(new Genotype("sampleA", alleleCollection));
    genotypeCollection.add(new Genotype("sampleB", alleleCollection));
    genotypeCollection.add(new Genotype("sampleC", alleleCollection));
    VariantContext v = new VariantContext("contextName", "chr1", 1, 1, alleleCollection, genotypeCollection);
    VariantContext subContext = s.subContextFromSampleProperty(v, "population", "CEU");
    // the CEU samples in this file are sampleA and sampleB; the original asserted
    // sampleA twice and never checked sampleB
    Assert.assertTrue(subContext.getSampleNames().contains("sampleA"));
    Assert.assertTrue(subContext.getSampleNames().contains("sampleB"));
    Assert.assertTrue(subContext.getSampleNames().size() == 2);
}
/** Convenience: wrap a single sample file in the one-element list the data source expects. */
private ArrayList<File> makeFileList(File file) {
    ArrayList<File> singleton = new ArrayList<File>();
    singleton.add(file);
    return singleton;
}
}

View File

@ -1,64 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Sep 9, 2010
* Time: 8:21:00 AM
*/
public class SampleUnitTest extends BaseTest {
// Fixtures: sampleA1 is a DISTINCT object constructed with the same id and property
// as sampleA, used to exercise equals()/hashCode(); sampleC carries the extra
// properties checked by specialGettersTest.
static Sample sampleA;
static Sample sampleA1;
static Sample sampleB;
static Sample sampleC;
@BeforeClass
public void init() {
sampleA = new Sample("sampleA");
sampleA.setProperty("uniqueProperty", "uniqueValue");
sampleA1 = new Sample("sampleA");
sampleA1.setProperty("uniqueProperty", "uniqueValue");
sampleB = new Sample("sampleB");
sampleC = new Sample("sampleC");
sampleC.setProperty("population", "pop1");
sampleC.setProperty("gender", Sample.Gender.MALE);
}
/**
 * Testing equality: value equality for same-state objects, without reference identity.
 */
@Test()
public void equalsTest() {
Assert.assertTrue(sampleA.equals(sampleA1));
Assert.assertFalse(sampleA == sampleA1);
Assert.assertFalse(sampleA.equals(sampleB));
}
/**
 * And hash: equal objects must hash equally.
 * NOTE(review): the first assertion requires UNEQUAL hashes for unequal samples;
 * the hashCode contract permits collisions, so this could in principle fail for a
 * colliding pair of ids — it just happens to hold for these fixtures.
 */
@Test()
public void basicHashTest() {
Assert.assertFalse(sampleA.hashCode() == sampleB.hashCode());
Assert.assertTrue(sampleA.hashCode() == sampleA1.hashCode());
}
/**
 * Now test the special getter methods (population / gender convenience accessors)
 */
@Test()
public void specialGettersTest() {
Assert.assertTrue(sampleC.getId().equals("sampleC"));
Assert.assertTrue(sampleC.getPopulation().equals("pop1"));
Assert.assertTrue(sampleC.isMale());
Assert.assertFalse(sampleA.isMale()); // sample A doesn't have a gender, so this should be false
}
}

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
@ -12,7 +11,6 @@ import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -29,11 +27,8 @@ import java.util.*;
* testing of the LocusIteratorByState
*/
public class LocusIteratorByStateUnitTest extends BaseTest {
private final int MAX_READS = 10;
private static SAMFileHeader header;
private LocusIteratorByState li;
private GenomeLocParser genomeLocParser;
@BeforeClass
@ -42,6 +37,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
}
private final LocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
@Test
public void testIndelBaseQualityFiltering() {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
@ -68,7 +68,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser, new SampleDataSource());
li = makeLTBS(reads,readAttributes);
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {
@ -120,7 +120,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser, new SampleDataSource());
li = makeLTBS(reads,readAttributes);
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {
@ -154,7 +154,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser,new SampleDataSource());
li = makeLTBS(reads, readAttributes);
// Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read
// and considers it to be an indel-containing read.
@ -167,7 +167,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
// Turn on extended events, and make sure the event is found.
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser,new SampleDataSource());
li = makeLTBS(reads, readAttributes);
Assert.assertTrue(li.hasNext(),"LocusIteratorByState with extended events should contain exactly one pileup");
alignmentContext = li.next();
@ -203,7 +203,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(leadingRead,indelOnlyRead,fullMatchAfterIndel);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),createTestReadProperties(),genomeLocParser,new SampleDataSource());
li = makeLTBS(reads, createTestReadProperties());
int currentLocus = firstLocus;
int numAlignmentContextsFound = 0;
@ -260,7 +260,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(leadingRead,indelOnlyRead,fullMatchAfterIndel);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser,new SampleDataSource());
li = makeLTBS(reads,readAttributes);
Assert.assertTrue(li.hasNext(),"Missing first locus at " + firstLocus);
AlignmentContext alignmentContext = li.next();

View File

@ -0,0 +1,353 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.StringReader;
import java.util.*;
/**
* UnitTest for PedReader
*
* @author Mark DePristo
* @since 2011
*/
public class PedReaderUnitTest extends BaseTest {
    private static Logger logger = Logger.getLogger(PedReaderUnitTest.class);

    /** One PED-parsing case: raw file contents plus the samples we expect to read back. */
    private class PedReaderTest extends TestDataProvider {
        public String fileContents;
        public List<Sample> expectedSamples;
        // NOTE(review): never populated by any constructor; kept for interface stability
        EnumSet<PedReader.MissingPedField> missing;

        private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
            super(PedReaderTest.class, name);
            this.fileContents = fileContents;
            this.expectedSamples = expectedSamples;
        }
    }

    // PED column layout:
    // Family ID
    // Individual ID
    // Paternal ID
    // Maternal ID
    // Sex (1=male; 2=female; other=unknown)
    // Phenotype
    //
    // Phenotype encoding:
    // -9 missing
    // 0 missing
    // 1 unaffected
    // 2 affected
    @DataProvider(name = "readerTest")
    public Object[][] createPEDFiles() {
        // Gender
        new PedReaderTest("singleRecordMale",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 1 1");
        new PedReaderTest("singleRecordFemale",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.FEMALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 2 1");
        new PedReaderTest("singleRecordMissingGender",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.UNKNOWN, Affection.UNKNOWN)),
                "fam1 kid 0 0 0 0");
        // Affection
        new PedReaderTest("singleRecordAffected",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED)),
                "fam1 kid 0 0 1 2");
        new PedReaderTest("singleRecordUnaffected",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 1 1");
        new PedReaderTest("singleRecordMissingAffection-9",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
                "fam1 kid 0 0 1 -9");
        new PedReaderTest("singleRecordMissingAffection0",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
                "fam1 kid 0 0 1 0");
        // Multiple records / pedigree structure
        new PedReaderTest("multipleUnrelated",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s%n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 2"));
        new PedReaderTest("multipleUnrelatedExtraLine",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s%n%s%n %n", // note extra newlines and whitespace
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 2"));
        new PedReaderTest("explicitTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s%n%s%n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad 0 0 1 1",
                        "fam1 mom 0 0 2 2"));
        new PedReaderTest("implicitTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                "fam1 kid dad mom 1 2");
        new PedReaderTest("partialTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                String.format("%s%n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad 0 0 1 1"));
        new PedReaderTest("bigPedigree",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
                        new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
                        new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
                        new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                String.format("%s%n%s%n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad granddad1 grandma1 1 1",
                        "fam1 mom granddad2 grandma2 2 2"));
        // Quantitative trait
        new PedReaderTest("OtherPhenotype",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.OTHER, "1"),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10.0")),
                String.format("%s%n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 10.0"));
        new PedReaderTest("OtherPhenotypeWithMissing",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10.0")),
                String.format("%s%n%s",
                        "fam1 s1 0 0 1 -9",
                        "fam2 s2 0 0 2 10.0"));
        new PedReaderTest("OtherPhenotypeOnlyInts",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.OTHER, "1"),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10")),
                String.format("%s%n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 10"));

        return PedReaderTest.getTests(PedReaderTest.class);
    }

    /**
     * Parses myFileContents and asserts the parsed samples equal the test's expectation.
     * TestNG's assertEquals takes (actual, expected) — the original had them reversed,
     * which makes failure messages report the values the wrong way around.
     */
    private static void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
        logger.warn("Test " + test);
        PedReader reader = new PedReader();
        SampleDB sampleDB = new SampleDB();
        List<Sample> readSamples = reader.parse(myFileContents, missing, sampleDB);
        Assert.assertEquals(new HashSet<Sample>(readSamples), new HashSet<Sample>(test.expectedSamples));
    }

    @Test(enabled = true, dataProvider = "readerTest")
    public void testPedReader(PedReaderTest test) {
        runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedField.class));
    }

    /** Comment lines (leading '#') must be skipped by the parser. */
    @Test(enabled = true, dataProvider = "readerTest")
    public void testPedReaderWithComments(PedReaderTest test) {
        runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedField.class));
    }

    /** Semicolons are accepted as record separators in place of newlines. */
    @Test(enabled = true, dataProvider = "readerTest")
    public void testPedReaderWithSemicolons(PedReaderTest test) {
        runTest(test,
                test.fileContents.replace(String.format("%n"), ";"),
                EnumSet.noneOf(PedReader.MissingPedField.class));
    }

    // -----------------------------------------------------------------
    // missing format field tests
    // -----------------------------------------------------------------

    /** A PED case where some columns are declared absent via MissingPedField tags. */
    private class PedReaderTestMissing extends TestDataProvider {
        public EnumSet<PedReader.MissingPedField> missingDesc;
        public EnumSet<PedReader.Field> missingFields;
        public final String fileContents;
        public Sample expected;

        private PedReaderTestMissing(final String name, final String fileContents,
                                     EnumSet<PedReader.MissingPedField> missingDesc,
                                     EnumSet<PedReader.Field> missingFields,
                                     final Sample expected) {
            super(PedReaderTestMissing.class, name);
            this.fileContents = fileContents;
            this.missingDesc = missingDesc;
            this.missingFields = missingFields;
            this.expected = expected;
        }
    }

    @DataProvider(name = "readerTestMissing")
    public Object[][] createPEDFilesWithMissing() {
        new PedReaderTestMissing("missingFam",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID),
                EnumSet.of(PedReader.Field.FAMILY_ID),
                new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED));
        new PedReaderTestMissing("missingParents",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_PARENTS),
                EnumSet.of(PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID),
                new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED));
        new PedReaderTestMissing("missingSex",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_SEX),
                EnumSet.of(PedReader.Field.GENDER),
                new Sample("kid", "fam1", "dad", "mom", Gender.UNKNOWN, Affection.AFFECTED));
        new PedReaderTestMissing("missingPhenotype",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE),
                EnumSet.of(PedReader.Field.PHENOTYPE),
                new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.UNKNOWN));
        new PedReaderTestMissing("missingEverythingButGender",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS, PedReader.MissingPedField.NO_FAMILY_ID),
                EnumSet.of(PedReader.Field.FAMILY_ID, PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID, PedReader.Field.PHENOTYPE),
                new Sample("kid", null, null, null, Gender.MALE, Affection.UNKNOWN));

        return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
    }

    @Test(enabled = true, dataProvider = "readerTestMissing")
    public void testPedReaderWithMissing(PedReaderTestMissing test) {
        // remove the declared-missing columns from the input before parsing
        final String contents = sliceContents(test.missingFields, test.fileContents);
        logger.warn("Test " + test);
        PedReader reader = new PedReader();
        SampleDB sampleDB = new SampleDB();
        reader.parse(new StringReader(contents), test.missingDesc, sampleDB);
        final Sample missingSample = sampleDB.getSample("kid");
        // (actual, expected) order — the original had these reversed
        Assert.assertEquals(missingSample, test.expected, "Missing field value not expected value for " + test);
    }

    /**
     * Drops the columns named in missingFieldsSet from a single whitespace-separated
     * PED record. Removal runs highest-ordinal-first so earlier removals don't shift
     * the indices of later ones.
     */
    private static String sliceContents(EnumSet<PedReader.Field> missingFieldsSet, String full) {
        List<String> parts = new ArrayList<String>(Arrays.asList(full.split("\\s+")));
        final List<PedReader.Field> missingFields = new ArrayList<PedReader.Field>(missingFieldsSet);
        Collections.reverse(missingFields);
        for ( PedReader.Field field : missingFields )
            parts.remove(field.ordinal());
        return Utils.join("\t", parts);
    }

    // -----------------------------------------------------------------
    // parsing tags
    // -----------------------------------------------------------------

    /** Maps a list of tag strings to the MissingPedField set they should parse into. */
    private class PedReaderTestTagParsing extends TestDataProvider {
        public EnumSet<PedReader.MissingPedField> expected;
        public final List<String> tags;

        private PedReaderTestTagParsing(final List<String> tags, EnumSet<PedReader.MissingPedField> missingDesc) {
            super(PedReaderTestTagParsing.class);
            this.tags = tags;
            this.expected = missingDesc;
        }
    }

    @DataProvider(name = "readerTestTagParsing")
    public Object[][] createReaderTestTagParsing() {
        new PedReaderTestTagParsing(
                Collections.<String>emptyList(),
                EnumSet.noneOf(PedReader.MissingPedField.class));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_FAMILY_ID"),
                EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_PARENTS"),
                EnumSet.of(PedReader.MissingPedField.NO_PARENTS));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_PHENOTYPE"),
                EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_SEX"),
                EnumSet.of(PedReader.MissingPedField.NO_SEX));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_SEX", "NO_PHENOTYPE"),
                EnumSet.of(PedReader.MissingPedField.NO_SEX, PedReader.MissingPedField.NO_PHENOTYPE));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_SEX", "NO_PHENOTYPE", "NO_PARENTS"),
                EnumSet.of(PedReader.MissingPedField.NO_SEX, PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS));

        return PedReaderTestTagParsing.getTests(PedReaderTestTagParsing.class);
    }

    @Test(enabled = true, dataProvider = "readerTestTagParsing")
    public void testPedReaderTagParsing(PedReaderTestTagParsing test) {
        EnumSet<PedReader.MissingPedField> parsed = PedReader.parseMissingFieldTags("test", test.tags);
        // (actual, expected) order — the original had these reversed
        Assert.assertEquals(parsed, test.expected, "Failed to properly parse tags " + test.tags);
    }

    /** An unrecognized tag must be rejected. */
    @Test(enabled = true, expectedExceptions = UserException.class)
    public void testPedReaderTagParsing1() {
        PedReader.parseMissingFieldTags("test", Arrays.asList("XXX"));
    }

    /** A bad tag mixed with a valid one must still be rejected. */
    @Test(enabled = true, expectedExceptions = UserException.class)
    public void testPedReaderTagParsing2() {
        PedReader.parseMissingFieldTags("test", Arrays.asList("NO_SEX", "XXX"));
    }
}

View File

@ -0,0 +1,157 @@
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.io.File;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Sep 9, 2010
* Time: 8:21:00 AM
*/
public class SampleDBUnitTest extends BaseTest {
    private static SampleDBBuilder builder;

    // all the test sample files are located here
    private File testPED = new File(testDir + "ceutrio.ped");

    // the trio we expect back from parsing testPED / testPEDString
    private static final Set<Sample> testPEDSamples = new HashSet<Sample>(Arrays.asList(
            new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
            new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
            new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));

    // the same individuals as seen through a SAM header alone: no pedigree information
    private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
            new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
            new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
            new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));

    private static final String testPEDString =
            String.format("%s%n%s%n%s",
                    "fam1 kid dad mom 1 2",
                    "fam1 dad 0 0 1 1",
                    "fam1 mom 0 0 2 2");

    private static final String testPEDMultipleFamilies =
            String.format("%s%n%s%n%s%n%s%n%s",
                    "fam1 kid dad mom 1 2",
                    "fam1 dad 0 0 1 1",
                    "fam1 mom 0 0 2 2",
                    "fam3 s1 d1 m1 2 2",
                    "fam2 s2 d2 m2 2 2");

    // "kid" redeclared as female, contradicting the male record in testPED
    private static final String testPEDStringInconsistentGender =
            "fam1 kid 0 0 2 2";

    // NOTE(review): testPEDSamples is already a Set, so this copy is redundant;
    // kept as an alias for readability of the assertions below.
    private static final Set<Sample> testPEDSamplesAsSet =
            new HashSet<Sample>(testPEDSamples);

    @BeforeMethod
    public void before() {
        builder = new SampleDBBuilder(PedigreeValidationType.STRICT);
    }

    // NOTE: throughout, TestNG's assertEquals takes (actual, expected); the original
    // passed them reversed, which produced misleading failure messages.

    @Test()
    public void loadPEDFile() {
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testPEDSamplesAsSet);
    }

    @Test()
    public void loadPEDString() {
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDString));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testPEDSamplesAsSet);
    }

    /** Registers read groups for kid/mom/dad on an artificial SAM header. */
    private static void addSAMHeader() {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
        ArtificialSAMUtils.createEnumeratedReadGroups(header, Arrays.asList("1", "2", "3"),
                Arrays.asList("kid", "mom", "dad"));
        builder.addSamplesFromSAMHeader(header);
    }

    @Test()
    public void loadSAMHeader() {
        addSAMHeader();
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testSAMSamples);
    }

    @Test()
    public void loadSAMHeaderPlusPED() {
        // pedigree information should enrich the bare SAM-header samples
        addSAMHeader();
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testPEDSamples);
    }

    @Test()
    public void loadDuplicateData() {
        // loading the same consistent pedigree twice is allowed and idempotent
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testPEDSamples);
    }

    @Test(expectedExceptions = UserException.class)
    public void loadNonExistentFile() {
        builder.addSamplesFromPedigreeFiles(Arrays.asList(new File("non-existence-file.txt")));
        // NOTE(review): if the expected UserException fires above (it should), the
        // lines below never execute; they are retained only to mirror the original.
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testSAMSamples);
    }

    @Test(expectedExceptions = UserException.class)
    public void loadInconsistentData() {
        builder = new SampleDBBuilder(PedigreeValidationType.STRICT);
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDStringInconsistentGender));
        builder.getFinalSampleDB();
    }

    @Test(expectedExceptions = UserException.class)
    public void sampleInSAMHeaderNotInSamplesDB() {
        addSAMHeader();
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDStringInconsistentGender));
        builder.getFinalSampleDB();
    }

    @Test()
    public void getFamilyIDs() {
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getFamilyIDs(), new TreeSet<String>(Arrays.asList("fam1", "fam2", "fam3")));
    }

    @Test()
    public void getFamily() {
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
    }

    @Test()
    public void loadFamilyIDs() {
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
        SampleDB db = builder.getFinalSampleDB();
        Map<String, Set<Sample>> families = db.getFamilies();
        Assert.assertEquals(families.size(), 3);
        Assert.assertEquals(families.keySet(), new TreeSet<String>(Arrays.asList("fam1", "fam2", "fam3")));

        // every family is a trio whose members all report the family's ID
        for ( final String famID : families.keySet() ) {
            final Set<Sample> fam = families.get(famID);
            Assert.assertEquals(fam.size(), 3);
            for ( final Sample sample : fam ) {
                Assert.assertEquals(sample.getFamilyID(), famID);
            }
        }
    }
}

View File

@ -0,0 +1,64 @@
package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
*
*/
public class SampleUnitTest extends BaseTest {
    SampleDB db;
    // "fam1" trio: 1A is the child of father 1B and mother 1C
    static Sample fam1A, fam1B, fam1C;
    // samples with no pedigree or phenotype information
    static Sample s1, s2;
    // one sample per Affection state, plus two OTHER-phenotype payload variants
    static Sample trait1, trait2, trait3, trait4, trait5;

    @BeforeClass
    public void init() {
        db = new SampleDB();

        fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN);
        fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE);
        fam1C = new Sample("1C", db, "fam1", null, null, Gender.FEMALE);

        s1 = new Sample("s1", db);
        s2 = new Sample("s2", db);

        trait1 = new Sample("t1", db, Affection.AFFECTED, Sample.UNSET_QT);
        trait2 = new Sample("t2", db, Affection.UNAFFECTED, Sample.UNSET_QT);
        trait3 = new Sample("t3", db, Affection.UNKNOWN, Sample.UNSET_QT);
        trait4 = new Sample("t4", db, Affection.OTHER, "1.0");
        // was "t4" in the original — an apparent copy-paste slip that gave two
        // different samples the same ID
        trait5 = new Sample("t5", db, Affection.OTHER, "CEU");
    }

    /**
     * Basic getters: IDs, pedigree links, and phenotype accessors.
     * assertEquals arguments are ordered (actual, expected) per TestNG's contract;
     * the original had them reversed.
     */
    @Test()
    public void normalGettersTest() {
        Assert.assertEquals(fam1A.getID(), "1A");
        Assert.assertEquals(fam1A.getFamilyID(), "fam1");
        Assert.assertEquals(fam1A.getPaternalID(), "1B");
        Assert.assertEquals(fam1A.getMaternalID(), "1C");
        Assert.assertNull(fam1B.getPaternalID());
        Assert.assertNull(fam1B.getMaternalID());

        Assert.assertEquals(trait1.getAffection(), Affection.AFFECTED);
        Assert.assertEquals(trait1.getOtherPhenotype(), Sample.UNSET_QT);
        Assert.assertEquals(trait2.getAffection(), Affection.UNAFFECTED);
        Assert.assertEquals(trait2.getOtherPhenotype(), Sample.UNSET_QT);
        Assert.assertEquals(trait3.getAffection(), Affection.UNKNOWN);
        Assert.assertEquals(trait3.getOtherPhenotype(), Sample.UNSET_QT);
        Assert.assertEquals(trait4.getAffection(), Affection.OTHER);
        Assert.assertEquals(trait4.getOtherPhenotype(), "1.0");
        Assert.assertEquals(trait5.getOtherPhenotype(), "CEU");
    }

    /** Gender is stored and returned per-sample. */
    @Test()
    public void testGenders() {
        Assert.assertEquals(fam1A.getGender(), Gender.UNKNOWN);
        Assert.assertEquals(fam1B.getGender(), Gender.MALE);
        Assert.assertEquals(fam1C.getGender(), Gender.FEMALE);
    }
}

View File

@ -57,11 +57,11 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
// now add the expected files that get generated
spec.addAuxFile("423571e4c05e7934322172654ac6dbb7", baseOutputFile);
spec.addAuxFile("9df5e7e07efeb34926c94a724714c219", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
spec.addAuxFile("b9a7748e5aec4dc06daed893c901c00d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
spec.addAuxFile("229b9b5bc2141c86dbc69c8acc9eba6a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics"));
spec.addAuxFile("aec669d64d9dd652dd088a5341835ea5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
spec.addAuxFile("f6dbd74d32a48abe71ce08d300bce983", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
spec.addAuxFile("e3a3467ed259ee3680f8d01980f525b7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
spec.addAuxFile("471c34ad2e4f7228efd20702d5941ba9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
spec.addAuxFile("9667c77284c2c08e647b162d0e9652d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
spec.addAuxFile("5a96c75f96d6fa6ee617451d731dae37", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
spec.addAuxFile("b82846df660f0aac8429aec57c2a62d6", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
@ -69,11 +69,11 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
spec.addAuxFile("e70952f241eebb9b5448f2e7cb288131", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
spec.addAuxFile("054ed1e184f46d6a170dc9bf6524270c", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
spec.addAuxFile("d53431022f7387fe9ac47814ab1fcd88", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
spec.addAuxFile("650ee3714da7fbad7832c9d4ad49eb51", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("a395dafde101971d2b9e5ddb6cd4b7d0", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
spec.addAuxFile("7dcac2e8962c778081486332a4576dc3", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
spec.addAuxFile("a50011571334f17e950ad3ed1149e350", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
spec.addAuxFile("6f3260504295695d765af639539585c9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
spec.addAuxFile("e013cb5b11b0321a81c8dbd7c1863787", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
spec.addAuxFile("661160f571def8c323345b5859cfb9da", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
spec.addAuxFile("c95a7a6840334cadd0e520939615c77b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
execute("testBaseOutputNoFiltering",spec);
}
@ -90,7 +90,7 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("6ccd7d8970ba98cb95fe41636a070c1c",baseOutputFile);
spec.addAuxFile("0ee40f3e5091536c14e077b77557083a",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
spec.addAuxFile("7d87783b3d98b928cac16d383ceca807",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
execute("testNoCoverageDueToFiltering",spec);
}

View File

@ -29,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("e6639ea2dc81635c706e6c35921406d7"));
Arrays.asList("b27939251539439a382538e507e03507"));
executeTest("test MultiSample Pilot1", spec);
}
@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("4be308fd9e8167ebee677f62a7a753b7"));
Arrays.asList("37e891bf1ac40caec9ea228f39c27e44"));
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
}

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;

View File

@ -25,16 +25,14 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.*;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: roger

View File

@ -76,7 +76,7 @@ public class IntervalIntegrationTest extends WalkerTest {
// our base file
File baseOutputFile = createTempFile("testUnmappedReadInclusion",".bam");
spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("99c266d777e2e167b8153c858c305fda",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("748a38ed5eb0a043dfc7b82f0d1e8063",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("fadcdf88597b9609c5f2a17f4c6eb455", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
executeTest("testUnmappedReadInclusion",spec);
@ -96,8 +96,8 @@ public class IntervalIntegrationTest extends WalkerTest {
// our base file
File baseOutputFile = createTempFile("testUnmappedReadExclusion",".bam");
spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("8236f0b2df5a692e54751b08bc3836fa",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("651b42456d31ba24e913297b71b32143", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
spec.addAuxFile("80887ba488e53dabd9596ff93070ae75",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("b341d808ecc33217f37c0c0cde2a3e2f", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
executeTest("testUnmappedReadExclusion",spec);
}

View File

@ -28,7 +28,6 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.Test;
@ -141,9 +140,9 @@ public class ReadBackedPileupUnitTest {
ReadBackedPileupImpl sample2Pileup = new ReadBackedPileupImpl(null,
Arrays.asList(read2,read4),
Arrays.asList(1,1));
Map<Sample,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<Sample,ReadBackedPileupImpl>();
sampleToPileupMap.put(new Sample(readGroupOne.getSample()),sample1Pileup);
sampleToPileupMap.put(new Sample(readGroupTwo.getSample()),sample2Pileup);
Map<String,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String,ReadBackedPileupImpl>();
sampleToPileupMap.put(readGroupOne.getSample(),sample1Pileup);
sampleToPileupMap.put(readGroupTwo.getSample(),sample2Pileup);
ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
@ -164,13 +163,13 @@ public class ReadBackedPileupUnitTest {
@Test
public void testGetPileupForSample() {
Sample sample1 = new Sample("sample1");
Sample sample2 = new Sample("sample2");
String sample1 = "sample1";
String sample2 = "sample2";
SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
readGroupOne.setSample(sample1.getId());
readGroupOne.setSample(sample1);
SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
readGroupTwo.setSample(sample2.getId());
readGroupTwo.setSample(sample2);
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
header.addReadGroup(readGroupOne);
@ -181,24 +180,20 @@ public class ReadBackedPileupUnitTest {
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
read2.setAttribute("RG",readGroupTwo.getId());
Map<Sample,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<Sample,ReadBackedPileupImpl>();
Map<String,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String,ReadBackedPileupImpl>();
sampleToPileupMap.put(sample1,new ReadBackedPileupImpl(null,Collections.singletonList(read1),0));
sampleToPileupMap.put(sample2,new ReadBackedPileupImpl(null,Collections.singletonList(read2),0));
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
ReadBackedPileup sample1Pileup = pileup.getPileupForSample(sample1);
Assert.assertEquals(sample1Pileup.size(),1,"Sample 1 pileup has wrong number of elements");
Assert.assertEquals(sample1Pileup.getReads().get(0),read1,"Sample 1 pileup has incorrect read");
ReadBackedPileup sample2Pileup = pileup.getPileupForSampleName(sample2.getId());
ReadBackedPileup sample2Pileup = pileup.getPileupForSample(sample2);
Assert.assertEquals(sample2Pileup.size(),1,"Sample 2 pileup has wrong number of elements");
Assert.assertEquals(sample2Pileup.getReads().get(0),read2,"Sample 2 pileup has incorrect read");
ReadBackedPileup missingSamplePileup = pileup.getPileupForSample(new Sample("missing"));
ReadBackedPileup missingSamplePileup = pileup.getPileupForSample("missing");
Assert.assertNull(missingSamplePileup,"Pileup for sample 'missing' should be null but isn't");
missingSamplePileup = pileup.getPileupForSampleName("not here");
missingSamplePileup = pileup.getPileupForSample("not here");
Assert.assertNull(missingSamplePileup,"Pileup for sample 'not here' should be null but isn't");
}
}

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext;
import org.testng.annotations.Test;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
/**
* Test to ensure that, given only the VCF jar and its expected dependencies, core VCF classes will load.
*/
public class VCFJarClassLoadingUnitTest {
@Test
public void testVCFJarClassLoading() throws ClassNotFoundException, MalformedURLException {
URI vcfURI = new File("dist/vcf.jar").toURI();
URI tribbleURI = new File("lib/tribble-24.jar").toURI();
ClassLoader classLoader = new URLClassLoader(new URL[] {vcfURI.toURL(),tribbleURI.toURL()}, null);
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFWriter");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter");
}
}

View File

@ -1,202 +0,0 @@
package org.broadinstitute.sting.queue.qscripts
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk.RodBind
import org.broadinstitute.sting.queue.extensions.gatk._
class StandardVariantEvaluation extends QScript {
// todo -- update to released version when things stabilize
@Argument(doc="gatkJarFile", required=false)
var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKFromLaptop/trunk/dist/GenomeAnalysisTK.jar")
@Argument(shortName = "R", doc="B37 reference sequence: defaults to broad standard location", required=false)
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
@Argument(shortName = "intervals", doc="intervals to evaluate. Only supports evaluation on chromosome 20 now, as most evaluation data is there", required=false)
val TARGET_INTERVAL: String = "20"
@Argument(shortName = "includeUnion", doc="If provided, we'll create a union of the evaluation data sets for evaluation", required=false)
val CREATE_UNION: Boolean = false
@Argument(shortName = "dataDir", doc="Path to the standard evaluation data files", required=false)
val DATA_DIR = "/humgen/gsa-hpprojects/GATK/data/Comparisons/StandardForEvaluation/b37/"
@Argument(shortName = "evalStandard1000GCalls", doc="If provided, we'll include some standard 1000G data for evaluation", required=false)
val EVAL_STANDARD_1000G_CALLS: Boolean = false
val COMPS_DIR = DATA_DIR + "/comps/"
val EVALS_DIR = DATA_DIR + "/evals/"
@Argument(shortName = "moreSNPsToEval", doc="Path to additional SNP call sets for evaluation", required=false)
val moreSNPsToEval: List[File] = Nil
@Argument(shortName = "moreIndelsToEval", doc="Path to additional Indel call sets for evaluation", required=false)
val moreIndelsToEval: List[File] = Nil
val VARIANT_TYPES: List[String] = List("indels", "snps")
val VARIANT_TYPE_VT: Map[String, List[org.broad.tribble.util.variantcontext.VariantContext.Type]] = Map(
"indels" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.INDEL, org.broad.tribble.util.variantcontext.VariantContext.Type.MIXED, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION),
"snps" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.SNP, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION)
)
val SITES_DIR: String = "sitesFiles"
// path to b37 DBSNP
@Argument(shortName = "dbsnp", doc="Path to DBSNP **VCF** for evaluation", required=false)
val MY_DBSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b37.leftAligned.vcf")
//val MY_DBSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf");
class Comp(val name: String, val evalType: String, val filename: String, val MakeHomVar: Boolean = false) {
val originalFile = new File(COMPS_DIR + filename)
val file: File = if ( MakeHomVar ) swapExt(originalFile, ".vcf",".homvar.vcf") else originalFile
val sitesFile = new File(SITES_DIR + "/" + swapExt(file, ".vcf", ".sites.vcf").getName)
}
class Eval(val name: String, val evalType: String, val filename: String, val overrideFile: File = null ) {
val file: File = if ( overrideFile != null ) overrideFile else new File(EVALS_DIR + "/" + filename)
}
var COMPS: List[Comp] = Nil
def addComp(comp: Comp) { COMPS = comp :: COMPS }
var EVALS: List[Eval] = Nil
def addEval(eval: Eval) { EVALS = eval :: EVALS }
def addEvalFromCMD(file: File, t: String) { addEval(new Eval(file.getName, t, null, file)) }
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
this.logging_level = "INFO";
this.jarFile = gatkJarFile;
this.intervalsString = List(TARGET_INTERVAL);
this.reference_sequence = referenceFile;
this.memoryLimit = 2
}
def initializeStandardDataFiles() = {
//
// Standard evaluation files for indels
//
addComp(new Comp("NA12878.homvar.GATK", "indels", "Indels.NA12878_WGS.filtered_Q50.0_QD5.0_SB-1.0_HR18.vcf", true))
addComp(new Comp("CG.38samples", "indels", "CG.Indels.leftAligned.b37.vcf"))
addComp(new Comp("NA12878.homvar.CG", "indels", "NA12878.CG.b37.indels.vcf", true))
addComp(new Comp("g1k.pilot1.validation", "indels", "pilot1_indel_validation_2009.b37.vcf"))
addComp(new Comp("NA12878.hand_curated", "indels", "NA12878.validated.curated.polymorphic.indels.vcf"))
addComp(new Comp("NA12878.Mullikin", "indels", "NA12878.DIPline.NQScm.expanded.chr20.b37.minReads_2_or_gt2bp.vcf"))
//
// INDEL call sets
//
if ( EVAL_STANDARD_1000G_CALLS ) {
addEval(new Eval("dindel", "indels", "20110208.chr20.dindel2.EUR.sites.vcf"))
addEval(new Eval("si", "indels", "20101123.chr20.si.v2.EUR.sites.vcf"))
addEval(new Eval("gatk", "indels", "EUR.phase1.chr20.broad.filtered.indels.sites.vcf"))
}
//
// Standard evaluation files for SNPs
//
addComp(new Comp("NA12878.homvar.GATK", "snps", "NA12878.HiSeq19.cut.vcf", true))
addComp(new Comp("CG.38samples", "snps", "CG.38samples.b37.vcf"))
addComp(new Comp("NA12878.homvar.CG", "snps", "NA12878.CG.b37.snps.vcf", true))
addComp(new Comp("HapMap3.3", "snps", "hapmap3.3.sites_r27_nr.b37_fwd.vcf"))
addComp(new Comp("OMNI.2.5M", "snps", "omni2.5.1212samples.b37.sites.chr20.monoAreAC0.vcf"))
addComp(new Comp("g1k.pilot1.validation", "snps", "1000G.snp.validation.b37.vcf"))
//
// SNP call sets
//
if ( EVAL_STANDARD_1000G_CALLS ) {
addEval(new Eval("1000G.gatk.eurPlus.phase1", "snps", "EUR+.phase1.chr20.broad.recal.vrcut1p0.sites.vcf"))
addEval(new Eval("1000G.high_specificity.phase1", "snps", "ALL.phase1.chr20.projectConsensus.highSpecificity.snps.genotypes.sites.vcf"))
}
}
def script = {
val sitesDir = new File(SITES_DIR)
if ( ! sitesDir.exists ) sitesDir.mkdirs()
initializeStandardDataFiles();
// add additional files for evaluation, if necessary
moreSNPsToEval.foreach(addEvalFromCMD(_, "snps"))
moreIndelsToEval.foreach(addEvalFromCMD(_, "indels"))
//
// create hom-var versions of key files
//
for ( comp <- COMPS )
if ( comp.MakeHomVar )
add(new SelectHomVars(comp.originalFile, comp.file))
for ( comp <- COMPS )
add(new JustSites(comp.file, comp.sitesFile))
//
// Loop over evaluation types
//
for ( evalType <- VARIANT_TYPES ) {
var evalsOfType = EVALS.filter(_.evalType == evalType)
val compsOfType = COMPS.filter(_.evalType == evalType)
if ( evalsOfType.size > 0 ) {
// if desired and possible, create a union.X.vcf file
if ( CREATE_UNION && evalsOfType.size > 1 ) {
val union: File = new File("union.%s.vcf".format(evalType))
add(new MyCombine(evalsOfType.map(_.file), union));
evalsOfType = new Eval("union", evalType, null, union) :: evalsOfType
}
// our root VE
val VE = new MyEval()
VE.VT = VARIANT_TYPE_VT(evalType)
VE.o = new File(evalType + ".eval")
// add evals
for ( calls <- evalsOfType )
VE.rodBind :+= RodBind("eval_" + calls.name, "VCF", calls.file)
// add comps
//VE.rodBind :+= RodBind("dbsnp", "VCF", MY_DBSNP)
for ( comp <- compsOfType )
VE.rodBind :+= RodBind("comp_" + comp.name, "VCF", comp.sitesFile)
add(VE)
}
}
}
/**
* Select homozygous non-reference sites from a single deep data set
*/
class SelectHomVars(@Input(doc="foo") vcf: File, @Output(doc="foo") out: File) extends SelectVariants with UNIVERSAL_GATK_ARGS {
this.rodBind :+= RodBind("variant", "VCF", vcf)
this.o = out
this.select ++= List("\"AC == 2\"")
}
/**
* A simple union
*/
class MyCombine(@Input(doc="foo") vcfs: List[File], @Output(doc="foo") out: File) extends CombineVariants with UNIVERSAL_GATK_ARGS {
for ( vcf <- vcfs )
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
this.o = out
}
/**
* A command line (cut) that removes all genotyping information from a file
*/
class JustSites(@Input(doc="foo") in: File, @Output(doc="foo") out: File) extends CommandLineFunction {
def commandLine = "cut -f 1-8 %s > %s".format(in, out)
}
/**
* Base class for VariantEval used here
*/
class MyEval() extends VariantEval with UNIVERSAL_GATK_ARGS {
this.noST = true
this.evalModule :+= "ValidationReport"
}
}

3
public/testdata/ceutrio.ped vendored 100644
View File

@ -0,0 +1,3 @@
fam1 kid dad mom 1 2
fam1 dad 0 0 1 1
fam1 mom 0 0 2 2