Further cleanup of Sample database
-- Removing more and more unnecessary code -- Partial removal of type safe Sample usage. On the road to SampleDB only
This commit is contained in:
parent
2a0cd556d3
commit
5c9227cf5e
|
|
@ -46,10 +46,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
|||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -1048,7 +1045,7 @@ public class GenomeAnalysisEngine {
|
|||
* Returns all samples that were referenced in the SAM file
|
||||
*/
|
||||
public Set<Sample> getSAMFileSamples() {
|
||||
return sampleDataSource.getSAMFileSamples();
|
||||
return sampleDataSource.getSamples(SampleUtils.getSAMFileSamples(getSAMFileHeader()));
|
||||
}
|
||||
|
||||
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -18,12 +16,11 @@ public class Sample implements java.io.Serializable {
|
|||
final private Sample.Gender gender;
|
||||
final private double quantitativePhenotype;
|
||||
final private Sample.Affection affection;
|
||||
final private String population;
|
||||
final private String ID;
|
||||
final private SampleDataSource dataSource;
|
||||
|
||||
private boolean hasSAMFileEntry = false; // true if this sample has an entry in the SAM file
|
||||
private Map<String, Object> properties = new HashMap<String, Object>();
|
||||
// todo -- conditionally add the property map -- should be empty by default
|
||||
private final Map<String, Object> properties = new HashMap<String, Object>();
|
||||
|
||||
public enum Gender {
|
||||
MALE,
|
||||
|
|
@ -46,33 +43,31 @@ public class Sample implements java.io.Serializable {
|
|||
|
||||
public Sample(final String ID, final SampleDataSource dataSource,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final double quantitativePhenotype, final Affection affection,
|
||||
final String population) {
|
||||
final Gender gender, final double quantitativePhenotype, final Affection affection) {
|
||||
this.familyID = familyID;
|
||||
this.paternalID = paternalID;
|
||||
this.maternalID = maternalID;
|
||||
this.gender = gender;
|
||||
this.quantitativePhenotype = quantitativePhenotype;
|
||||
this.affection = affection;
|
||||
this.population = population;
|
||||
this.ID = ID;
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
public Sample(final String ID, final SampleDataSource dataSource,
|
||||
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
|
||||
this(ID, dataSource, familyID, paternalID, maternalID, gender,
|
||||
UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN);
|
||||
}
|
||||
|
||||
public Sample(final String ID, final SampleDataSource dataSource, final double quantitativePhenotype, final Affection affection) {
|
||||
this(ID, dataSource, null, null, null, Gender.UNKNOWN, quantitativePhenotype, affection);
|
||||
}
|
||||
|
||||
public Sample(String id, SampleDataSource dataSource) {
|
||||
this(id, dataSource,
|
||||
null, null, null,
|
||||
Gender.UNKNOWN, UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN, null);
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean hasSAMFileEntry() {
|
||||
return this.hasSAMFileEntry;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setSAMFileEntry(boolean value) {
|
||||
this.hasSAMFileEntry = value;
|
||||
Gender.UNKNOWN, UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
|
@ -115,7 +110,7 @@ public class Sample implements java.io.Serializable {
|
|||
* @return sample object with relationship mother, if exists, or null
|
||||
*/
|
||||
public Sample getMother() {
|
||||
return dataSource.getSampleById(maternalID);
|
||||
return dataSource.getSample(maternalID);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -123,7 +118,7 @@ public class Sample implements java.io.Serializable {
|
|||
* @return sample object with relationship father, if exists, or null
|
||||
*/
|
||||
public Sample getFather() {
|
||||
return dataSource.getSampleById(paternalID);
|
||||
return dataSource.getSample(paternalID);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -134,28 +129,10 @@ public class Sample implements java.io.Serializable {
|
|||
return gender;
|
||||
}
|
||||
|
||||
public String getPopulation() {
|
||||
return population;
|
||||
}
|
||||
|
||||
public String getFamilyId() {
|
||||
return familyID;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if sample is male, false if female, unknown, or null
|
||||
*/
|
||||
public boolean isMale() {
|
||||
return getGender() == Gender.MALE;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if sample is female, false if male, unknown or null
|
||||
*/
|
||||
public boolean isFemale() {
|
||||
return getGender() == Gender.MALE;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// code for working with additional -- non-standard -- properties
|
||||
|
|
@ -184,22 +161,20 @@ public class Sample implements java.io.Serializable {
|
|||
return properties.containsKey(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
Sample sample = (Sample) o;
|
||||
|
||||
if (hasSAMFileEntry != sample.hasSAMFileEntry) return false;
|
||||
if (ID != null ? !ID.equals(sample.ID) : sample.ID != null) return false;
|
||||
if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return ID != null ? ID.hashCode() : "".hashCode();
|
||||
}
|
||||
// @Override
|
||||
// public boolean equals(Object o) {
|
||||
// if (this == o) return true;
|
||||
// if (o == null || getClass() != o.getClass()) return false;
|
||||
//
|
||||
// Sample sample = (Sample) o;
|
||||
// if (ID != null ? !ID.equals(sample.ID) : sample.ID != null) return false;
|
||||
// if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
|
||||
//
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public int hashCode() {
|
||||
// return ID != null ? ID.hashCode() : "".hashCode();
|
||||
// }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -26,394 +25,121 @@ import java.util.*;
|
|||
*
|
||||
*/
|
||||
public class SampleDataSource {
|
||||
|
||||
/**
|
||||
* SAMFileHeader that has been created for this analysis.
|
||||
*/
|
||||
private SAMFileHeader header;
|
||||
|
||||
/**
|
||||
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
|
||||
* this is stored as a HashMap.
|
||||
*/
|
||||
private final HashMap<String, Sample> samples = new HashMap<String, Sample>();
|
||||
|
||||
/**
|
||||
* Samples can have "aliases", because sometimes the same sample is referenced by different IDs in different
|
||||
* datasets. If this is the case, one ID is the "primary ID" and others are "aliases".
|
||||
*
|
||||
* This maps ID => primary ID for all samples ID strings - both primary IDs and aliases.
|
||||
*/
|
||||
private HashMap<String, String> sampleAliases = new HashMap<String, String>();
|
||||
|
||||
/**
|
||||
* Constructor takes both a SAM header and sample files because the two must be integrated.
|
||||
* @param header SAMFileHeader that has been created for this analysis
|
||||
* @param sampleFiles Sample files that were included on the command line
|
||||
*/
|
||||
public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
|
||||
this();
|
||||
this.header = header;
|
||||
// create empty sample object for each sample referenced in the SAM header
|
||||
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
|
||||
if (!hasSample(sampleName)) {
|
||||
Sample newSample = new Sample(sampleName, this);
|
||||
samples.put(sampleName, newSample);
|
||||
}
|
||||
}
|
||||
|
||||
// add files consecutively
|
||||
if (sampleFiles != null) {
|
||||
for (File file : sampleFiles) {
|
||||
addFile(file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public SampleDataSource() {
|
||||
samples.put(null, new Sample(null, this));
|
||||
}
|
||||
|
||||
public SampleDataSource(final SAMFileHeader header, final List<File> sampleFiles) {
|
||||
this();
|
||||
addSamples(header);
|
||||
addSamples(sampleFiles);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions for adding samples to the DB
|
||||
//
|
||||
// TODO: these should be protected, really
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Hallucinates sample objects for all the samples in the SAM file and stores them
|
||||
*/
|
||||
public void addSamplesFromSAMHeader(SAMFileHeader header) {
|
||||
public SampleDataSource addSamples(SAMFileHeader header) {
|
||||
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
|
||||
if (!hasSample(sampleName)) {
|
||||
if (getSample(sampleName) == null) {
|
||||
Sample newSample = new Sample(sampleName, this);
|
||||
newSample.setSAMFileEntry(true);
|
||||
samples.put(sampleName, newSample);
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDataSource addSamples(final List<File> sampleFiles) {
|
||||
// add files consecutively
|
||||
for (File file : sampleFiles) {
|
||||
addSamples(file);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse one sample file and integrate it with samples that are already there
|
||||
* Fail quickly if we find any errors in the file
|
||||
*/
|
||||
public void addFile(File sampleFile) {}
|
||||
//
|
||||
// BufferedReader reader;
|
||||
// try {
|
||||
// reader = new BufferedReader(new FileReader(sampleFile));
|
||||
// }
|
||||
// catch (IOException e) {
|
||||
// throw new StingException("Could not open sample file " + sampleFile.getAbsolutePath(), e);
|
||||
// }
|
||||
//
|
||||
// // set up YAML reader - a "Constructor" creates java object from YAML and "Loader" loads the file
|
||||
// Constructor con = new Constructor(SampleFileParser.class);
|
||||
// TypeDescription desc = new TypeDescription(SampleFileParser.class);
|
||||
// desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
|
||||
// desc.putListPropertyType("sampleAliases", SampleAlias.class);
|
||||
// con.addTypeDescription(desc);
|
||||
// Yaml yaml = new Yaml(con);
|
||||
//
|
||||
// // SampleFileParser stores an object representation of a sample file - this is what we'll parse
|
||||
// SampleFileParser parser;
|
||||
// try {
|
||||
// parser = (SampleFileParser) yaml.load(reader);
|
||||
// }
|
||||
// catch (Exception e) {
|
||||
// throw new StingException("There was a syntactic error with the YAML in sample file " + sampleFile.getAbsolutePath(), e);
|
||||
// }
|
||||
//
|
||||
// // check to see which validation options were built into the file
|
||||
// boolean restrictProperties = parser.getAllowedProperties() != null;
|
||||
// boolean restrictRelationships = parser.getAllowedRelationships() != null;
|
||||
// boolean restrictPropertyValues = parser.getPropertyDefinitions() != null;
|
||||
//
|
||||
// // propertyValues stores the values that are allowed for a given property
|
||||
// HashMap<String, HashSet> propertyValues = null;
|
||||
// if (restrictPropertyValues) {
|
||||
// propertyValues = new HashMap<String, HashSet>();
|
||||
// for (PropertyDefinition def : parser.getPropertyDefinitions()) {
|
||||
// HashSet<String> set = new HashSet<String>();
|
||||
// for (String value : def.getValues()) {
|
||||
// set.add(value);
|
||||
// }
|
||||
// propertyValues.put(def.getProperty(), set);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // make sure the aliases are valid
|
||||
// validateAliases(parser);
|
||||
//
|
||||
// // loop through each sample in the file - a SampleParser stores an object that will become a Sample
|
||||
// for (SampleParser sampleParser : parser.getSamples()) {
|
||||
//
|
||||
// try {
|
||||
// // step 1: add the sample if it doesn't already exist
|
||||
// Sample sample = getSampleById(sampleParser.getID());
|
||||
// if (sample == null) {
|
||||
// sample = new Sample(sampleParser.getID());
|
||||
// }
|
||||
// addSample(sample);
|
||||
// sample.setSampleFileEntry(true);
|
||||
//
|
||||
// // step 2: add the properties
|
||||
// if (sampleParser.getProperties() != null) {
|
||||
// for (String property : sampleParser.getProperties().keySet()) {
|
||||
//
|
||||
// // check that property is allowed
|
||||
// if (restrictProperties) {
|
||||
// if (!isPropertyValid(property, parser.getAllowedProperties())) {
|
||||
// throw new StingException(property + " is an invalid property. It is not included in the list " +
|
||||
// "of allowed properties.");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // next check that the value is allowed
|
||||
// if (restrictPropertyValues) {
|
||||
// if (!isValueAllowed(property, sampleParser.getProperties().get(property), propertyValues)) {
|
||||
// throw new StingException("The value of property '" + property + "' is invalid. " +
|
||||
// "It is not included in the list of allowed values for this property.");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // next check that there isn't already a conflicting property there
|
||||
// if (sample.getProperty(property) != null &&
|
||||
// sample.getProperty(property) != sampleParser.getProperties().get(property))
|
||||
// {
|
||||
// throw new StingException(property + " is a conflicting property!");
|
||||
// }
|
||||
//
|
||||
// // checks are passed - now add the property!
|
||||
// saveProperty(sample, property, sampleParser.getProperties().get(property));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // step 3: add the relationships
|
||||
// if (sampleParser.getRelationships() != null) {
|
||||
// for (String relationship : sampleParser.getRelationships().keySet()) {
|
||||
// String relativeId = sampleParser.getRelationships().get(relationship);
|
||||
// if (relativeId == null) {
|
||||
// throw new StingException("The relationship cannot be null");
|
||||
// }
|
||||
//
|
||||
// // first check that it's not invalid
|
||||
// if (restrictRelationships) {
|
||||
// if (!isRelationshipValid(relationship, parser.getAllowedRelationships())) {
|
||||
// throw new StingException(relationship + " is an invalid relationship");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // next check that there isn't already a conflicting property there
|
||||
// if (sample.getRelationship(relationship) != null) {
|
||||
// if (sample.getRelationship(relationship).getID() != sampleParser.getProperties().get(relationship)) {
|
||||
// throw new StingException(relationship + " is a conflicting relationship!");
|
||||
// }
|
||||
// // if the relationship is already set - and consistent with what we're reading now - no need to continue
|
||||
// else {
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // checks are passed - now save the relationship
|
||||
// saveRelationship(sample, relationship, relativeId);
|
||||
// }
|
||||
// }
|
||||
// } catch (Exception e) {
|
||||
// throw new StingException("An error occurred while loading this sample from the sample file: " +
|
||||
// sampleParser.getID(), e);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private boolean isValueAllowed(String key, Object value, HashMap<String, HashSet> valuesList) {
|
||||
//
|
||||
// // if the property values weren't specified for this property, then any value is okay
|
||||
// if (!valuesList.containsKey(key)) {
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// // if this property has enumerated values, it must be a string
|
||||
// else if (value.getClass() != String.class)
|
||||
// return false;
|
||||
//
|
||||
// // is the value specified or not?
|
||||
// else if (!valuesList.get(key).contains(value))
|
||||
// return false;
|
||||
//
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Makes sure that the aliases are valid
|
||||
// * Checks that 1) no string is used as both a main ID and an alias;
|
||||
// * 2) no alias is used more than once
|
||||
// * @param parser
|
||||
// */
|
||||
// private void validateAliases(SampleFileParser parser) {
|
||||
//
|
||||
// // no aliases to validate
|
||||
// if (parser.getSampleAliases() == null)
|
||||
// return;
|
||||
//
|
||||
// HashSet<String> mainIds = new HashSet<String>();
|
||||
// HashSet<String> otherIds = new HashSet<String>();
|
||||
//
|
||||
// for (SampleAlias sampleAlias : parser.getSampleAliases()) {
|
||||
// mainIds.add(sampleAlias.getMainId());
|
||||
// for (String otherId : sampleAlias.getOtherIds()) {
|
||||
// if (mainIds.contains(otherId))
|
||||
// throw new StingException(String.format("The aliases in your sample file are invalid - the alias %s cannot " +
|
||||
// "be both a main ID and an other ID", otherId));
|
||||
//
|
||||
// if (!otherIds.add(otherId))
|
||||
// throw new StingException(String.format("The aliases in your sample file are invalid - %s is listed as an " +
|
||||
// "alias more than once.", otherId));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private boolean isPropertyValid(String property, String[] allowedProperties) {
|
||||
//
|
||||
// // is it a special property that is always allowed?
|
||||
// for (String allowedProperty : specialProperties) {
|
||||
// if (property.equals(allowedProperty))
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// // is it in the allowed properties list?
|
||||
// for (String allowedProperty : allowedProperties) {
|
||||
// if (property.equals(allowedProperty))
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// private boolean isRelationshipValid(String relationship, String[] allowedRelationships) {
|
||||
//
|
||||
// // is it a special relationship that is always allowed?
|
||||
// for (String allowedRelationship : specialRelationships) {
|
||||
// if (relationship.equals(allowedRelationship))
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// // is it in the allowed relationships list?
|
||||
// for (String allowedRelationship : allowedRelationships) {
|
||||
// if (relationship.equals(allowedRelationship))
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Saves a property as the correct type
|
||||
// * @param key property key
|
||||
// * @param value property value, as read from YAML parser
|
||||
// * @return property value to be stored
|
||||
// */
|
||||
// private void saveProperty(Sample sample, String key, Object value) {
|
||||
//
|
||||
// // convert gender to the right type, if it was stored as a String
|
||||
// if (key.equals("gender")) {
|
||||
// if (((String) value).toLowerCase().equals("male")) {
|
||||
// value = Sample.Gender.MALE;
|
||||
// }
|
||||
// else if (((String) value).toLowerCase().equals("female")) {
|
||||
// value = Sample.Gender.FEMALE;
|
||||
// }
|
||||
// else if (((String) value).toLowerCase().equals("unknown")) {
|
||||
// value = Sample.Gender.UNKNOWN;
|
||||
// }
|
||||
// else if (value != null) {
|
||||
// throw new StingException("'gender' property must be male, female, or unknown.");
|
||||
// }
|
||||
// }
|
||||
// try {
|
||||
// sample.setProperty(key, value);
|
||||
// }
|
||||
// catch (Exception e) {
|
||||
// throw new StingException("Could not save property " + key, e);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Saves a relationship as the correct type
|
||||
// * @param key relationship key
|
||||
// * @param relativeId sample ID string of the relative
|
||||
// * @return relationship value to be stored
|
||||
// */
|
||||
// private void saveRelationship(Sample sample, String key, String relativeId) {
|
||||
//
|
||||
// // get the reference that we'll store as the value
|
||||
// Sample relative = getSampleById(relativeId);
|
||||
//
|
||||
// // create sample object for the relative, if necessary
|
||||
// if (relative == null) {
|
||||
// relative = new Sample(relativeId);
|
||||
// addSample(relative);
|
||||
// }
|
||||
// sample.setRelationship(key, relative);
|
||||
// }
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Filter a sample name in case it is an alias
|
||||
* @param sampleId to be filtered
|
||||
* @return ID of sample that stores data for this alias
|
||||
*/
|
||||
private String aliasFilter(String sampleId) {
|
||||
if (!sampleAliases.containsKey(sampleId))
|
||||
return sampleId;
|
||||
else
|
||||
return sampleAliases.get(sampleId);
|
||||
public SampleDataSource addSamples(File sampleFile) {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a sample to the collection
|
||||
* @param sample to be added
|
||||
*/
|
||||
private void addSample(Sample sample) {
|
||||
private SampleDataSource addSample(Sample sample) {
|
||||
samples.put(sample.getID(), sample);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if sample with this ID exists
|
||||
* Note that this will return true if name passed in is an alias
|
||||
* @param id ID of sample to be checked
|
||||
* @return true if sample exists; false if not
|
||||
*/
|
||||
public boolean hasSample(String id) {
|
||||
return samples.get(aliasFilter(id)) != null;
|
||||
}
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions for getting a sample from the DB
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Get a sample by its ID
|
||||
* If an alias is passed in, return the main sample object
|
||||
* @param id
|
||||
* @return sample Object with this ID
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSampleById(String id) {
|
||||
return samples.get(aliasFilter(id));
|
||||
public Sample getSample(String id) {
|
||||
return samples.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample for a given read group
|
||||
* Must first look up ID for read group
|
||||
* @param readGroup of sample
|
||||
* @return sample object with ID from the read group
|
||||
*
|
||||
* @param read
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
|
||||
String nameFromReadGroup = readGroup.getSample();
|
||||
return getSampleById(nameFromReadGroup);
|
||||
public Sample getSample(final SAMRecord read) {
|
||||
return getSample(read.getReadGroup());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a sample for a given read
|
||||
* Must first look up read group, and then sample ID for that read group
|
||||
* @param read of sample
|
||||
* @return sample object of this read
|
||||
*
|
||||
* @param rg
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSampleByRead(SAMRecord read) {
|
||||
return getSampleByReadGroup(read.getReadGroup());
|
||||
public Sample getSample(final SAMReadGroupRecord rg) {
|
||||
return getSample(rg.getSample());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param g Genotype
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSample(final Genotype g) {
|
||||
return getSample(g.getSampleName());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions for accessing samples in the DB
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Get number of sample objects
|
||||
* @return size of samples map
|
||||
|
|
@ -469,10 +195,10 @@ public class SampleDataSource {
|
|||
* @return Corresponding set of samples
|
||||
*/
|
||||
public Set<Sample> getSamples(Collection<String> sampleNameList) {
|
||||
HashSet<Sample> samples = new HashSet<Sample>();
|
||||
HashSet<Sample> samples = new HashSet<Sample>();
|
||||
for (String name : sampleNameList) {
|
||||
try {
|
||||
samples.add(getSampleById(name));
|
||||
samples.add(getSample(name));
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new StingException("Could not get sample with the following ID: " + name, e);
|
||||
|
|
@ -480,91 +206,4 @@ public class SampleDataSource {
|
|||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have any value (which could be null) for a given property
|
||||
* @param key Property key
|
||||
* @return Set of samples with the property
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key) {
|
||||
HashSet<Sample> toReturn = new HashSet<Sample>();
|
||||
for (Sample s : samples.values()) {
|
||||
if (s.hasExtraProperty(key))
|
||||
toReturn.add(s);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have a property with a certain value
|
||||
* Value must be a string for now - could add a similar method for matching any objects in the future
|
||||
*
|
||||
* @param key Property key
|
||||
* @param value String property value
|
||||
* @return Set of samples that match key and value
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key, String value) {
|
||||
Set<Sample> toReturn = getSamplesWithProperty(key);
|
||||
for (Sample s : toReturn) {
|
||||
if (!s.getExtraPropertyValue(key).equals(value))
|
||||
toReturn.remove(s);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
public Sample getOrCreateSample(String id) {
|
||||
Sample sample = getSampleById(id);
|
||||
if (sample == null) {
|
||||
sample = new Sample(id, this);
|
||||
addSample(sample);
|
||||
}
|
||||
return sample;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all samples that were referenced in the SAM file
|
||||
*/
|
||||
public Set<Sample> getSAMFileSamples() {
|
||||
Set<Sample> toReturn = new HashSet<Sample>();
|
||||
for (Sample sample : samples.values()) {
|
||||
if (sample.hasSAMFileEntry())
|
||||
toReturn.add(sample);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of sample objects for the sample names in a variant context
|
||||
*
|
||||
* @param context Any variant context
|
||||
* @return a set of the sample objects
|
||||
*/
|
||||
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
|
||||
Set<Sample> samples = new HashSet<Sample>();
|
||||
for (String sampleName : context.getSampleNames()) {
|
||||
samples.add(getOrCreateSample(sampleName));
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return a subcontext restricted to samples with a given property key/value
|
||||
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
|
||||
* @param context VariantContext to filter
|
||||
* @param key property key
|
||||
* @param value property value (must be string)
|
||||
* @return subcontext
|
||||
*/
|
||||
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
|
||||
|
||||
Set<String> samplesWithProperty = new HashSet<String>();
|
||||
for (String sampleName : context.getSampleNames()) {
|
||||
Sample s = samples.get(sampleName);
|
||||
if (s != null && s.hasExtraProperty(key) && s.getExtraPropertyValue(key).equals(value))
|
||||
samplesWithProperty.add(sampleName);
|
||||
}
|
||||
Map<String, Genotype> genotypes = context.getGenotypes(samplesWithProperty);
|
||||
return context.subContextFromGenotypes(genotypes.values());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -93,8 +92,8 @@ public abstract class Walker<MapType, ReduceType> {
|
|||
return getToolkit().getSampleDB();
|
||||
}
|
||||
|
||||
protected Sample getSampleByID(final String id) {
|
||||
return getToolkit().getSampleDB().getSampleById(id);
|
||||
protected Sample getSample(final String id) {
|
||||
return getToolkit().getSampleDB().getSample(id);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -247,7 +248,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes();
|
||||
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
|
||||
for ( String sample : samples ) {
|
||||
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSampleByID(sample).isMale();
|
||||
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Sample.Gender.MALE;
|
||||
|
||||
Genotype genotype;
|
||||
boolean isValidation;
|
||||
|
|
|
|||
|
|
@ -1095,14 +1095,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
// filter the read-base pileup based on min base and mapping qualities:
|
||||
pileup = pileup.getBaseAndMappingFilteredPileup(MIN_BASE_QUALITY_SCORE, MIN_MAPPING_QUALITY_SCORE);
|
||||
if (pileup != null) {
|
||||
for (Sample sample : pileup.getSamples()) {
|
||||
ReadBackedPileup samplePileup = pileup.getPileupForSample(sample);
|
||||
for (final String sample : pileup.getSampleNames()) {
|
||||
ReadBackedPileup samplePileup = pileup.getPileupForSampleName(sample);
|
||||
ReadBasesAtPosition readBases = new ReadBasesAtPosition();
|
||||
for (PileupElement p : samplePileup) {
|
||||
if (!p.isDeletion()) // IGNORE deletions for now
|
||||
readBases.putReadBase(p);
|
||||
}
|
||||
sampleReadBases.put(sample.getID(), readBases);
|
||||
sampleReadBases.put(sample, readBases);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,59 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* Extends locus walker to print how many reads there are at each locus, by population
|
||||
*/
|
||||
public class CountLociByPopulationWalker extends LocusWalker<Integer, Long> implements TreeReducible<Long> {
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
// in this HashMap, we'll keep count of how many
|
||||
HashMap<String, Integer> count = new HashMap<String, Integer>();
|
||||
|
||||
ArrayList<SAMRecord> reads = (ArrayList) context.getBasePileup().getReads();
|
||||
|
||||
for (SAMRecord read : reads) {
|
||||
|
||||
// get the sample
|
||||
Sample sample = getSampleDB().getSampleByRead(read);
|
||||
if (sample == null)
|
||||
return 1;
|
||||
|
||||
if (!count.containsKey(sample.getPopulation())) {
|
||||
count.put(sample.getPopulation(), 1);
|
||||
}
|
||||
count.put(sample.getPopulation(), count.get(sample.getPopulation()) + 1);
|
||||
}
|
||||
|
||||
System.out.println("\n\n\n***** LOCUS: " + ref.getLocus().toString() + " *****");
|
||||
for (String population : count.keySet()) {
|
||||
System.out.println(String.format("%s | %d", population, count.get(population)));
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Long reduceInit() { return 0l; }
|
||||
|
||||
public Long reduce(Integer value, Long sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces two subtrees together. In this case, the implementation of the tree reduce
|
||||
* is exactly the same as the implementation of the single reduce.
|
||||
*/
|
||||
public Long treeReduce(Long lhs, Long rhs) {
|
||||
return lhs + rhs;
|
||||
}
|
||||
}
|
||||
|
|
@ -16,8 +16,8 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
|
|||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountMalesWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
Sample sample = getSampleDB().getSampleByRead(read);
|
||||
return sample.isMale() ? 1 : 0;
|
||||
Sample sample = getSampleDB().getSample(read);
|
||||
return sample.getGender() == Sample.Gender.MALE ? 1 : 0;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
|
|
|||
|
|
@ -228,7 +228,6 @@ public class PedReader {
|
|||
|
||||
public void fillSampleDB(SampleDataSource db) {
|
||||
for ( final PedRecord rec : getRecords() ) {
|
||||
Sample s = db.getOrCreateSample(rec.individualID);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -570,16 +570,6 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Sample> getSamples() {
|
||||
if(!(pileupElementTracker instanceof PerSamplePileupElementTracker)) {
|
||||
throw new StingException("Must be an instance of PerSampleElementTracker");
|
||||
}
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
return tracker.getSamples();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
*
|
||||
|
|
@ -688,30 +678,6 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public RBP getPileupForSample(Sample sample) {
|
||||
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||
PileupElementTracker<PE> filteredElements = tracker.getElements(sample);
|
||||
return filteredElements != null ? (RBP)createNewPileup(loc,filteredElements) : null;
|
||||
}
|
||||
else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
SAMRecord read = p.getRead();
|
||||
if(sample != null) {
|
||||
if(read.getReadGroup() != null && sample.getID().equals(read.getReadGroup().getSample()))
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
else {
|
||||
if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
|
||||
filteredTracker.add(p);
|
||||
}
|
||||
}
|
||||
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// iterators
|
||||
|
|
|
|||
|
|
@ -123,19 +123,6 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
|
|||
*/
|
||||
public Collection<String> getSampleNames();
|
||||
|
||||
/**
|
||||
* Gets a list of all the samples stored in this pileup.
|
||||
* @return List of samples in this pileup.
|
||||
*/
|
||||
public Collection<Sample> getSamples();
|
||||
|
||||
/**
|
||||
* Gets the particular subset of this pileup with the given sample name.
|
||||
* @param sample Name of the sample to use.
|
||||
* @return A subset of this pileup containing only reads with the given sample.
|
||||
*/
|
||||
public ReadBackedExtendedEventPileup getPileupForSample(Sample sample);
|
||||
|
||||
public Iterable<ExtendedEventPileupElement> toExtendedIterable();
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -137,13 +137,6 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
|
|||
*/
|
||||
public ReadBackedPileup getPileupForLane(String laneID);
|
||||
|
||||
|
||||
/**
|
||||
* Gets a collection of all the samples stored in this pileup.
|
||||
* @return Collection of samples in this pileup.
|
||||
*/
|
||||
public Collection<Sample> getSamples();
|
||||
|
||||
/**
|
||||
* Gets a collection of *names* of all the samples stored in this pileup.
|
||||
* @return Collection of names
|
||||
|
|
@ -165,13 +158,6 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
|
|||
* @return A subset of this pileup containing only reads with the given sample.
|
||||
*/
|
||||
public ReadBackedPileup getPileupForSampleName(String sampleName);
|
||||
|
||||
/**
|
||||
* Gets the particular subset of this pileup with the given sample.
|
||||
* @param sample Sample to use.
|
||||
* @return A subset of this pileup containing only reads with the given sample.
|
||||
*/
|
||||
public ReadBackedPileup getPileupForSample(Sample sample);
|
||||
|
||||
/**
|
||||
* Simple useful routine to count the number of deletion bases in this pileup
|
||||
|
|
|
|||
|
|
@ -85,8 +85,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
|||
(byte)0);
|
||||
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
SampleDataSource sampleDataSource = new SampleDataSource();
|
||||
sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());
|
||||
SampleDataSource sampleDataSource = new SampleDataSource().addSamples(reader.getFileHeader());
|
||||
|
||||
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ import java.util.*;
|
|||
* Time: 8:21:00 AM
|
||||
*/
|
||||
public class SampleDataSourceUnitTest extends BaseTest {
|
||||
|
||||
// this empty header used to instantiate sampledatasource objects
|
||||
private static SAMFileHeader header = new SAMFileHeader();
|
||||
|
||||
|
|
@ -32,210 +31,4 @@ public class SampleDataSourceUnitTest extends BaseTest {
|
|||
public void loadSAMSamplesTest() {
|
||||
SampleDataSource s = new SampleDataSource(header, null);
|
||||
}
|
||||
|
||||
// tests that a basic sample with relationships loads correctly
|
||||
// Note that this is the only test for family relationships - we may want to expand this
|
||||
@Test()
|
||||
public void basicLoadSampleFileTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
Assert.assertTrue(s.sampleCount() == 5);
|
||||
Sample sampleA = s.getSampleById("sampleA");
|
||||
Sample sampleB = s.getSampleById("sampleB");
|
||||
Assert.assertTrue(sampleB.getMother() == sampleA);
|
||||
Assert.assertTrue(s.getChildren(sampleA).contains(sampleB));
|
||||
Set<Sample> family = s.getFamily("family1");
|
||||
Assert.assertTrue(family.size() == 2);
|
||||
Assert.assertTrue(family.contains(sampleA));
|
||||
Assert.assertTrue(family.contains(sampleB));
|
||||
}
|
||||
|
||||
// but that file should fail if it has an extra character in it...
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void loadInvalidSampleExtraCharText() {
|
||||
File sampleFile = new File(sampleFilesDir + "invalidSyntaxExtraChar.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
}
|
||||
|
||||
// ...or a typo...
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void loadInvalidSampleTypoText() {
|
||||
File sampleFile = new File(sampleFilesDir + "invalidSyntaxTypo.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
|
||||
}
|
||||
|
||||
// ...or an extra unrecognized array
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void loadInvalidSampleExtraArrayText() {
|
||||
File sampleFile = new File(sampleFilesDir + "invalidSyntaxExtraArray.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
}
|
||||
|
||||
// make sure aliases work
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void sampleAliasText() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFileWithAlias.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
// this file has two samples, but one has an alias. let's make sure that checks out...
|
||||
Assert.assertTrue(s.sampleCount() == 3);
|
||||
Assert.assertTrue(s.getSampleById("sampleA") == s.getSampleById("sampleC"));
|
||||
}
|
||||
|
||||
// error is thrown if property is included that's not in properties array
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void unallowedPropertySampleTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFileUnallowedProperty.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
}
|
||||
|
||||
// same as above, with relationship
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void unallowedRelationshipSampleTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFileUnallowedRelationship.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
}
|
||||
|
||||
// two sample files
|
||||
@Test()
|
||||
public void twoSampleFilesTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
|
||||
File secondFile = new File(sampleFilesDir + "basicSampleFileExt.yaml");
|
||||
ArrayList<File> files = new ArrayList<File>();
|
||||
files.add(sampleFile);
|
||||
files.add(secondFile);
|
||||
SampleDataSource s = new SampleDataSource(header, files);
|
||||
Assert.assertTrue(s.getSampleById("sampleA").getProperty("propC").equals("valC"));
|
||||
Assert.assertTrue(s.getSampleById("sampleA").getProperty("propA").equals("valA"));
|
||||
}
|
||||
|
||||
// two sample files, with contradictory properties
|
||||
@Test(expectedExceptions=StingException.class)
|
||||
public void twoContradictorySampleFilesTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
|
||||
File secondFile = new File(sampleFilesDir + "basicSampleFileInvalidExt.yaml");
|
||||
ArrayList<File> files = new ArrayList<File>();
|
||||
files.add(sampleFile);
|
||||
files.add(secondFile);
|
||||
SampleDataSource s = new SampleDataSource(header, files);
|
||||
}
|
||||
|
||||
// three sample files
|
||||
@Test()
|
||||
public void threeSamplesTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
|
||||
ArrayList<File> files = new ArrayList<File>();
|
||||
files.add(sampleFile);
|
||||
files.add(new File(sampleFilesDir + "basicSampleFileExt.yaml"));
|
||||
files.add(new File(sampleFilesDir + "basicSampleFileExt2.yaml"));
|
||||
SampleDataSource s = new SampleDataSource(header, files);
|
||||
Assert.assertTrue(s.sampleCount() == 6);
|
||||
Assert.assertTrue(s.getSampleById("sampleE").getProperty("propC").equals("valC"));
|
||||
Assert.assertTrue(s.getSampleById("sampleA").getProperty("propA").equals("valA"));
|
||||
}
|
||||
|
||||
/**
|
||||
* testing getSamplesWithProperty
|
||||
* in this file there are 5 samples - 2 with population "CEU", 1 with population "ABC", 1 with no population,
|
||||
* and then the default null sample
|
||||
*/
|
||||
@Test()
|
||||
public void getSamplesWithPropertyTest() {
|
||||
File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
Assert.assertTrue(s.sampleCount() == 5);
|
||||
Set<Sample> ceuSamples = s.getSamplesWithProperty("population", "CEU");
|
||||
Assert.assertTrue(ceuSamples.size() == 2);
|
||||
|
||||
Iterator<Sample> i = ceuSamples.iterator();
|
||||
ArrayList<String> sampleNames = new ArrayList<String>();
|
||||
sampleNames.add(i.next().getID());
|
||||
sampleNames.add(i.next().getID());
|
||||
Assert.assertTrue(sampleNames.contains("sampleA"));
|
||||
Assert.assertTrue(sampleNames.contains("sampleB"));
|
||||
}
|
||||
|
||||
// make sure we can import data types other than Strings
|
||||
@Test()
|
||||
public void sampleTestPropertyType() {
|
||||
File sampleFile = new File(sampleFilesDir + "sampleFileOtherTypes.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
Sample sample = s.getSampleById("sampleA");
|
||||
Assert.assertTrue(sample.getProperty("a").getClass() == Integer.class);
|
||||
Assert.assertTrue(sample.getProperty("b").getClass() == String.class);
|
||||
Assert.assertTrue(sample.getProperty("c").getClass() == Double.class);
|
||||
Assert.assertTrue(sample.getProperty("b").getClass() == String.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* check that getSamplesFromVariantContext works
|
||||
* create a variant context with two sample names, and make sure the right samples are there
|
||||
*/
|
||||
@Test()
|
||||
public void variantContextTest() {
|
||||
SampleDataSource s = new SampleDataSource(header, null);
|
||||
List<Allele> alleleCollection = new ArrayList<Allele>();
|
||||
Allele a1 = Allele.create("A", true);
|
||||
alleleCollection.add(a1);
|
||||
|
||||
Set<Genotype> genotypeCollection = new HashSet<Genotype>();
|
||||
genotypeCollection.add(new Genotype("NA123", alleleCollection));
|
||||
genotypeCollection.add(new Genotype("NA456", alleleCollection));
|
||||
|
||||
VariantContext v = new VariantContext("contextName", "chr1", 1, 1, alleleCollection, genotypeCollection);
|
||||
|
||||
// make sure the set that's returned is the right size
|
||||
HashSet<Sample> set = (HashSet) s.getSamplesByVariantContext(v);
|
||||
Assert.assertTrue(set.size() == 2);
|
||||
|
||||
// make sure both samples are included
|
||||
Iterator<Sample> i = set.iterator();
|
||||
ArrayList<String> sampleNames = new ArrayList<String>();
|
||||
sampleNames.add(i.next().getID());
|
||||
sampleNames.add(i.next().getID());
|
||||
Assert.assertTrue(sampleNames.contains("NA123"));
|
||||
Assert.assertTrue(sampleNames.contains("NA456"));
|
||||
}
|
||||
|
||||
/**
|
||||
* checking subContextFromSampleProperty
|
||||
*/
|
||||
|
||||
/**
|
||||
* check that subContextFromSampleProperty works
|
||||
* create a variant context with four sample names, make sure that it filters correctly to 2
|
||||
*/
|
||||
@Test()
|
||||
public void subContextFromSamplePropertyTest() {
|
||||
|
||||
File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
|
||||
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
|
||||
Assert.assertTrue(s.sampleCount() == 5);
|
||||
|
||||
List<Allele> alleleCollection = new ArrayList<Allele>();
|
||||
Allele a1 = Allele.create("A", true);
|
||||
alleleCollection.add(a1);
|
||||
|
||||
Set<Genotype> genotypeCollection = new HashSet<Genotype>();
|
||||
genotypeCollection.add(new Genotype("NA123", alleleCollection));
|
||||
genotypeCollection.add(new Genotype("sampleA", alleleCollection));
|
||||
genotypeCollection.add(new Genotype("sampleB", alleleCollection));
|
||||
genotypeCollection.add(new Genotype("sampleC", alleleCollection));
|
||||
|
||||
VariantContext v = new VariantContext("contextName", "chr1", 1, 1, alleleCollection, genotypeCollection);
|
||||
VariantContext subContext = s.subContextFromSampleProperty(v, "population", "CEU");
|
||||
|
||||
Assert.assertTrue(subContext.getSampleNames().contains("sampleA"));
|
||||
Assert.assertTrue(subContext.getSampleNames().contains("sampleA"));
|
||||
Assert.assertTrue(subContext.getSampleNames().size() == 2);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// we create lots of single item lists...
|
||||
private ArrayList<File> makeFileList(File file) {
|
||||
ArrayList<File> a = new ArrayList<File>();
|
||||
a.add(file);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,41 +13,26 @@ import org.testng.annotations.Test;
|
|||
* Time: 8:21:00 AM
|
||||
*/
|
||||
public class SampleUnitTest extends BaseTest {
|
||||
|
||||
static Sample sampleA;
|
||||
static Sample sampleA1;
|
||||
static Sample sampleB;
|
||||
static Sample sampleC;
|
||||
SampleDataSource db;
|
||||
static Sample fam1A, fam1B, fam1C;
|
||||
static Sample s1, s2;
|
||||
static Sample trait1, trait2, trait3, trait4;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
sampleA = new Sample("sampleA");
|
||||
sampleA.setProperty("uniqueProperty", "uniqueValue");
|
||||
sampleA1 = new Sample("sampleA");
|
||||
sampleA1.setProperty("uniqueProperty", "uniqueValue");
|
||||
sampleB = new Sample("sampleB");
|
||||
sampleC = new Sample("sampleC");
|
||||
sampleC.setProperty("population", "pop1");
|
||||
sampleC.setProperty("gender", Sample.Gender.MALE);
|
||||
}
|
||||
db = new SampleDataSource();
|
||||
|
||||
/**
|
||||
* Testing equality
|
||||
*/
|
||||
@Test()
|
||||
public void equalsTest() {
|
||||
Assert.assertTrue(sampleA.equals(sampleA1));
|
||||
Assert.assertFalse(sampleA == sampleA1);
|
||||
Assert.assertFalse(sampleA.equals(sampleB));
|
||||
}
|
||||
fam1A = new Sample("1A", db, "fam1", "1B", "1C", Sample.Gender.UNKNOWN);
|
||||
fam1B = new Sample("1B", db, "fam1", null, null, Sample.Gender.MALE);
|
||||
fam1C = new Sample("1C", db, "fam1", null, null, Sample.Gender.FEMALE);
|
||||
|
||||
/**
|
||||
* And hash
|
||||
*/
|
||||
@Test()
|
||||
public void basicHashTest() {
|
||||
Assert.assertFalse(sampleA.hashCode() == sampleB.hashCode());
|
||||
Assert.assertTrue(sampleA.hashCode() == sampleA1.hashCode());
|
||||
s1 = new Sample("s1", db);
|
||||
s2 = new Sample("s2", db);
|
||||
|
||||
trait1 = new Sample("t1", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.AFFECTED);
|
||||
trait2 = new Sample("t2", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNAFFECTED);
|
||||
trait3 = new Sample("t3", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNKNOWN);
|
||||
trait4 = new Sample("t4", db, 1.0, Sample.Affection.QUANTITATIVE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -55,10 +40,15 @@ public class SampleUnitTest extends BaseTest {
|
|||
*/
|
||||
@Test()
|
||||
public void specialGettersTest() {
|
||||
Assert.assertTrue(sampleC.getID().equals("sampleC"));
|
||||
Assert.assertTrue(sampleC.getPopulation().equals("pop1"));
|
||||
Assert.assertTrue(sampleC.isMale());
|
||||
Assert.assertFalse(sampleA.isMale()); // sample A doesn't have a gender, so this should be false
|
||||
// todo -- test for sample with extra properties, like population
|
||||
// Assert.assertTrue(sampleC.getID().equals("sampleC"));
|
||||
// Assert.assertTrue(sampleC.getPopulation().equals("pop1"));
|
||||
}
|
||||
|
||||
}
|
||||
@Test()
|
||||
public void testGenders() {
|
||||
Assert.assertTrue(fam1A.getGender() == Sample.Gender.UNKNOWN);
|
||||
Assert.assertTrue(fam1B.getGender() == Sample.Gender.MALE);
|
||||
Assert.assertTrue(fam1C.getGender() == Sample.Gender.FEMALE);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -142,8 +142,8 @@ public class ReadBackedPileupUnitTest {
|
|||
Arrays.asList(read2,read4),
|
||||
Arrays.asList(1,1));
|
||||
Map<Sample,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<Sample,ReadBackedPileupImpl>();
|
||||
sampleToPileupMap.put(new Sample(readGroupOne.getSample()),sample1Pileup);
|
||||
sampleToPileupMap.put(new Sample(readGroupTwo.getSample()),sample2Pileup);
|
||||
sampleToPileupMap.put(new Sample(readGroupOne.getSample(), null),sample1Pileup);
|
||||
sampleToPileupMap.put(new Sample(readGroupTwo.getSample(), null),sample2Pileup);
|
||||
|
||||
ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
|
||||
|
||||
|
|
@ -164,8 +164,8 @@ public class ReadBackedPileupUnitTest {
|
|||
|
||||
@Test
|
||||
public void testGetPileupForSample() {
|
||||
Sample sample1 = new Sample("sample1");
|
||||
Sample sample2 = new Sample("sample2");
|
||||
Sample sample1 = new Sample("sample1", null);
|
||||
Sample sample2 = new Sample("sample2", null);
|
||||
|
||||
SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
|
||||
readGroupOne.setSample(sample1.getID());
|
||||
|
|
@ -187,15 +187,11 @@ public class ReadBackedPileupUnitTest {
|
|||
|
||||
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
|
||||
|
||||
ReadBackedPileup sample1Pileup = pileup.getPileupForSample(sample1);
|
||||
Assert.assertEquals(sample1Pileup.size(),1,"Sample 1 pileup has wrong number of elements");
|
||||
Assert.assertEquals(sample1Pileup.getReads().get(0),read1,"Sample 1 pileup has incorrect read");
|
||||
|
||||
ReadBackedPileup sample2Pileup = pileup.getPileupForSampleName(sample2.getID());
|
||||
Assert.assertEquals(sample2Pileup.size(),1,"Sample 2 pileup has wrong number of elements");
|
||||
Assert.assertEquals(sample2Pileup.getReads().get(0),read2,"Sample 2 pileup has incorrect read");
|
||||
|
||||
ReadBackedPileup missingSamplePileup = pileup.getPileupForSample(new Sample("missing"));
|
||||
ReadBackedPileup missingSamplePileup = pileup.getPileupForSampleName("missing");
|
||||
Assert.assertNull(missingSamplePileup,"Pileup for sample 'missing' should be null but isn't");
|
||||
|
||||
missingSamplePileup = pileup.getPileupForSampleName("not here");
|
||||
|
|
|
|||
Loading…
Reference in New Issue