No longer supports extraProperties
-- the underlying data structure is still present, but until I decide what to do about the extensible system I've completely disabled the subsystem -- Added code to merge Samples, so that a mostly full record can be merged with a consistent empty record. If the two records are inconsistent, an error is thrown -- addSample() in SampleDB now invokes Sample.mergeSamples() when appropriate -- Validation types are now only STRICT or SILENT -- Validation code implemented in SampleDBBuilder -- Extensive unit tests for SampleDBBuilder
This commit is contained in:
parent
867a7476c1
commit
b20689ff55
14
build.xml
14
build.xml
|
|
@ -146,12 +146,14 @@
|
|||
<mkdir dir="${lib.dir}"/>
|
||||
<mkdir dir="${ivy.jar.dir}"/>
|
||||
|
||||
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
|
||||
dest="${ivy.jar.dir}/${ivy.jar.file}"
|
||||
usetimestamp="true"/>
|
||||
<taskdef resource="org/apache/ivy/ant/antlib.xml"
|
||||
uri="antlib:org.apache.ivy.ant"
|
||||
classpath="${ivy.jar.dir}/${ivy.jar.file}"/>
|
||||
<!-- Comment out the following two lines to build the GATK without a network connection, assuming you have all of the libraries cached already -->
|
||||
<!-- <get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}" -->
|
||||
<!-- dest="${ivy.jar.dir}/${ivy.jar.file}" -->
|
||||
<!-- usetimestamp="true"/> -->
|
||||
<!-- <taskdef resource="org/apache/ivy/ant/antlib.xml" -->
|
||||
<!-- uri="antlib:org.apache.ivy.ant" -->
|
||||
<!-- classpath="${ivy.jar.dir}/${ivy.jar.file}"/> -->
|
||||
|
||||
<ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/>
|
||||
<property name="init.resolve.done" value="true"/>
|
||||
</target>
|
||||
|
|
|
|||
|
|
@ -701,7 +701,8 @@ public class GenomeAnalysisEngine {
|
|||
SampleDBBuilder sampleDBBuilder = new SampleDBBuilder(this, argCollection.pedigreeValidationType);
|
||||
sampleDBBuilder.addSamplesFromSAMHeader(getSAMFileHeader());
|
||||
sampleDBBuilder.addSamplesFromSampleNames(SampleUtils.getUniqueSamplesFromRods(this));
|
||||
sampleDBBuilder.addSamplesFromPedigreeArgument(argCollection.pedigreeData);
|
||||
sampleDBBuilder.addSamplesFromPedigreeFiles(argCollection.pedigreeFiles);
|
||||
sampleDBBuilder.addSamplesFromPedigreeStrings(argCollection.pedigreeStrings);
|
||||
sampleDB = sampleDBBuilder.getFinalSampleDB();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -215,8 +215,11 @@ public class GATKArgumentCollection {
|
|||
/**
|
||||
* MARK: add documentation details
|
||||
*/
|
||||
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree file / string for samples",required=false)
|
||||
public List<String> pedigreeData = Collections.emptyList();
|
||||
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false)
|
||||
public List<File> pedigreeFiles = Collections.emptyList();
|
||||
|
||||
@Argument(fullName="pedigreeString", shortName = "pedString", doc="Pedigree string for samples",required=false)
|
||||
public List<String> pedigreeStrings = Collections.emptyList();
|
||||
|
||||
@Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="How strict should we be in validating the pedigree information?",required=false)
|
||||
public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,5 @@ package org.broadinstitute.sting.gatk.samples;
|
|||
*/
|
||||
public enum PedigreeValidationType {
|
||||
STRICT,
|
||||
LINIENT,
|
||||
SILENT,
|
||||
SILENT
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -121,36 +123,36 @@ public class Sample implements java.io.Serializable {
|
|||
public String toString() {
|
||||
return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
|
||||
getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
|
||||
getQuantitativePhenotype(), getExtraProperties());
|
||||
getQuantitativePhenotype(), properties);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// code for working with additional -- none standard -- properites
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
public Map<String, Object> getExtraProperties() {
|
||||
return Collections.unmodifiableMap(properties);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get one property
|
||||
* @param key key of property
|
||||
* @return value of property as generic object
|
||||
*/
|
||||
public Object getExtraPropertyValue(final String key) {
|
||||
return properties.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param key property key
|
||||
* @return true if sample has this property (even if its value is null)
|
||||
*/
|
||||
public boolean hasExtraProperty(String key) {
|
||||
return properties.containsKey(key);
|
||||
}
|
||||
// // -------------------------------------------------------------------------------------
|
||||
// //
|
||||
// // code for working with additional -- none standard -- properites
|
||||
// //
|
||||
// // -------------------------------------------------------------------------------------
|
||||
//
|
||||
// public Map<String, Object> getExtraProperties() {
|
||||
// return Collections.unmodifiableMap(properties);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get one property
|
||||
// * @param key key of property
|
||||
// * @return value of property as generic object
|
||||
// */
|
||||
// public Object getExtraPropertyValue(final String key) {
|
||||
// return properties.get(key);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// *
|
||||
// * @param key property key
|
||||
// * @return true if sample has this property (even if its value is null)
|
||||
// */
|
||||
// public boolean hasExtraProperty(String key) {
|
||||
// return properties.containsKey(key);
|
||||
// }
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
|
@ -181,4 +183,36 @@ public class Sample implements java.io.Serializable {
|
|||
else
|
||||
return o2 == null ? false : o1.equals(o2);
|
||||
}
|
||||
|
||||
private final static <T> T mergeValues(final String name, final String field, final T o1, final T o2, final T emptyValue) {
|
||||
if ( o1 == null || o1.equals(emptyValue) ) {
|
||||
// take o2 if both are null, otherwise keep o2
|
||||
return o2 == null ? null : o2;
|
||||
} else {
|
||||
if ( o2 == null || o2.equals(emptyValue) )
|
||||
return o1; // keep o1, since it's a real value
|
||||
else {
|
||||
// both o1 and o2 have a value
|
||||
if ( o1 == o2 )
|
||||
return o1;
|
||||
else
|
||||
throw new UserException("Inconsistent values detected for " + name + " for field " + field + " value1 " + o1 + " value2 " + o2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public final static Sample mergeSamples(final Sample prev, final Sample next) {
|
||||
if ( prev.equals(next) )
|
||||
return next;
|
||||
else {
|
||||
return new Sample(prev.getID(), prev.infoDB,
|
||||
mergeValues(prev.getID(), "Family_ID", prev.getFamilyID(), next.getFamilyID(), null),
|
||||
mergeValues(prev.getID(), "Paternal_ID", prev.getPaternalID(), next.getPaternalID(), null),
|
||||
mergeValues(prev.getID(), "Material_ID", prev.getMaternalID(), next.getMaternalID(), null),
|
||||
mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN),
|
||||
mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN),
|
||||
mergeValues(prev.getID(), "QuantitativeTrait", prev.getQuantitativePhenotype(), next.getQuantitativePhenotype(), UNSET_QT));
|
||||
//mergeValues(prev.getID(), "ExtraProperties", prev.getExtraProperties(), next.getExtraProperties(), Collections.emptyMap()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,6 +44,9 @@ public class SampleDB {
|
|||
* @param sample to be added
|
||||
*/
|
||||
protected SampleDB addSample(Sample sample) {
|
||||
Sample prev = samples.get(sample.getID());
|
||||
if ( prev != null )
|
||||
sample = Sample.mergeSamples(prev, sample);
|
||||
samples.put(sample.getID(), sample);
|
||||
return this;
|
||||
}
|
||||
|
|
@ -138,8 +141,8 @@ public class SampleDB {
|
|||
return children;
|
||||
}
|
||||
|
||||
public Collection<Sample> getSamples() {
|
||||
return Collections.unmodifiableCollection(samples.values());
|
||||
public Set<Sample> getSamples() {
|
||||
return new HashSet<Sample>(samples.values());
|
||||
}
|
||||
|
||||
public Collection<String> getSampleNames() {
|
||||
|
|
@ -165,22 +168,4 @@ public class SampleDB {
|
|||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Validation
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public final void validate() {
|
||||
validate(getSamples(), PedigreeValidationType.STRICT);
|
||||
}
|
||||
|
||||
public final void validate(PedigreeValidationType validationType) {
|
||||
validate(getSamples(), validationType);
|
||||
}
|
||||
|
||||
public final void validate(Collection<Sample> samplesToCheck, PedigreeValidationType validationType) {
|
||||
// todo -- actually do an implementation
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,6 +45,15 @@ public class SampleDBBuilder {
|
|||
final SampleDB sampleDB = new SampleDB();
|
||||
final GenomeAnalysisEngine engine;
|
||||
|
||||
Set<Sample> samplesFromDataSources = new HashSet<Sample>();
|
||||
Set<Sample> samplesFromPedigrees = new HashSet<Sample>();
|
||||
|
||||
/** for testing only */
|
||||
protected SampleDBBuilder(PedigreeValidationType validationStrictness) {
|
||||
engine = null;
|
||||
this.validationStrictness = validationStrictness;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor takes both a SAM header and sample files because the two must be integrated.
|
||||
*/
|
||||
|
|
@ -57,26 +66,34 @@ public class SampleDBBuilder {
|
|||
* Hallucinates sample objects for all the samples in the SAM file and stores them
|
||||
*/
|
||||
public SampleDBBuilder addSamplesFromSAMHeader(final SAMFileHeader header) {
|
||||
return addSamplesFromSampleNames(SampleUtils.getSAMFileSamples(header));
|
||||
addSamplesFromSampleNames(SampleUtils.getSAMFileSamples(header));
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDBBuilder addSamplesFromSampleNames(final Collection<String> sampleNames) {
|
||||
for (final String sampleName : sampleNames) {
|
||||
if (sampleDB.getSample(sampleName) == null) {
|
||||
final Sample newSample = new Sample(sampleName, sampleDB);
|
||||
addSample(newSample);
|
||||
sampleDB.addSample(newSample);
|
||||
samplesFromDataSources.add(newSample); // keep track of data source samples
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDBBuilder addSamplesFromPedigreeArgument(final List<String> pedigreeArguments) {
|
||||
for (final String ped : pedigreeArguments) {
|
||||
final File pedFile = new File(ped);
|
||||
if ( pedFile.exists() )
|
||||
addSamples(pedFile);
|
||||
else
|
||||
addSamples(ped);
|
||||
public SampleDBBuilder addSamplesFromPedigreeFiles(final List<File> pedigreeFiles) {
|
||||
for (final File pedFile : pedigreeFiles) {
|
||||
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedFile);
|
||||
samplesFromPedigrees.addAll(samples);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDBBuilder addSamplesFromPedigreeStrings(final List<String> pedigreeStrings) {
|
||||
for (final String pedString : pedigreeStrings) {
|
||||
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedString);
|
||||
samplesFromPedigrees.addAll(samples);
|
||||
}
|
||||
|
||||
return this;
|
||||
|
|
@ -86,41 +103,55 @@ public class SampleDBBuilder {
|
|||
* Parse one sample file and integrate it with samples that are already there
|
||||
* Fail quickly if we find any errors in the file
|
||||
*/
|
||||
protected SampleDBBuilder addSamples(File sampleFile) {
|
||||
private Collection<Sample> addSamplesFromPedigreeArgument(File sampleFile) {
|
||||
final PedReader reader = new PedReader();
|
||||
|
||||
try {
|
||||
reader.parse(sampleFile, getMissingFields(sampleFile), sampleDB);
|
||||
return reader.parse(sampleFile, getMissingFields(sampleFile), sampleDB);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(sampleFile, e);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
protected SampleDBBuilder addSamples(final String string) {
|
||||
private Collection<Sample> addSamplesFromPedigreeArgument(final String string) {
|
||||
final PedReader reader = new PedReader();
|
||||
reader.parse(string, getMissingFields(string), sampleDB);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a sample to the collection
|
||||
* @param sample to be added
|
||||
*/
|
||||
protected SampleDBBuilder addSample(Sample sample) {
|
||||
// todo -- merge with existing record if we have one
|
||||
sampleDB.addSample(sample);
|
||||
return this;
|
||||
return reader.parse(string, getMissingFields(string), sampleDB);
|
||||
}
|
||||
|
||||
public SampleDB getFinalSampleDB() {
|
||||
sampleDB.validate(validationStrictness);
|
||||
validate();
|
||||
return sampleDB;
|
||||
}
|
||||
|
||||
public EnumSet<PedReader.MissingPedField> getMissingFields(final Object engineArg) {
|
||||
final List<String> posTags = engine.getTags(engineArg).getPositionalTags();
|
||||
return PedReader.parseMissingFieldTags(engineArg, posTags);
|
||||
if ( engine == null )
|
||||
return EnumSet.noneOf(PedReader.MissingPedField.class);
|
||||
else {
|
||||
final List<String> posTags = engine.getTags(engineArg).getPositionalTags();
|
||||
return PedReader.parseMissingFieldTags(engineArg, posTags);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Validation
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
protected final void validate() {
|
||||
if ( validationStrictness == PedigreeValidationType.SILENT )
|
||||
return;
|
||||
else {
|
||||
// check that samples in data sources are all annotated, if anything is annotated
|
||||
if ( ! samplesFromPedigrees.isEmpty() && ! samplesFromDataSources.isEmpty() ) {
|
||||
final Set<String> sampleNamesFromPedigrees = new HashSet<String>();
|
||||
for ( final Sample pSample : samplesFromPedigrees )
|
||||
sampleNamesFromPedigrees.add(pSample.getID());
|
||||
|
||||
for ( final Sample dsSample : samplesFromDataSources )
|
||||
if ( ! sampleNamesFromPedigrees.contains(dsSample.getID()) )
|
||||
throw new UserException("Sample " + dsSample.getID() + " found in data sources but not in pedigree files");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,11 @@ package org.broadinstitute.sting.gatk.samples;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -15,15 +20,101 @@ import java.util.*;
|
|||
* Time: 8:21:00 AM
|
||||
*/
|
||||
public class SampleDBUnitTest extends BaseTest {
|
||||
// this empty header used to instantiate sampledatasource objects
|
||||
private static SAMFileHeader header = new SAMFileHeader();
|
||||
|
||||
private static SampleDBBuilder builder;
|
||||
// all the test sample files are located here
|
||||
private String sampleFilesDir = validationDataLocation + "samples/";
|
||||
private File testPED = new File(testDir + "ceutrio.ped");
|
||||
|
||||
private static final Set<Sample> testPEDSamples = new HashSet<Sample>(Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));
|
||||
|
||||
private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
|
||||
new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
||||
new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
|
||||
new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));
|
||||
|
||||
private static final String testPEDString =
|
||||
String.format("%s%n%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1",
|
||||
"fam1 mom 0 0 2 2");
|
||||
|
||||
private static final String testPEDStringInconsistentGender =
|
||||
"fam1 kid 0 0 2 2";
|
||||
|
||||
private static final Set<Sample> testPEDSamplesAsSet =
|
||||
new HashSet<Sample>(testPEDSamples);
|
||||
|
||||
|
||||
@BeforeMethod
|
||||
public void before() {
|
||||
builder = new SampleDBBuilder(PedigreeValidationType.STRICT);
|
||||
}
|
||||
|
||||
// make sure samples are created from the SAM file correctly
|
||||
@Test()
|
||||
public void loadSAMSamplesTest() {
|
||||
//SampleDB s = new SampleDB(header);
|
||||
public void loadPEDFile() {
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testPEDSamplesAsSet, db.getSamples());
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadPEDString() {
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDString));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testPEDSamplesAsSet, db.getSamples());
|
||||
}
|
||||
|
||||
private static final void addSAMHeader() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
ArtificialSAMUtils.createEnumeratedReadGroups(header, Arrays.asList("1", "2", "3"),
|
||||
Arrays.asList("kid", "mom", "dad"));
|
||||
builder.addSamplesFromSAMHeader(header);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadSAMHeader() {
|
||||
addSAMHeader();
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testSAMSamples, db.getSamples());
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadSAMHeaderPlusPED() {
|
||||
addSAMHeader();
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testPEDSamples, db.getSamples());
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void loadDuplicateData() {
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testPEDSamples, db.getSamples());
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
public void loadNonExistentFile() {
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(new File("non-existence-file.txt")));
|
||||
SampleDB db = builder.getFinalSampleDB();
|
||||
Assert.assertEquals(testSAMSamples, db.getSamples());
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
public void loadInconsistentData() {
|
||||
builder = new SampleDBBuilder(PedigreeValidationType.STRICT);
|
||||
builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDStringInconsistentGender));
|
||||
builder.getFinalSampleDB();
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
public void sampleInSAMHeaderNotInSamplesDB() {
|
||||
addSAMHeader();
|
||||
builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDStringInconsistentGender));
|
||||
builder.getFinalSampleDB();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue