SampleDataSource -> SampleDB

This commit is contained in:
Mark DePristo 2011-10-03 09:33:30 -07:00
parent 93fba06cb5
commit 89ac50e86e
11 changed files with 34 additions and 46 deletions

View File

@ -34,8 +34,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDataSource;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
@ -51,7 +50,6 @@ import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.util.*;
@ -88,7 +86,7 @@ public class GenomeAnalysisEngine {
/**
* Accessor for sample metadata
*/
private SampleDataSource sampleDataSource = null;
private SampleDB sampleDB = null;
/**
* Accessor for sharded reference-ordered data.
@ -688,7 +686,7 @@ public class GenomeAnalysisEngine {
for (ReadFilter filter : filters)
filter.initialize(this);
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
sampleDB = new SampleDB(getSAMFileHeader(), argCollection.sampleFiles);
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
@ -953,8 +951,8 @@ public class GenomeAnalysisEngine {
//
// -------------------------------------------------------------------------------------
public SampleDataSource getSampleDB() {
return this.sampleDataSource;
public SampleDB getSampleDB() {
return this.sampleDB;
}
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {

View File

@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.samples.SampleDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;

View File

@ -138,17 +138,17 @@ public class PedReader {
public PedReader() { }
public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) throws FileNotFoundException {
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
return parse(new FileReader(source), missingFields, sampleDB);
}
public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) {
public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields);
return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB);
}
public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) {
public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
final List<String> lines = new XReadLines(reader).readLines();
// What are the record offsets?
@ -245,7 +245,7 @@ public class PedReader {
return string;
}
private final Sample maybeAddImplicitSample(SampleDataSource sampleDB, final String id, final String familyID, final Gender gender) {
private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
if ( id != null && sampleDB.getSample(id) == null ) {
Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
sampleDB.addSample(s);

View File

@ -14,12 +14,12 @@ public class Sample implements java.io.Serializable {
final private double quantitativePhenotype;
final private Affection affection;
final private String ID;
final private SampleDataSource dataSource;
final private SampleDB infoDB;
final private Map<String, Object> properties = new HashMap<String, Object>();
public final static double UNSET_QT = Double.NaN;
public Sample(final String ID, final SampleDataSource dataSource,
public Sample(final String ID, final SampleDB infoDB,
final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection, final double quantitativePhenotype) {
this.familyID = familyID;
@ -29,7 +29,7 @@ public class Sample implements java.io.Serializable {
this.quantitativePhenotype = quantitativePhenotype;
this.affection = affection;
this.ID = ID;
this.dataSource = dataSource;
this.infoDB = infoDB;
}
protected Sample(final String ID,
@ -45,17 +45,17 @@ public class Sample implements java.io.Serializable {
}
public Sample(final String ID, final SampleDataSource dataSource,
public Sample(final String ID, final SampleDB infoDB,
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
this(ID, dataSource, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
}
public Sample(final String ID, final SampleDataSource dataSource, final Affection affection, final double quantitativePhenotype) {
this(ID, dataSource, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype);
public Sample(final String ID, final SampleDB infoDB, final Affection affection, final double quantitativePhenotype) {
this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype);
}
public Sample(String id, SampleDataSource dataSource) {
this(id, dataSource, null, null, null,
public Sample(String id, SampleDB infoDB) {
this(id, infoDB, null, null, null,
Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
}
@ -98,7 +98,7 @@ public class Sample implements java.io.Serializable {
* @return sample object with relationship mother, if exists, or null
*/
public Sample getMother() {
return dataSource.getSample(maternalID);
return infoDB.getSample(maternalID);
}
/**
@ -106,7 +106,7 @@ public class Sample implements java.io.Serializable {
* @return sample object with relationship father, if exists, or null
*/
public Sample getFather() {
return dataSource.getSample(paternalID);
return infoDB.getSample(paternalID);
}
/**

View File

@ -24,7 +24,7 @@ import java.util.*;
* wants to access sample data, it asks GenomeAnalysisEngine to fetch this data from its SampleDB.
*
*/
public class SampleDataSource {
public class SampleDB {
/**
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
* this is stored as a HashMap.
@ -34,11 +34,11 @@ public class SampleDataSource {
/**
* Constructor takes both a SAM header and sample files because the two must be integrated.
*/
public SampleDataSource() {
public SampleDB() {
}
public SampleDataSource(final SAMFileHeader header, final List<File> sampleFiles) {
public SampleDB(final SAMFileHeader header, final List<File> sampleFiles) {
this();
addSamples(header);
addSamples(sampleFiles);
@ -55,7 +55,7 @@ public class SampleDataSource {
/**
* Creates a Sample object for every sample named in the SAM file header that is not already present, and stores them
*/
protected SampleDataSource addSamples(SAMFileHeader header) {
protected SampleDB addSamples(SAMFileHeader header) {
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
if (getSample(sampleName) == null) {
Sample newSample = new Sample(sampleName, this);
@ -65,7 +65,7 @@ public class SampleDataSource {
return this;
}
protected SampleDataSource addSamples(final List<File> sampleFiles) {
protected SampleDB addSamples(final List<File> sampleFiles) {
// add files consecutively
for (File file : sampleFiles) {
addSamples(file);
@ -77,7 +77,7 @@ public class SampleDataSource {
* Parse one sample file and integrate it with samples that are already there
* Fail quickly if we find any errors in the file
*/
protected SampleDataSource addSamples(File sampleFile) {
protected SampleDB addSamples(File sampleFile) {
return this;
}
@ -85,7 +85,7 @@ public class SampleDataSource {
* Add a sample to the collection
* @param sample to be added
*/
protected SampleDataSource addSample(Sample sample) {
protected SampleDB addSample(Sample sample) {
samples.put(sample.getID(), sample);
return this;
}

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDataSource;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair;
@ -88,7 +88,7 @@ public abstract class Walker<MapType, ReduceType> {
return getToolkit().getMasterSequenceDictionary();
}
protected SampleDataSource getSampleDB() {
protected SampleDB getSampleDB() {
return getToolkit().getSampleDB();
}

View File

@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.samples.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;

View File

@ -33,14 +33,12 @@ import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.samples.SampleDataSource;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.util.Collections;
import java.util.Iterator;

View File

@ -32,7 +32,6 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.StringReader;
import java.lang.reflect.Array;
import java.util.*;
/**
@ -186,7 +185,7 @@ public class PedReaderUnitTest extends BaseTest {
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
logger.warn("Test " + test);
PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource();
SampleDB sampleDB = new SampleDB();
List<Sample> readSamples = reader.parse(myFileContents, missing, sampleDB);
Assert.assertEquals(new HashSet<Sample>(test.expectedSamples), new HashSet<Sample>(readSamples), "Parsed incorrect number of samples");
}
@ -272,7 +271,7 @@ public class PedReaderUnitTest extends BaseTest {
final String contents = sliceContents(test.missingFields, test.fileContents);
logger.warn("Test " + test);
PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource();
SampleDB sampleDB = new SampleDB();
reader.parse(new StringReader(contents), test.missingDesc, sampleDB);
final Sample missingSample = sampleDB.getSample("kid");
Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test);

View File

@ -1,12 +1,7 @@
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.testng.annotations.Test;
@ -29,6 +24,6 @@ public class SampleDataSourceUnitTest extends BaseTest {
// make sure samples are created from the SAM file correctly
@Test()
public void loadSAMSamplesTest() {
SampleDataSource s = new SampleDataSource(header, Collections.<File>emptyList());
SampleDB s = new SampleDB(header, Collections.<File>emptyList());
}
}

View File

@ -13,14 +13,14 @@ import org.testng.annotations.Test;
* Time: 8:21:00 AM
*/
public class SampleUnitTest extends BaseTest {
SampleDataSource db;
SampleDB db;
static Sample fam1A, fam1B, fam1C;
static Sample s1, s2;
static Sample trait1, trait2, trait3, trait4;
@BeforeClass
public void init() {
db = new SampleDataSource();
db = new SampleDB();
fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN);
fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE);