SampleDataSource -> SampleDB

This commit is contained in:
Mark DePristo 2011-10-03 09:33:30 -07:00
parent 93fba06cb5
commit 89ac50e86e
11 changed files with 34 additions and 46 deletions

View File

@ -34,8 +34,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.samples.SampleDataSource;
import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.filters.ReadFilter;
@ -51,7 +50,6 @@ import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File; import java.io.File;
import java.util.*; import java.util.*;
@ -88,7 +86,7 @@ public class GenomeAnalysisEngine {
/** /**
* Accessor for sample metadata * Accessor for sample metadata
*/ */
private SampleDataSource sampleDataSource = null; private SampleDB sampleDB = null;
/** /**
* Accessor for sharded reference-ordered data. * Accessor for sharded reference-ordered data.
@ -688,7 +686,7 @@ public class GenomeAnalysisEngine {
for (ReadFilter filter : filters) for (ReadFilter filter : filters)
filter.initialize(this); filter.initialize(this);
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles); sampleDB = new SampleDB(getSAMFileHeader(), argCollection.sampleFiles);
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
@ -953,8 +951,8 @@ public class GenomeAnalysisEngine {
// //
// ------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------
public SampleDataSource getSampleDB() { public SampleDB getSampleDB() {
return this.sampleDataSource; return this.sampleDB;
} }
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) { public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {

View File

@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.samples.SampleDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;

View File

@ -138,17 +138,17 @@ public class PedReader {
public PedReader() { } public PedReader() { }
public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) throws FileNotFoundException { public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) throws FileNotFoundException {
logger.info("Reading PED file " + source + " with missing fields: " + missingFields); logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
return parse(new FileReader(source), missingFields, sampleDB); return parse(new FileReader(source), missingFields, sampleDB);
} }
public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) { public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields); logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields);
return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB); return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB);
} }
public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) { public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
final List<String> lines = new XReadLines(reader).readLines(); final List<String> lines = new XReadLines(reader).readLines();
// What are the record offsets? // What are the record offsets?
@ -245,7 +245,7 @@ public class PedReader {
return string; return string;
} }
private final Sample maybeAddImplicitSample(SampleDataSource sampleDB, final String id, final String familyID, final Gender gender) { private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
if ( id != null && sampleDB.getSample(id) == null ) { if ( id != null && sampleDB.getSample(id) == null ) {
Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT); Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
sampleDB.addSample(s); sampleDB.addSample(s);

View File

@ -14,12 +14,12 @@ public class Sample implements java.io.Serializable {
final private double quantitativePhenotype; final private double quantitativePhenotype;
final private Affection affection; final private Affection affection;
final private String ID; final private String ID;
final private SampleDataSource dataSource; final private SampleDB infoDB;
final private Map<String, Object> properties = new HashMap<String, Object>(); final private Map<String, Object> properties = new HashMap<String, Object>();
public final static double UNSET_QT = Double.NaN; public final static double UNSET_QT = Double.NaN;
public Sample(final String ID, final SampleDataSource dataSource, public Sample(final String ID, final SampleDB infoDB,
final String familyID, final String paternalID, final String maternalID, final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection, final double quantitativePhenotype) { final Gender gender, final Affection affection, final double quantitativePhenotype) {
this.familyID = familyID; this.familyID = familyID;
@ -29,7 +29,7 @@ public class Sample implements java.io.Serializable {
this.quantitativePhenotype = quantitativePhenotype; this.quantitativePhenotype = quantitativePhenotype;
this.affection = affection; this.affection = affection;
this.ID = ID; this.ID = ID;
this.dataSource = dataSource; this.infoDB = infoDB;
} }
protected Sample(final String ID, protected Sample(final String ID,
@ -45,17 +45,17 @@ public class Sample implements java.io.Serializable {
} }
public Sample(final String ID, final SampleDataSource dataSource, public Sample(final String ID, final SampleDB infoDB,
final String familyID, final String paternalID, final String maternalID, final Gender gender) { final String familyID, final String paternalID, final String maternalID, final Gender gender) {
this(ID, dataSource, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT); this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
} }
public Sample(final String ID, final SampleDataSource dataSource, final Affection affection, final double quantitativePhenotype) { public Sample(final String ID, final SampleDB infoDB, final Affection affection, final double quantitativePhenotype) {
this(ID, dataSource, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype); this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype);
} }
public Sample(String id, SampleDataSource dataSource) { public Sample(String id, SampleDB infoDB) {
this(id, dataSource, null, null, null, this(id, infoDB, null, null, null,
Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT); Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
} }
@ -98,7 +98,7 @@ public class Sample implements java.io.Serializable {
* @return sample object with relationship mother, if exists, or null * @return sample object with relationship mother, if exists, or null
*/ */
public Sample getMother() { public Sample getMother() {
return dataSource.getSample(maternalID); return infoDB.getSample(maternalID);
} }
/** /**
@ -106,7 +106,7 @@ public class Sample implements java.io.Serializable {
* @return sample object with relationship father, if exists, or null * @return sample object with relationship father, if exists, or null
*/ */
public Sample getFather() { public Sample getFather() {
return dataSource.getSample(paternalID); return infoDB.getSample(paternalID);
} }
/** /**

View File

@ -24,7 +24,7 @@ import java.util.*;
* wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource. * wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource.
* *
*/ */
public class SampleDataSource { public class SampleDB {
/** /**
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
* this is stored as a HashMap. * this is stored as a HashMap.
@ -34,11 +34,11 @@ public class SampleDataSource {
/** /**
* Constructor takes both a SAM header and sample files because the two must be integrated. * Constructor takes both a SAM header and sample files because the two must be integrated.
*/ */
public SampleDataSource() { public SampleDB() {
} }
public SampleDataSource(final SAMFileHeader header, final List<File> sampleFiles) { public SampleDB(final SAMFileHeader header, final List<File> sampleFiles) {
this(); this();
addSamples(header); addSamples(header);
addSamples(sampleFiles); addSamples(sampleFiles);
@ -55,7 +55,7 @@ public class SampleDataSource {
/** /**
* Hallucinates sample objects for all the samples in the SAM file and stores them * Hallucinates sample objects for all the samples in the SAM file and stores them
*/ */
protected SampleDataSource addSamples(SAMFileHeader header) { protected SampleDB addSamples(SAMFileHeader header) {
for (String sampleName : SampleUtils.getSAMFileSamples(header)) { for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
if (getSample(sampleName) == null) { if (getSample(sampleName) == null) {
Sample newSample = new Sample(sampleName, this); Sample newSample = new Sample(sampleName, this);
@ -65,7 +65,7 @@ public class SampleDataSource {
return this; return this;
} }
protected SampleDataSource addSamples(final List<File> sampleFiles) { protected SampleDB addSamples(final List<File> sampleFiles) {
// add files consecutively // add files consecutively
for (File file : sampleFiles) { for (File file : sampleFiles) {
addSamples(file); addSamples(file);
@ -77,7 +77,7 @@ public class SampleDataSource {
* Parse one sample file and integrate it with samples that are already there * Parse one sample file and integrate it with samples that are already there
* Fail quickly if we find any errors in the file * Fail quickly if we find any errors in the file
*/ */
protected SampleDataSource addSamples(File sampleFile) { protected SampleDB addSamples(File sampleFile) {
return this; return this;
} }
@ -85,7 +85,7 @@ public class SampleDataSource {
* Add a sample to the collection * Add a sample to the collection
* @param sample to be added * @param sample to be added
*/ */
protected SampleDataSource addSample(Sample sample) { protected SampleDB addSample(Sample sample) {
samples.put(sample.getID(), sample); samples.put(sample.getID(), sample);
return this; return this;
} }

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDataSource; import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
@ -88,7 +88,7 @@ public abstract class Walker<MapType, ReduceType> {
return getToolkit().getMasterSequenceDictionary(); return getToolkit().getMasterSequenceDictionary();
} }
protected SampleDataSource getSampleDB() { protected SampleDB getSampleDB() {
return getToolkit().getSampleDB(); return getToolkit().getSampleDB();
} }

View File

@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.executive.WindowMaker; import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.samples.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard; import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;

View File

@ -33,14 +33,12 @@ import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.samples.SampleDataSource;
import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.util.Collections; import java.util.Collections;
import java.util.Iterator; import java.util.Iterator;

View File

@ -32,7 +32,6 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.StringReader; import java.io.StringReader;
import java.lang.reflect.Array;
import java.util.*; import java.util.*;
/** /**
@ -186,7 +185,7 @@ public class PedReaderUnitTest extends BaseTest {
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) { private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
logger.warn("Test " + test); logger.warn("Test " + test);
PedReader reader = new PedReader(); PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource(); SampleDB sampleDB = new SampleDB();
List<Sample> readSamples = reader.parse(myFileContents, missing, sampleDB); List<Sample> readSamples = reader.parse(myFileContents, missing, sampleDB);
Assert.assertEquals(new HashSet<Sample>(test.expectedSamples), new HashSet<Sample>(readSamples), "Parsed incorrect number of samples"); Assert.assertEquals(new HashSet<Sample>(test.expectedSamples), new HashSet<Sample>(readSamples), "Parsed incorrect number of samples");
} }
@ -272,7 +271,7 @@ public class PedReaderUnitTest extends BaseTest {
final String contents = sliceContents(test.missingFields, test.fileContents); final String contents = sliceContents(test.missingFields, test.fileContents);
logger.warn("Test " + test); logger.warn("Test " + test);
PedReader reader = new PedReader(); PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource(); SampleDB sampleDB = new SampleDB();
reader.parse(new StringReader(contents), test.missingDesc, sampleDB); reader.parse(new StringReader(contents), test.missingDesc, sampleDB);
final Sample missingSample = sampleDB.getSample("kid"); final Sample missingSample = sampleDB.getSample("kid");
Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test); Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test);

View File

@ -1,12 +1,7 @@
package org.broadinstitute.sting.gatk.samples; package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@ -29,6 +24,6 @@ public class SampleDataSourceUnitTest extends BaseTest {
// make sure samples are created from the SAM file correctly // make sure samples are created from the SAM file correctly
@Test() @Test()
public void loadSAMSamplesTest() { public void loadSAMSamplesTest() {
SampleDataSource s = new SampleDataSource(header, Collections.<File>emptyList()); SampleDB s = new SampleDB(header, Collections.<File>emptyList());
} }
} }

View File

@ -13,14 +13,14 @@ import org.testng.annotations.Test;
* Time: 8:21:00 AM * Time: 8:21:00 AM
*/ */
public class SampleUnitTest extends BaseTest { public class SampleUnitTest extends BaseTest {
SampleDataSource db; SampleDB db;
static Sample fam1A, fam1B, fam1C; static Sample fam1A, fam1B, fam1C;
static Sample s1, s2; static Sample s1, s2;
static Sample trait1, trait2, trait3, trait4; static Sample trait1, trait2, trait3, trait4;
@BeforeClass @BeforeClass
public void init() { public void init() {
db = new SampleDataSource(); db = new SampleDB();
fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN); fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN);
fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE); fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE);