From 89ac50e86e52bd014f11efd568e73ae52ade6ab2 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 3 Oct 2011 09:33:30 -0700 Subject: [PATCH] SampleDataSource -> SampleDB --- .../sting/gatk/GenomeAnalysisEngine.java | 12 +++++----- .../sting/gatk/executive/WindowMaker.java | 1 - .../sting/gatk/samples/PedReader.java | 8 +++---- .../sting/gatk/samples/Sample.java | 22 +++++++++---------- .../{SampleDataSource.java => SampleDB.java} | 14 ++++++------ .../sting/gatk/walkers/Walker.java | 4 ++-- .../providers/LocusViewTemplate.java | 1 - .../reads/DownsamplerBenchmark.java | 2 -- .../sting/gatk/samples/PedReaderUnitTest.java | 5 ++--- .../samples/SampleDataSourceUnitTest.java | 7 +----- .../sting/gatk/samples/SampleUnitTest.java | 4 ++-- 11 files changed, 34 insertions(+), 46 deletions(-) rename public/java/src/org/broadinstitute/sting/gatk/samples/{SampleDataSource.java => SampleDB.java} (94%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 52544fbd2..a9a7de75f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -34,8 +34,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.samples.Sample; -import org.broadinstitute.sting.gatk.samples.SampleDataSource; +import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; @@ -51,7 +50,6 @@ import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.*; @@ -88,7 +86,7 @@ public class GenomeAnalysisEngine { /** * Accessor for sample metadata */ - private SampleDataSource sampleDataSource = null; + private SampleDB sampleDB = null; /** * Accessor for sharded reference-ordered data. @@ -688,7 +686,7 @@ public class GenomeAnalysisEngine { for (ReadFilter filter : filters) filter.initialize(this); - sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles); + sampleDB = new SampleDB(getSAMFileHeader(), argCollection.sampleFiles); // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); @@ -953,8 +951,8 @@ public class GenomeAnalysisEngine { // // ------------------------------------------------------------------------------------- - public SampleDataSource getSampleDB() { - return this.sampleDataSource; + public SampleDB getSampleDB() { + return this.sampleDB; } public Map getApproximateCommandLineArguments(Object... argumentProviders) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 825a81e64..d1f5d80da 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.samples.SampleDataSource; import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java b/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java index 648637b09..d697498be 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java @@ -138,17 +138,17 @@ public class PedReader { public PedReader() { } - public final List parse(File source, EnumSet missingFields, SampleDataSource sampleDB) throws FileNotFoundException { + public final List parse(File source, EnumSet missingFields, SampleDB sampleDB) throws FileNotFoundException { logger.info("Reading PED file " + source + " with missing fields: " + missingFields); return parse(new FileReader(source), missingFields, sampleDB); } - public final List parse(final String source, EnumSet missingFields, SampleDataSource sampleDB) { + public final List parse(final String source, EnumSet missingFields, SampleDB sampleDB) { logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields); return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB); } - public final List parse(Reader reader, EnumSet missingFields, SampleDataSource sampleDB) { + public final List parse(Reader reader, EnumSet missingFields, SampleDB sampleDB) { final List lines = new XReadLines(reader).readLines(); // What are the record offsets? @@ -245,7 +245,7 @@ public class PedReader { return string; } - private final Sample maybeAddImplicitSample(SampleDataSource sampleDB, final String id, final String familyID, final Gender gender) { + private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) { if ( id != null && sampleDB.getSample(id) == null ) { Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT); sampleDB.addSample(s); diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java index 0a5043013..e68d92a9f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java @@ -14,12 +14,12 @@ public class Sample implements java.io.Serializable { final private double quantitativePhenotype; final private Affection affection; final private String ID; - final private SampleDataSource dataSource; + final private SampleDB infoDB; final private Map properties = new HashMap(); public final static double UNSET_QT = Double.NaN; - public Sample(final String ID, final SampleDataSource dataSource, + public Sample(final String ID, final SampleDB infoDB, final String familyID, final String paternalID, final String maternalID, final Gender gender, final Affection affection, final double quantitativePhenotype) { this.familyID = familyID; @@ -29,7 +29,7 @@ public class Sample implements java.io.Serializable { this.quantitativePhenotype = quantitativePhenotype; this.affection = affection; this.ID = ID; - this.dataSource = dataSource; + this.infoDB = infoDB; } protected Sample(final String ID, @@ -45,17 +45,17 @@ public class Sample implements java.io.Serializable { } - public Sample(final String ID, final SampleDataSource dataSource, + public Sample(final String ID, final SampleDB infoDB, final String familyID, final String paternalID, final String maternalID, final Gender gender) { - this(ID, dataSource, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT); + this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT); } - public Sample(final String ID, final SampleDataSource dataSource, final Affection affection, final double quantitativePhenotype) { - this(ID, dataSource, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype); + public Sample(final String ID, final SampleDB infoDB, final Affection affection, final double quantitativePhenotype) { + this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype); } - public Sample(String id, SampleDataSource dataSource) { - this(id, dataSource, null, null, null, + public Sample(String id, SampleDB infoDB) { + this(id, infoDB, null, null, null, Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT); } @@ -98,7 +98,7 @@ public class Sample implements java.io.Serializable { * @return sample object with relationship mother, if exists, or null */ public Sample getMother() { - return dataSource.getSample(maternalID); + return infoDB.getSample(maternalID); } /** @@ -106,7 +106,7 @@ public class Sample implements java.io.Serializable { * @return sample object with relationship father, if exists, or null */ public Sample getFather() { - return dataSource.getSample(paternalID); + return infoDB.getSample(paternalID); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java rename to public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java index b85759de2..6a2ec2ac4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java @@ -24,7 +24,7 @@ import java.util.*; * wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource. * */ -public class SampleDataSource { +public class SampleDB { /** * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so * this is stored as a HashMap. @@ -34,11 +34,11 @@ public class SampleDataSource { /** * Constructor takes both a SAM header and sample files because the two must be integrated. */ - public SampleDataSource() { + public SampleDB() { } - public SampleDataSource(final SAMFileHeader header, final List sampleFiles) { + public SampleDB(final SAMFileHeader header, final List sampleFiles) { this(); addSamples(header); addSamples(sampleFiles); @@ -55,7 +55,7 @@ public class SampleDataSource { /** * Hallucinates sample objects for all the samples in the SAM file and stores them */ - protected SampleDataSource addSamples(SAMFileHeader header) { + protected SampleDB addSamples(SAMFileHeader header) { for (String sampleName : SampleUtils.getSAMFileSamples(header)) { if (getSample(sampleName) == null) { Sample newSample = new Sample(sampleName, this); @@ -65,7 +65,7 @@ public class SampleDataSource { return this; } - protected SampleDataSource addSamples(final List sampleFiles) { + protected SampleDB addSamples(final List sampleFiles) { // add files consecutively for (File file : sampleFiles) { addSamples(file); @@ -77,7 +77,7 @@ public class SampleDataSource { * Parse one sample file and integrate it with samples that are already there * Fail quickly if we find any errors in the file */ - protected SampleDataSource addSamples(File sampleFile) { + protected SampleDB addSamples(File sampleFile) { return this; } @@ -85,7 +85,7 @@ public class SampleDataSource { * Add a sample to the collection * @param sample to be added */ - protected SampleDataSource addSample(Sample sample) { + protected SampleDB addSample(Sample sample) { samples.put(sample.getID(), sample); return this; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index f67dace2c..792fef9c3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.gatk.samples.Sample; -import org.broadinstitute.sting.gatk.samples.SampleDataSource; +import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.collections.Pair; @@ -88,7 +88,7 @@ public abstract class Walker { return getToolkit().getMasterSequenceDictionary(); } - protected SampleDataSource getSampleDB() { + protected SampleDB getSampleDB() { return getToolkit().getSampleDB(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index 8b226101a..2adb4864c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.executive.WindowMaker; -import org.broadinstitute.sting.gatk.samples.SampleDataSource; import org.broadinstitute.sting.gatk.datasources.reads.LocusShard; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java index 0d5734d43..5ee373e4f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java @@ -33,14 +33,12 @@ import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.samples.SampleDataSource; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.baq.BAQ; -import java.io.File; import java.util.Collections; import java.util.Iterator; diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java index 16c1d178b..c14995dca 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java @@ -32,7 +32,6 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.StringReader; -import java.lang.reflect.Array; import java.util.*; /** @@ -186,7 +185,7 @@ public class PedReaderUnitTest extends BaseTest { private static final void runTest(PedReaderTest test, String myFileContents, EnumSet missing) { logger.warn("Test " + test); PedReader reader = new PedReader(); - SampleDataSource sampleDB = new SampleDataSource(); + SampleDB sampleDB = new SampleDB(); List readSamples = reader.parse(myFileContents, missing, sampleDB); Assert.assertEquals(new HashSet(test.expectedSamples), new HashSet(readSamples), "Parsed incorrect number of samples"); } @@ -272,7 +271,7 @@ public class PedReaderUnitTest extends BaseTest { final String contents = sliceContents(test.missingFields, test.fileContents); logger.warn("Test " + test); PedReader reader = new PedReader(); - SampleDataSource sampleDB = new SampleDataSource(); + SampleDB sampleDB = new SampleDB(); reader.parse(new StringReader(contents), test.missingDesc, sampleDB); final Sample missingSample = sampleDB.getSample("kid"); Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test); diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDataSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDataSourceUnitTest.java index 3d40d4de8..90dd8e36e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDataSourceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDataSourceUnitTest.java @@ -1,12 +1,7 @@ package org.broadinstitute.sting.gatk.samples; import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.testng.Assert; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.exceptions.StingException; import org.testng.annotations.Test; @@ -29,6 +24,6 @@ public class SampleDataSourceUnitTest extends BaseTest { // make sure samples are created from the SAM file correctly @Test() public void loadSAMSamplesTest() { - SampleDataSource s = new SampleDataSource(header, Collections.emptyList()); + SampleDB s = new SampleDB(header, Collections.emptyList()); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleUnitTest.java index 279319edb..372b59353 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleUnitTest.java @@ -13,14 +13,14 @@ import org.testng.annotations.Test; * Time: 8:21:00 AM */ public class SampleUnitTest extends BaseTest { - SampleDataSource db; + SampleDB db; static Sample fam1A, fam1B, fam1C; static Sample s1, s2; static Sample trait1, trait2, trait3, trait4; @BeforeClass public void init() { - db = new SampleDataSource(); + db = new SampleDB(); fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN); fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE);