Reorganization of Sample

-- Moved Gender and Affection to separate public enums
-- PedReader 90% implemented
-- Cleaned up the interfaces of XReadLines and UserException
This commit is contained in:
Mark DePristo 2011-09-30 15:50:54 -04:00
parent c1cf6bc45a
commit 84160bd83f
12 changed files with 386 additions and 136 deletions

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
 * Affection status of a sample with respect to a trait under study.
 *
 * A sample is either of unknown status, affected or unaffected by a
 * categorical trait, or flagged as carrying a quantitative trait, in which
 * case the numeric value is kept alongside this status in the Sample class.
 *
 * @author Mark DePristo
 * @since Sept. 2011
 */
public enum Affection {
    /** Affection status has not been determined */
    UNKNOWN,

    /** The sample has the disease */
    AFFECTED,

    /** The sample does not have the disease */
    UNAFFECTED,

    /** The trait is quantitative; its numeric value is stored elsewhere */
    QUANTITATIVE
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
 * The genders a sample can be annotated with: male, female, or unknown.
 */
public enum Gender {
    /** Sample is male */
    MALE,

    /** Sample is female */
    FEMALE,

    /** Gender is not known */
    UNKNOWN
}

View File

@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.Reader;
import java.util.*; import java.util.*;
/** /**
@ -115,10 +117,6 @@ public class PedReader {
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2")); final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
final static private String commentMarker = "#"; final static private String commentMarker = "#";
private final File source;
private final List<PedRecord> records;
public enum MissingPedFields { public enum MissingPedFields {
NO_FAMILY_ID, NO_FAMILY_ID,
NO_PARENTS, NO_PARENTS,
@ -127,8 +125,8 @@ public class PedReader {
} }
// phenotype // phenotype
private final static String PHENOTYPE_MISSING_VALUE = "-9"; private final static String MISSING_VALUE1 = "-9";
private final static String PHENOTYPE_MISSING_VALUE_SECONDARY = "0"; private final static String MISSING_VALUE2 = "0";
private final static String PHENOTYPE_UNAFFECTED = "1"; private final static String PHENOTYPE_UNAFFECTED = "1";
private final static String PHENOTYPE_AFFECTED = "2"; private final static String PHENOTYPE_AFFECTED = "2";
@ -137,14 +135,15 @@ public class PedReader {
private final static String SEX_FEMALE = "2"; private final static String SEX_FEMALE = "2";
// other=unknown // other=unknown
public PedReader(File source, EnumSet<MissingPedFields> missingFields) throws FileNotFoundException { public PedReader() { }
this.source = source;
List<String> lines = new XReadLines(source).readLines(); public final List<Sample> parse(File source, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
this.records = parsePedLines(lines, missingFields); logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
return parse(new FileReader(source), missingFields, sampleDB);
} }
private final List<PedRecord> parsePedLines(final List<String> lines, EnumSet<MissingPedFields> missingFields) { public final List<Sample> parse(Reader reader, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) {
logger.info("Reading PED file " + source + " with missing fields: " + missingFields); final List<String> lines = new XReadLines(reader).readLines();
// What are the record offsets? // What are the record offsets?
final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0; final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0;
@ -153,7 +152,7 @@ public class PedReader {
final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1; final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1;
final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1; final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1; final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)); final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
// go through once and determine properties // go through once and determine properties
int lineNo = 1; int lineNo = 1;
@ -164,7 +163,7 @@ public class PedReader {
String[] parts = line.split("\\W+"); String[] parts = line.split("\\W+");
if ( parts.length != nExpectedFields ) if ( parts.length != nExpectedFields )
throw new UserException.MalformedFile(source, "Bad PED line " + lineNo + ": wrong number of fields"); throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");
if ( phenotypePos != -1 ) { if ( phenotypePos != -1 ) {
isQT = isQT || CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]); isQT = isQT || CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
@ -177,75 +176,55 @@ public class PedReader {
// now go through and parse each record // now go through and parse each record
lineNo = 1; lineNo = 1;
final List<PedRecord> recs = new ArrayList<PedRecord>(splits.size()); final List<Sample> samples = new ArrayList<Sample>(splits.size());
for ( final String[] parts : splits ) { for ( final String[] parts : splits ) {
String familyID = null, individualID, paternalID = null, maternalID = null; String familyID = null, individualID, paternalID = null, maternalID = null;
Sample.Gender sex = Sample.Gender.UNKNOWN; Gender sex = Gender.UNKNOWN;
double quantitativePhenotype = Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE; double quantitativePhenotype = Sample.UNSET_QT;
Sample.Affection affection = Sample.Affection.UNKNOWN; Affection affection = Affection.UNKNOWN;
if ( familyPos != -1 ) familyID = parts[familyPos]; if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
individualID = parts[samplePos]; individualID = parts[samplePos];
if ( paternalPos != -1 ) paternalID = parts[paternalPos]; if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
if ( maternalPos != -1 ) maternalID = parts[maternalPos]; if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);
if ( sexPos != -1 ) { if ( sexPos != -1 ) {
if ( parts[sexPos].equals(SEX_MALE) ) sex = Sample.Gender.MALE; if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Sample.Gender.FEMALE; else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
else sex = Sample.Gender.UNKNOWN; else sex = Gender.UNKNOWN;
} }
if ( phenotypePos != -1 ) { if ( phenotypePos != -1 ) {
if ( isQT ) { if ( isQT ) {
if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE) ) if ( parts[phenotypePos].equals(MISSING_VALUE1) )
affection = Sample.Affection.UNKNOWN; affection = Affection.UNKNOWN;
else { else {
affection = Sample.Affection.QUANTITATIVE; affection = Affection.QUANTITATIVE;
quantitativePhenotype = Double.valueOf(parts[phenotypePos]); quantitativePhenotype = Double.valueOf(parts[phenotypePos]);
} }
} else { } else {
if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE) ) affection = Sample.Affection.UNKNOWN; if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
else if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE_SECONDARY) ) affection = Sample.Affection.UNKNOWN; else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Sample.Affection.UNAFFECTED; else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Sample.Affection.AFFECTED; else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo); else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
} }
} }
recs.add(new PedRecord(familyID, individualID, paternalID, maternalID, sex, quantitativePhenotype, affection)); final Sample s = new Sample(familyID, sampleDB, individualID, paternalID, maternalID, sex, affection, quantitativePhenotype);
samples.add(s);
sampleDB.addSample(s);
lineNo++; lineNo++;
} }
return Collections.unmodifiableList(recs); sampleDB.validate(samples);
return samples;
} }
public List<PedRecord> getRecords() { private final static String maybeMissing(final String string) {
return records; if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) )
} return null;
else
public void fillSampleDB(SampleDataSource db) { return string;
for ( final PedRecord rec : getRecords() ) {
}
}
}
class PedRecord {
final String familyID, individualID, paternalID, maternalID;
final Sample.Gender sex;
final double quantitativePhenotype;
final Sample.Affection affection;
PedRecord(final String familyID, final String individualID,
final String paternalID, final String maternalID,
final Sample.Gender sex,
final double quantitativePhenotype, final Sample.Affection affection) {
this.familyID = familyID;
this.individualID = individualID;
this.paternalID = paternalID;
this.maternalID = maternalID;
this.sex = sex;
this.quantitativePhenotype = quantitativePhenotype;
this.affection = affection;
} }
} }

View File

@ -10,37 +10,18 @@ import java.util.Map;
*/ */
public class Sample implements java.io.Serializable { public class Sample implements java.io.Serializable {
final private String familyID, paternalID, maternalID; final private String familyID, paternalID, maternalID;
final private Sample.Gender gender; final private Gender gender;
final private double quantitativePhenotype; final private double quantitativePhenotype;
final private Sample.Affection affection; final private Affection affection;
final private String ID; final private String ID;
final private SampleDataSource dataSource; final private SampleDataSource dataSource;
final private Map<String, Object> properties = new HashMap<String, Object>();
// todo -- conditionally add the property map -- should be empty by default public final static double UNSET_QT = Double.NaN;
private final Map<String, Object> properties = new HashMap<String, Object>();
public enum Gender {
MALE,
FEMALE,
UNKNOWN
}
public enum Affection {
/** Status is unknown */
UNKNOWN,
/** Suffers from the disease */
AFFECTED,
/** Unaffected by the disease */
UNAFFECTED,
/** A quantitative trait: value of the trait is stored elsewhere */
QUANTITATIVE
}
public final static double UNSET_QUANTITIATIVE_TRAIT_VALUE = Double.NaN;
public Sample(final String ID, final SampleDataSource dataSource, public Sample(final String ID, final SampleDataSource dataSource,
final String familyID, final String paternalID, final String maternalID, final String familyID, final String paternalID, final String maternalID,
final Gender gender, final double quantitativePhenotype, final Affection affection) { final Gender gender, final Affection affection, final double quantitativePhenotype) {
this.familyID = familyID; this.familyID = familyID;
this.paternalID = paternalID; this.paternalID = paternalID;
this.maternalID = maternalID; this.maternalID = maternalID;
@ -51,20 +32,31 @@ public class Sample implements java.io.Serializable {
this.dataSource = dataSource; this.dataSource = dataSource;
} }
public Sample(final String ID, final SampleDataSource dataSource, protected Sample(final String ID,
final String familyID, final String paternalID, final String maternalID, final Gender gender) { final String familyID, final String paternalID, final String maternalID,
this(ID, dataSource, familyID, paternalID, maternalID, gender, final Gender gender, final Affection affection, final double quantitativePhenotype) {
UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN); this(ID, null, familyID, paternalID, maternalID, gender, affection, quantitativePhenotype);
} }
public Sample(final String ID, final SampleDataSource dataSource, final double quantitativePhenotype, final Affection affection) { protected Sample(final String ID,
this(ID, dataSource, null, null, null, Gender.UNKNOWN, quantitativePhenotype, affection); final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection) {
this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
}
public Sample(final String ID, final SampleDataSource dataSource,
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
this(ID, dataSource, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
}
public Sample(final String ID, final SampleDataSource dataSource, final Affection affection, final double quantitativePhenotype) {
this(ID, dataSource, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype);
} }
public Sample(String id, SampleDataSource dataSource) { public Sample(String id, SampleDataSource dataSource) {
this(id, dataSource, this(id, dataSource, null, null, null,
null, null, null, Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
Gender.UNKNOWN, UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN);
} }
// ------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------
@ -77,7 +69,6 @@ public class Sample implements java.io.Serializable {
return ID; return ID;
} }
public String getFamilyID() { public String getFamilyID() {
return familyID; return familyID;
} }
@ -157,21 +148,4 @@ public class Sample implements java.io.Serializable {
public boolean hasExtraProperty(String key) { public boolean hasExtraProperty(String key) {
return properties.containsKey(key); return properties.containsKey(key);
} }
// @Override
// public boolean equals(Object o) {
// if (this == o) return true;
// if (o == null || getClass() != o.getClass()) return false;
//
// Sample sample = (Sample) o;
// if (ID != null ? !ID.equals(sample.ID) : sample.ID != null) return false;
// if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
//
// return true;
// }
//
// @Override
// public int hashCode() {
// return ID != null ? ID.hashCode() : "".hashCode();
// }
} }

View File

@ -55,7 +55,7 @@ public class SampleDataSource {
/** /**
* Hallucinates sample objects for all the samples in the SAM file and stores them * Hallucinates sample objects for all the samples in the SAM file and stores them
*/ */
public SampleDataSource addSamples(SAMFileHeader header) { protected SampleDataSource addSamples(SAMFileHeader header) {
for (String sampleName : SampleUtils.getSAMFileSamples(header)) { for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
if (getSample(sampleName) == null) { if (getSample(sampleName) == null) {
Sample newSample = new Sample(sampleName, this); Sample newSample = new Sample(sampleName, this);
@ -65,7 +65,7 @@ public class SampleDataSource {
return this; return this;
} }
public SampleDataSource addSamples(final List<File> sampleFiles) { protected SampleDataSource addSamples(final List<File> sampleFiles) {
// add files consecutively // add files consecutively
for (File file : sampleFiles) { for (File file : sampleFiles) {
addSamples(file); addSamples(file);
@ -77,7 +77,7 @@ public class SampleDataSource {
* Parse one sample file and integrate it with samples that are already there * Parse one sample file and integrate it with samples that are already there
* Fail quickly if we find any errors in the file * Fail quickly if we find any errors in the file
*/ */
public SampleDataSource addSamples(File sampleFile) { protected SampleDataSource addSamples(File sampleFile) {
return this; return this;
} }
@ -85,7 +85,7 @@ public class SampleDataSource {
* Add a sample to the collection * Add a sample to the collection
* @param sample to be added * @param sample to be added
*/ */
private SampleDataSource addSample(Sample sample) { protected SampleDataSource addSample(Sample sample) {
samples.put(sample.getID(), sample); samples.put(sample.getID(), sample);
return this; return this;
} }
@ -138,8 +138,6 @@ public class SampleDataSource {
// //
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
/** /**
* Get number of sample objects * Get number of sample objects
* @return size of samples map * @return size of samples map
@ -209,4 +207,18 @@ public class SampleDataSource {
} }
return samples; return samples;
} }
// --------------------------------------------------------------------------------
//
// Validation
//
// --------------------------------------------------------------------------------
public final void validate() {
validate(getSamples());
}
public final void validate(Collection<Sample> samplesToCheck) {
}
} }

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.Gender;
import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -248,7 +248,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes(); Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes();
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null; Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
for ( String sample : samples ) { for ( String sample : samples ) {
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Sample.Gender.MALE; boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;
Genotype genotype; Genotype genotype;
boolean isValidation; boolean isValidation;

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Gender;
import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -41,7 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
public class CountMalesWalker extends ReadWalker<Integer, Integer> { public class CountMalesWalker extends ReadWalker<Integer, Integer> {
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
Sample sample = getSampleDB().getSample(read); Sample sample = getSampleDB().getSample(read);
return sample.getGender() == Sample.Gender.MALE ? 1 : 0; return sample.getGender() == Gender.MALE ? 1 : 0;
} }
public Integer reduceInit() { return 0; } public Integer reduceInit() { return 0; }

View File

@ -215,6 +215,10 @@ public class UserException extends ReviewedStingException {
super(String.format("File %s is malformed: %s caused by %s", f.getAbsolutePath(), message, e.getMessage())); super(String.format("File %s is malformed: %s caused by %s", f.getAbsolutePath(), message, e.getMessage()));
} }
public MalformedFile(String name, String message) {
super(String.format("File associated with name %s is malformed: %s", name, message));
}
public MalformedFile(String name, String message, Exception e) { public MalformedFile(String name, String message, Exception e) {
super(String.format("File associated with name %s is malformed: %s caused by %s", name, message, e.getMessage())); super(String.format("File associated with name %s is malformed: %s caused by %s", name, message, e.getMessage()));
} }

View File

@ -99,9 +99,9 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
* *
* @param reader * @param reader
*/ */
public XReadLines(final BufferedReader reader, final boolean trimWhitespace) { public XReadLines(final Reader reader, final boolean trimWhitespace) {
try { try {
this.in = reader; this.in = new BufferedReader(reader);
nextline = readNextLine(); nextline = readNextLine();
this.trimWhitespace = trimWhitespace; this.trimWhitespace = trimWhitespace;
} catch(IOException e) { } catch(IOException e) {
@ -109,7 +109,7 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
} }
} }
public XReadLines(final BufferedReader reader) throws FileNotFoundException { public XReadLines(final Reader reader) {
this(reader, true); this(reader, true);
} }

View File

@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.util.Collections; import java.util.Collections;
import java.util.Iterator; import java.util.Iterator;
@ -85,8 +86,6 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
(byte)0); (byte)0);
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()); GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
SampleDataSource sampleDataSource = new SampleDataSource().addSamples(reader.getFileHeader());
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out? // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter()); Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups()); LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());

View File

@ -0,0 +1,201 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.StringReader;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
/**
 * Unit tests for PedReader.
 *
 * Every test case pairs the text of a PED file with the list of samples that
 * text is expected to produce when parsed.
 *
 * @author Mark DePristo
 * @since 2011
 */
public class PedReaderUnitTest extends BaseTest {
    private static final Logger logger = Logger.getLogger(PedReaderUnitTest.class);

    /**
     * One PED parsing scenario: the raw file text and the samples expected from it.
     *
     * NOTE(review): instances are presumably registered by the TestDataProvider
     * constructor and retrieved later through getTests() -- confirm against BaseTest.
     */
    private class PedReaderTest extends TestDataProvider {
        public String fileContents;
        public List<Sample> expectedSamples;

        private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
            super(PedReaderTest.class, name);
            this.fileContents = fileContents;
            this.expectedSamples = expectedSamples;
        }
    }

    //     Family ID
    //     Individual ID
    //     Paternal ID
    //     Maternal ID
    //     Sex (1=male; 2=female; other=unknown)
    //     Phenotype
    //
    //     -9 missing
    //     0 missing
    //     1 unaffected
    //     2 affected

    /**
     * Builds the PED scenarios shared by all tests in this class.
     *
     * @return the registered PedReaderTest cases in TestNG data-provider form
     */
    @DataProvider(name = "readerTest")
    public Object[][] createPEDFiles() {
        // Gender
        new PedReaderTest("singleRecordMale",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 1 1");

        new PedReaderTest("singleRecordFemale",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.FEMALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 2 1");

        new PedReaderTest("singleRecordMissingGender",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.UNKNOWN, Affection.UNKNOWN)),
                "fam1 kid 0 0 0 0");

        // Affection
        new PedReaderTest("singleRecordAffected",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED)),
                "fam1 kid 0 0 1 2");

        new PedReaderTest("singleRecordUnaffected",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 1 1");

        new PedReaderTest("singleRecordMissingAffection-9",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
                "fam1 kid 0 0 1 -9");

        new PedReaderTest("singleRecordMissingAffection0",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
                "fam1 kid 0 0 1 0");

        // Expected affection follows the phenotype column: 1 = unaffected, 2 = affected
        new PedReaderTest("multipleUnrelated",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s\n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 2"));

        new PedReaderTest("explicitTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s\n%s\n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad 0 0 1 1",
                        "fam1 mom 0 0 2 2"));

        // Only the kid is listed; phenotype 2 matches the AFFECTED expectation,
        // as in the explicit and partial trio cases
        new PedReaderTest("implicitTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                "fam1 kid dad mom 1 2");

        new PedReaderTest("partialTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                String.format("%s\n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad 0 0 1 1"));

        new PedReaderTest("bigPedigree",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
                        new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
                        new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
                        new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                String.format("%s\n%s\n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad granddad1 grandma1 1 1",
                        "fam1 mom granddad2 grandma2 2 2"));

        // Quantitative trait
        new PedReaderTest("QuantitativeTrait",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
                String.format("%s\n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 10.0"));

        new PedReaderTest("QuantitativeTraitWithMissing",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
                String.format("%s\n%s",
                        "fam1 s1 0 0 1 -9",
                        "fam2 s2 0 0 2 10.0"));

        new PedReaderTest("QuantitativeTraitOnlyInts",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
                String.format("%s\n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 10"));

        return PedReaderTest.getTests(PedReaderTest.class);
    }

    /**
     * Parses the given PED text with a fresh PedReader and SampleDataSource and
     * checks the parsed samples against the test's expectation.
     *
     * @param test           scenario supplying the expected samples
     * @param myFileContents PED text to parse (may differ from test.fileContents, e.g. with comments added)
     * @param missing        PED columns that are absent from the text
     */
    private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedFields> missing) {
        logger.warn("Test " + test);
        PedReader reader = new PedReader();
        SampleDataSource sampleDB = new SampleDataSource();
        List<Sample> readSamples = reader.parse(new StringReader(myFileContents), missing, sampleDB);
        // TestNG argument order is (actual, expected, message).
        // NOTE(review): list equality depends on Sample.equals(); confirm Sample defines
        // value equality, otherwise distinct-but-identical samples will not compare equal.
        Assert.assertEquals(readSamples, test.expectedSamples, "Parsed samples differ from expected samples");
    }

    /** Parses each scenario's PED text as-is, with no fields declared missing. */
    @Test(enabled = true, dataProvider = "readerTest")
    public void testPedReader(PedReaderTest test) {
        runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
    }

    /** Same as testPedReader, but with a leading comment line prepended to the PED text. */
    @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
    public void testPedReaderWithComments(PedReaderTest test) {
        runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
    }

    /** Placeholder for tests of PED files with missing columns; currently checks nothing. */
    @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
    public void testPedReaderWithMissing(PedReaderTest test) {
        // todo -- test MISSING by splicing strings
        //runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
    }
}

View File

@ -22,17 +22,17 @@ public class SampleUnitTest extends BaseTest {
public void init() { public void init() {
db = new SampleDataSource(); db = new SampleDataSource();
fam1A = new Sample("1A", db, "fam1", "1B", "1C", Sample.Gender.UNKNOWN); fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN);
fam1B = new Sample("1B", db, "fam1", null, null, Sample.Gender.MALE); fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE);
fam1C = new Sample("1C", db, "fam1", null, null, Sample.Gender.FEMALE); fam1C = new Sample("1C", db, "fam1", null, null, Gender.FEMALE);
s1 = new Sample("s1", db); s1 = new Sample("s1", db);
s2 = new Sample("s2", db); s2 = new Sample("s2", db);
trait1 = new Sample("t1", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.AFFECTED); trait1 = new Sample("t1", db, Affection.AFFECTED, Sample.UNSET_QT);
trait2 = new Sample("t2", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNAFFECTED); trait2 = new Sample("t2", db, Affection.UNAFFECTED, Sample.UNSET_QT);
trait3 = new Sample("t3", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNKNOWN); trait3 = new Sample("t3", db, Affection.UNKNOWN, Sample.UNSET_QT);
trait4 = new Sample("t4", db, 1.0, Sample.Affection.QUANTITATIVE); trait4 = new Sample("t4", db, Affection.QUANTITATIVE, 1.0);
} }
/** /**
@ -47,8 +47,8 @@ public class SampleUnitTest extends BaseTest {
@Test() @Test()
public void testGenders() { public void testGenders() {
Assert.assertTrue(fam1A.getGender() == Sample.Gender.UNKNOWN); Assert.assertTrue(fam1A.getGender() == Gender.UNKNOWN);
Assert.assertTrue(fam1B.getGender() == Sample.Gender.MALE); Assert.assertTrue(fam1B.getGender() == Gender.MALE);
Assert.assertTrue(fam1C.getGender() == Sample.Gender.FEMALE); Assert.assertTrue(fam1C.getGender() == Gender.FEMALE);
} }
} }