Reorganization of Sample
-- Moved Gender and Affection to separate public enums -- PedReader 90% implemented -- Improved interface cleanup for XReadLines and UserException
This commit is contained in:
parent
c1cf6bc45a
commit
84160bd83f
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
 * Categorical sample trait for association and analysis.
 *
 * <p>Samples can have unknown status, be affected or unaffected by the
 * categorical trait, or they can be marked as actually having a
 * quantitative trait value (stored in an associated value in the Sample class).
 *
 * @author Mark DePristo
 * @since Sept. 2011
 */
public enum Affection {
    /** Status is unknown */
    UNKNOWN,

    /** Suffers from the disease */
    AFFECTED,

    /** Unaffected by the disease */
    UNAFFECTED,

    /** A quantitative trait: value of the trait is stored elsewhere */
    QUANTITATIVE
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
 * Enum of possible human genders: male, female, or unknown.
 */
public enum Gender {
    /** Sample is male */
    MALE,

    /** Sample is female */
    FEMALE,

    /** Gender is unknown */
    UNKNOWN
}
|
||||
|
|
@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.text.XReadLines;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileReader;
|
||||
import java.io.Reader;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -115,10 +117,6 @@ public class PedReader {
|
|||
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
|
||||
final static private String commentMarker = "#";
|
||||
|
||||
private final File source;
|
||||
private final List<PedRecord> records;
|
||||
|
||||
|
||||
public enum MissingPedFields {
|
||||
NO_FAMILY_ID,
|
||||
NO_PARENTS,
|
||||
|
|
@ -127,8 +125,8 @@ public class PedReader {
|
|||
}
|
||||
|
||||
// phenotype
|
||||
private final static String PHENOTYPE_MISSING_VALUE = "-9";
|
||||
private final static String PHENOTYPE_MISSING_VALUE_SECONDARY = "0";
|
||||
private final static String MISSING_VALUE1 = "-9";
|
||||
private final static String MISSING_VALUE2 = "0";
|
||||
private final static String PHENOTYPE_UNAFFECTED = "1";
|
||||
private final static String PHENOTYPE_AFFECTED = "2";
|
||||
|
||||
|
|
@ -137,14 +135,15 @@ public class PedReader {
|
|||
private final static String SEX_FEMALE = "2";
|
||||
// other=unknown
|
||||
|
||||
public PedReader(File source, EnumSet<MissingPedFields> missingFields) throws FileNotFoundException {
|
||||
this.source = source;
|
||||
List<String> lines = new XReadLines(source).readLines();
|
||||
this.records = parsePedLines(lines, missingFields);
|
||||
public PedReader() { }
|
||||
|
||||
/**
 * Parses a PED file from disk by opening a FileReader on it and delegating
 * to the Reader-based parse overload.
 *
 * @param source        the PED file to read
 * @param missingFields the PED columns that are absent from the file
 * @param sampleDB      passed through unchanged to the Reader-based overload
 * @return whatever the Reader-based overload returns for this file
 * @throws FileNotFoundException if the FileReader cannot be opened on source
 */
// NOTE(review): the FileReader created here is not closed in this method -- confirm
// whether the Reader-based overload (via XReadLines) closes it.
public final List<Sample> parse(File source, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
|
||||
// Log the file and the declared missing columns before reading anything
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
|
||||
return parse(new FileReader(source), missingFields, sampleDB);
|
||||
}
|
||||
|
||||
private final List<PedRecord> parsePedLines(final List<String> lines, EnumSet<MissingPedFields> missingFields) {
|
||||
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
|
||||
public final List<Sample> parse(Reader reader, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) {
|
||||
final List<String> lines = new XReadLines(reader).readLines();
|
||||
|
||||
// What are the record offsets?
|
||||
final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0;
|
||||
|
|
@ -153,7 +152,7 @@ public class PedReader {
|
|||
final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1;
|
||||
final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
|
||||
final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
|
||||
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos));
|
||||
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
|
||||
|
||||
// go through once and determine properties
|
||||
int lineNo = 1;
|
||||
|
|
@ -164,7 +163,7 @@ public class PedReader {
|
|||
String[] parts = line.split("\\W+");
|
||||
|
||||
if ( parts.length != nExpectedFields )
|
||||
throw new UserException.MalformedFile(source, "Bad PED line " + lineNo + ": wrong number of fields");
|
||||
throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");
|
||||
|
||||
if ( phenotypePos != -1 ) {
|
||||
isQT = isQT || CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
|
||||
|
|
@ -177,75 +176,55 @@ public class PedReader {
|
|||
|
||||
// now go through and parse each record
|
||||
lineNo = 1;
|
||||
final List<PedRecord> recs = new ArrayList<PedRecord>(splits.size());
|
||||
final List<Sample> samples = new ArrayList<Sample>(splits.size());
|
||||
for ( final String[] parts : splits ) {
|
||||
String familyID = null, individualID, paternalID = null, maternalID = null;
|
||||
Sample.Gender sex = Sample.Gender.UNKNOWN;
|
||||
double quantitativePhenotype = Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE;
|
||||
Sample.Affection affection = Sample.Affection.UNKNOWN;
|
||||
Gender sex = Gender.UNKNOWN;
|
||||
double quantitativePhenotype = Sample.UNSET_QT;
|
||||
Affection affection = Affection.UNKNOWN;
|
||||
|
||||
if ( familyPos != -1 ) familyID = parts[familyPos];
|
||||
if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
|
||||
individualID = parts[samplePos];
|
||||
if ( paternalPos != -1 ) paternalID = parts[paternalPos];
|
||||
if ( maternalPos != -1 ) maternalID = parts[maternalPos];
|
||||
if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
|
||||
if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);
|
||||
|
||||
if ( sexPos != -1 ) {
|
||||
if ( parts[sexPos].equals(SEX_MALE) ) sex = Sample.Gender.MALE;
|
||||
else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Sample.Gender.FEMALE;
|
||||
else sex = Sample.Gender.UNKNOWN;
|
||||
if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
|
||||
else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
|
||||
else sex = Gender.UNKNOWN;
|
||||
}
|
||||
|
||||
if ( phenotypePos != -1 ) {
|
||||
if ( isQT ) {
|
||||
if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE) )
|
||||
affection = Sample.Affection.UNKNOWN;
|
||||
if ( parts[phenotypePos].equals(MISSING_VALUE1) )
|
||||
affection = Affection.UNKNOWN;
|
||||
else {
|
||||
affection = Sample.Affection.QUANTITATIVE;
|
||||
affection = Affection.QUANTITATIVE;
|
||||
quantitativePhenotype = Double.valueOf(parts[phenotypePos]);
|
||||
}
|
||||
} else {
|
||||
if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE) ) affection = Sample.Affection.UNKNOWN;
|
||||
else if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE_SECONDARY) ) affection = Sample.Affection.UNKNOWN;
|
||||
else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Sample.Affection.UNAFFECTED;
|
||||
else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Sample.Affection.AFFECTED;
|
||||
if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
|
||||
else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
|
||||
else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
|
||||
else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
|
||||
else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
|
||||
}
|
||||
}
|
||||
|
||||
recs.add(new PedRecord(familyID, individualID, paternalID, maternalID, sex, quantitativePhenotype, affection));
|
||||
|
||||
final Sample s = new Sample(familyID, sampleDB, individualID, paternalID, maternalID, sex, affection, quantitativePhenotype);
|
||||
samples.add(s);
|
||||
sampleDB.addSample(s);
|
||||
lineNo++;
|
||||
}
|
||||
|
||||
return Collections.unmodifiableList(recs);
|
||||
sampleDB.validate(samples);
|
||||
return samples;
|
||||
}
|
||||
|
||||
public List<PedRecord> getRecords() {
|
||||
return records;
|
||||
}
|
||||
|
||||
public void fillSampleDB(SampleDataSource db) {
|
||||
for ( final PedRecord rec : getRecords() ) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class PedRecord {
|
||||
final String familyID, individualID, paternalID, maternalID;
|
||||
final Sample.Gender sex;
|
||||
final double quantitativePhenotype;
|
||||
final Sample.Affection affection;
|
||||
|
||||
PedRecord(final String familyID, final String individualID,
|
||||
final String paternalID, final String maternalID,
|
||||
final Sample.Gender sex,
|
||||
final double quantitativePhenotype, final Sample.Affection affection) {
|
||||
this.familyID = familyID;
|
||||
this.individualID = individualID;
|
||||
this.paternalID = paternalID;
|
||||
this.maternalID = maternalID;
|
||||
this.sex = sex;
|
||||
this.quantitativePhenotype = quantitativePhenotype;
|
||||
this.affection = affection;
|
||||
/**
 * Converts a PED missing-value marker into null.
 *
 * @param string a raw field value; must not be null, since equals is invoked on it
 * @return null when string equals MISSING_VALUE1 or MISSING_VALUE2, otherwise string itself
 */
private final static String maybeMissing(final String string) {
|
||||
if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) )
|
||||
return null;
|
||||
else
|
||||
return string;
|
||||
}
|
||||
}
|
||||
|
|
@ -10,37 +10,18 @@ import java.util.Map;
|
|||
*/
|
||||
public class Sample implements java.io.Serializable {
|
||||
final private String familyID, paternalID, maternalID;
|
||||
final private Sample.Gender gender;
|
||||
final private Gender gender;
|
||||
final private double quantitativePhenotype;
|
||||
final private Sample.Affection affection;
|
||||
final private Affection affection;
|
||||
final private String ID;
|
||||
final private SampleDataSource dataSource;
|
||||
final private Map<String, Object> properties = new HashMap<String, Object>();
|
||||
|
||||
// todo -- conditionally add the property map -- should be empty by default
|
||||
private final Map<String, Object> properties = new HashMap<String, Object>();
|
||||
|
||||
public enum Gender {
|
||||
MALE,
|
||||
FEMALE,
|
||||
UNKNOWN
|
||||
}
|
||||
|
||||
public enum Affection {
|
||||
/** Status is unknown */
|
||||
UNKNOWN,
|
||||
/** Suffers from the disease */
|
||||
AFFECTED,
|
||||
/** Unaffected by the disease */
|
||||
UNAFFECTED,
|
||||
/** A quantitative trait: value of the trait is stored elsewhere */
|
||||
QUANTITATIVE
|
||||
}
|
||||
|
||||
public final static double UNSET_QUANTITIATIVE_TRAIT_VALUE = Double.NaN;
|
||||
public final static double UNSET_QT = Double.NaN;
|
||||
|
||||
public Sample(final String ID, final SampleDataSource dataSource,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final double quantitativePhenotype, final Affection affection) {
|
||||
final Gender gender, final Affection affection, final double quantitativePhenotype) {
|
||||
this.familyID = familyID;
|
||||
this.paternalID = paternalID;
|
||||
this.maternalID = maternalID;
|
||||
|
|
@ -51,20 +32,31 @@ public class Sample implements java.io.Serializable {
|
|||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
public Sample(final String ID, final SampleDataSource dataSource,
|
||||
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
|
||||
this(ID, dataSource, familyID, paternalID, maternalID, gender,
|
||||
UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN);
|
||||
protected Sample(final String ID,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final Affection affection, final double quantitativePhenotype) {
|
||||
this(ID, null, familyID, paternalID, maternalID, gender, affection, quantitativePhenotype);
|
||||
}
|
||||
|
||||
public Sample(final String ID, final SampleDataSource dataSource, final double quantitativePhenotype, final Affection affection) {
|
||||
this(ID, dataSource, null, null, null, Gender.UNKNOWN, quantitativePhenotype, affection);
|
||||
protected Sample(final String ID,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final Affection affection) {
|
||||
this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
|
||||
}
|
||||
|
||||
|
||||
public Sample(final String ID, final SampleDataSource dataSource,
|
||||
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
|
||||
this(ID, dataSource, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
|
||||
}
|
||||
|
||||
public Sample(final String ID, final SampleDataSource dataSource, final Affection affection, final double quantitativePhenotype) {
|
||||
this(ID, dataSource, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype);
|
||||
}
|
||||
|
||||
public Sample(String id, SampleDataSource dataSource) {
|
||||
this(id, dataSource,
|
||||
null, null, null,
|
||||
Gender.UNKNOWN, UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN);
|
||||
this(id, dataSource, null, null, null,
|
||||
Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
|
@ -77,7 +69,6 @@ public class Sample implements java.io.Serializable {
|
|||
return ID;
|
||||
}
|
||||
|
||||
|
||||
public String getFamilyID() {
|
||||
return familyID;
|
||||
}
|
||||
|
|
@ -157,21 +148,4 @@ public class Sample implements java.io.Serializable {
|
|||
public boolean hasExtraProperty(String key) {
|
||||
return properties.containsKey(key);
|
||||
}
|
||||
|
||||
// @Override
|
||||
// public boolean equals(Object o) {
|
||||
// if (this == o) return true;
|
||||
// if (o == null || getClass() != o.getClass()) return false;
|
||||
//
|
||||
// Sample sample = (Sample) o;
|
||||
// if (ID != null ? !ID.equals(sample.ID) : sample.ID != null) return false;
|
||||
// if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
|
||||
//
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public int hashCode() {
|
||||
// return ID != null ? ID.hashCode() : "".hashCode();
|
||||
// }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class SampleDataSource {
|
|||
/**
|
||||
* Hallucinates sample objects for all the samples in the SAM file and stores them
|
||||
*/
|
||||
public SampleDataSource addSamples(SAMFileHeader header) {
|
||||
protected SampleDataSource addSamples(SAMFileHeader header) {
|
||||
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
|
||||
if (getSample(sampleName) == null) {
|
||||
Sample newSample = new Sample(sampleName, this);
|
||||
|
|
@ -65,7 +65,7 @@ public class SampleDataSource {
|
|||
return this;
|
||||
}
|
||||
|
||||
public SampleDataSource addSamples(final List<File> sampleFiles) {
|
||||
protected SampleDataSource addSamples(final List<File> sampleFiles) {
|
||||
// add files consecutively
|
||||
for (File file : sampleFiles) {
|
||||
addSamples(file);
|
||||
|
|
@ -77,7 +77,7 @@ public class SampleDataSource {
|
|||
* Parse one sample file and integrate it with samples that are already there
|
||||
* Fail quickly if we find any errors in the file
|
||||
*/
|
||||
public SampleDataSource addSamples(File sampleFile) {
|
||||
protected SampleDataSource addSamples(File sampleFile) {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
@ -85,7 +85,7 @@ public class SampleDataSource {
|
|||
* Add a sample to the collection
|
||||
* @param sample to be added
|
||||
*/
|
||||
private SampleDataSource addSample(Sample sample) {
|
||||
protected SampleDataSource addSample(Sample sample) {
|
||||
samples.put(sample.getID(), sample);
|
||||
return this;
|
||||
}
|
||||
|
|
@ -138,8 +138,6 @@ public class SampleDataSource {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Get number of sample objects
|
||||
* @return size of samples map
|
||||
|
|
@ -209,4 +207,18 @@ public class SampleDataSource {
|
|||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Validation
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
 * Validates the samples returned by getSamples() by delegating to
 * validate(Collection).
 */
public final void validate() {
|
||||
validate(getSamples());
|
||||
}
|
||||
|
||||
/**
 * Validation hook for a collection of samples.
 *
 * The body is currently empty, so no checks are performed.
 *
 * @param samplesToCheck the samples to validate (not inspected at present)
 */
public final void validate(Collection<Sample> samplesToCheck) {
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.Gender;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -248,7 +248,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes();
|
||||
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
|
||||
for ( String sample : samples ) {
|
||||
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Sample.Gender.MALE;
|
||||
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;
|
||||
|
||||
Genotype genotype;
|
||||
boolean isValidation;
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.qc;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Gender;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
|
@ -41,7 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
|
|||
public class CountMalesWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
Sample sample = getSampleDB().getSample(read);
|
||||
return sample.getGender() == Sample.Gender.MALE ? 1 : 0;
|
||||
return sample.getGender() == Gender.MALE ? 1 : 0;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
|
|
|||
|
|
@ -215,6 +215,10 @@ public class UserException extends ReviewedStingException {
|
|||
super(String.format("File %s is malformed: %s caused by %s", f.getAbsolutePath(), message, e.getMessage()));
|
||||
}
|
||||
|
||||
/**
 * Creates a malformed-file exception for input identified by a name
 * rather than by a File object.
 *
 * @param name    the name inserted into the message to identify the input
 * @param message description of what is malformed
 */
public MalformedFile(String name, String message) {
|
||||
super(String.format("File associated with name %s is malformed: %s", name, message));
|
||||
}
|
||||
|
||||
/**
 * Creates a malformed-file exception for input identified by a name,
 * including the message text of an underlying exception.
 *
 * @param name    the name inserted into the message to identify the input
 * @param message description of what is malformed
 * @param e       underlying exception; only e.getMessage() is used, e itself is not passed to super
 */
public MalformedFile(String name, String message, Exception e) {
|
||||
super(String.format("File associated with name %s is malformed: %s caused by %s", name, message, e.getMessage()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,9 +99,9 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
|
|||
*
|
||||
* @param reader
|
||||
*/
|
||||
public XReadLines(final BufferedReader reader, final boolean trimWhitespace) {
|
||||
public XReadLines(final Reader reader, final boolean trimWhitespace) {
|
||||
try {
|
||||
this.in = reader;
|
||||
this.in = new BufferedReader(reader);
|
||||
nextline = readNextLine();
|
||||
this.trimWhitespace = trimWhitespace;
|
||||
} catch(IOException e) {
|
||||
|
|
@ -109,7 +109,7 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
|
|||
}
|
||||
}
|
||||
|
||||
public XReadLines(final BufferedReader reader) throws FileNotFoundException {
|
||||
public XReadLines(final Reader reader) {
|
||||
this(reader, true);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -85,8 +86,6 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
|||
(byte)0);
|
||||
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
SampleDataSource sampleDataSource = new SampleDataSource().addSamples(reader.getFileHeader());
|
||||
|
||||
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
|
||||
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
|
|
|
|||
|
|
@ -0,0 +1,201 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* UnitTest for PedReader
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 2011
|
||||
*/
|
||||
public class PedReaderUnitTest extends BaseTest {
|
||||
private static Logger logger = Logger.getLogger(PedReaderUnitTest.class);
|
||||
|
||||
private class PedReaderTest extends TestDataProvider {
|
||||
public String fileContents;
|
||||
public List<Sample> expectedSamples;
|
||||
|
||||
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
|
||||
super(PedReaderTest.class, name);
|
||||
this.fileContents = fileContents;
|
||||
this.expectedSamples = expectedSamples;
|
||||
}
|
||||
}
|
||||
|
||||
// Family ID
|
||||
// Individual ID
|
||||
// Paternal ID
|
||||
// Maternal ID
|
||||
// Sex (1=male; 2=female; other=unknown)
|
||||
// Phenotype
|
||||
//
|
||||
// -9 missing
|
||||
// 0 missing
|
||||
// 1 unaffected
|
||||
// 2 affected
|
||||
|
||||
@DataProvider(name = "readerTest")
|
||||
public Object[][] createPEDFiles() {
|
||||
new PedReaderTest("singleRecordMale",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
|
||||
"fam1 kid 0 0 1 1");
|
||||
|
||||
new PedReaderTest("singleRecordFemale",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.FEMALE, Affection.UNAFFECTED)),
|
||||
"fam1 kid 0 0 2 1");
|
||||
|
||||
new PedReaderTest("singleRecordMissingGender",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.UNKNOWN, Affection.UNKNOWN)),
|
||||
"fam1 kid 0 0 0 0");
|
||||
|
||||
// Affection
|
||||
new PedReaderTest("singleRecordAffected",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED)),
|
||||
"fam1 kid 0 0 1 2");
|
||||
|
||||
new PedReaderTest("singleRecordUnaffected",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
|
||||
"fam1 kid 0 0 1 1");
|
||||
|
||||
new PedReaderTest("singleRecordMissingAffection-9",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
|
||||
"fam1 kid 0 0 1 -9");
|
||||
|
||||
new PedReaderTest("singleRecordMissingAffection0",
|
||||
Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
|
||||
"fam1 kid 0 0 1 0");
|
||||
|
||||
new PedReaderTest("multipleUnrelated",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.UNAFFECTED)),
|
||||
String.format("%s\n%s",
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 2"));
|
||||
|
||||
new PedReaderTest("explicitTrio",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
|
||||
String.format("%s\n%s\n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1",
|
||||
"fam1 mom 0 0 2 2"));
|
||||
|
||||
new PedReaderTest("implicitTrio",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
"fam1 kid dad mom 1 1");
|
||||
|
||||
new PedReaderTest("partialTrio",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
String.format("%s\n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1"));
|
||||
|
||||
new PedReaderTest("bigPedigree",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
|
||||
new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
|
||||
new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
String.format("%s\n%s\n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad granddad1 grandma1 1 1",
|
||||
"fam1 mom granddad2 grandma2 2 2"));
|
||||
|
||||
// Quantitative trait
|
||||
new PedReaderTest("QuantitativeTrait",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
|
||||
String.format("%s\n%s",
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 10.0"));
|
||||
|
||||
new PedReaderTest("QuantitativeTraitWithMissing",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
|
||||
String.format("%s\n%s",
|
||||
"fam1 s1 0 0 1 -9",
|
||||
"fam2 s2 0 0 2 10.0"));
|
||||
|
||||
new PedReaderTest("QuantitativeTraitOnlyInts",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
|
||||
String.format("%s\n%s",
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 10"));
|
||||
|
||||
return PedReaderTest.getTests(PedReaderTest.class);
|
||||
}
|
||||
|
||||
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedFields> missing) {
|
||||
logger.warn("Test " + test);
|
||||
PedReader reader = new PedReader();
|
||||
SampleDataSource sampleDB = new SampleDataSource();
|
||||
List<Sample> readSamples = reader.parse(new StringReader(myFileContents), missing, sampleDB);
|
||||
Assert.assertEquals(test.expectedSamples, readSamples, "Parsed incorrect number of samples");
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTest")
|
||||
public void testPedReader(PedReaderTest test) {
|
||||
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
|
||||
public void testPedReaderWithComments(PedReaderTest test) {
|
||||
runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
|
||||
public void testPedReaderWithMissing(PedReaderTest test) {
|
||||
// todo -- test MISSING by splicing strings
|
||||
//runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -22,17 +22,17 @@ public class SampleUnitTest extends BaseTest {
|
|||
public void init() {
|
||||
db = new SampleDataSource();
|
||||
|
||||
fam1A = new Sample("1A", db, "fam1", "1B", "1C", Sample.Gender.UNKNOWN);
|
||||
fam1B = new Sample("1B", db, "fam1", null, null, Sample.Gender.MALE);
|
||||
fam1C = new Sample("1C", db, "fam1", null, null, Sample.Gender.FEMALE);
|
||||
fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN);
|
||||
fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE);
|
||||
fam1C = new Sample("1C", db, "fam1", null, null, Gender.FEMALE);
|
||||
|
||||
s1 = new Sample("s1", db);
|
||||
s2 = new Sample("s2", db);
|
||||
|
||||
trait1 = new Sample("t1", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.AFFECTED);
|
||||
trait2 = new Sample("t2", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNAFFECTED);
|
||||
trait3 = new Sample("t3", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNKNOWN);
|
||||
trait4 = new Sample("t4", db, 1.0, Sample.Affection.QUANTITATIVE);
|
||||
trait1 = new Sample("t1", db, Affection.AFFECTED, Sample.UNSET_QT);
|
||||
trait2 = new Sample("t2", db, Affection.UNAFFECTED, Sample.UNSET_QT);
|
||||
trait3 = new Sample("t3", db, Affection.UNKNOWN, Sample.UNSET_QT);
|
||||
trait4 = new Sample("t4", db, Affection.QUANTITATIVE, 1.0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -47,8 +47,8 @@ public class SampleUnitTest extends BaseTest {
|
|||
|
||||
@Test()
|
||||
public void testGenders() {
|
||||
Assert.assertTrue(fam1A.getGender() == Sample.Gender.UNKNOWN);
|
||||
Assert.assertTrue(fam1B.getGender() == Sample.Gender.MALE);
|
||||
Assert.assertTrue(fam1C.getGender() == Sample.Gender.FEMALE);
|
||||
Assert.assertTrue(fam1A.getGender() == Gender.UNKNOWN);
|
||||
Assert.assertTrue(fam1B.getGender() == Gender.MALE);
|
||||
Assert.assertTrue(fam1C.getGender() == Gender.FEMALE);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue