Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
94e7f677ad
|
|
@ -75,11 +75,10 @@ public class BQSRIntegrationTest extends WalkerTest {
|
||||||
Arrays.asList(params.md5));
|
Arrays.asList(params.md5));
|
||||||
executeTest("testBQSR-"+params.args, spec).getFirst();
|
executeTest("testBQSR-"+params.args, spec).getFirst();
|
||||||
|
|
||||||
// TODO -- re-enable once parallelization is fixed in BaseRecalibrator
|
WalkerTestSpec specNT2 = new WalkerTestSpec(
|
||||||
//WalkerTestSpec specNT2 = new WalkerTestSpec(
|
params.getCommandLine() + " -nt 2",
|
||||||
// params.getCommandLine() + " -nt 2",
|
Arrays.asList(params.md5));
|
||||||
// Arrays.asList(params.md5));
|
executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
|
||||||
//executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
|
|
@ -301,6 +301,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||||
final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element
|
final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element
|
||||||
final CigarElement lastElement = state.peekBackwardOnGenome(); // last cigar element
|
final CigarElement lastElement = state.peekBackwardOnGenome(); // last cigar element
|
||||||
|
final boolean isSingleElementCigar = nextElement == lastElement;
|
||||||
final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator
|
final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator
|
||||||
final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator
|
final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator
|
||||||
final int readOffset = state.getReadOffset(); // the base offset on this read
|
final int readOffset = state.getReadOffset(); // the base offset on this read
|
||||||
|
|
@ -308,7 +309,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION;
|
final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION;
|
||||||
final boolean isAfterDeletion = lastOp == CigarOperator.DELETION;
|
final boolean isAfterDeletion = lastOp == CigarOperator.DELETION;
|
||||||
final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
|
final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
|
||||||
final boolean isAfterInsertion = lastOp == CigarOperator.INSERTION;
|
final boolean isAfterInsertion = lastOp == CigarOperator.INSERTION && !isSingleElementCigar;
|
||||||
final boolean isNextToSoftClip = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());
|
final boolean isNextToSoftClip = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());
|
||||||
|
|
||||||
int nextElementLength = nextElement.getLength();
|
int nextElementLength = nextElement.getLength();
|
||||||
|
|
@ -328,8 +329,10 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
else {
|
else {
|
||||||
if (!filterBaseInRead(read, location.getStart())) {
|
if (!filterBaseInRead(read, location.getStart())) {
|
||||||
String insertedBaseString = null;
|
String insertedBaseString = null;
|
||||||
if (nextOp == CigarOperator.I)
|
if (nextOp == CigarOperator.I) {
|
||||||
insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + nextElement.getLength()));
|
final int insertionOffset = isSingleElementCigar ? 0 : 1;
|
||||||
|
insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength()));
|
||||||
|
}
|
||||||
|
|
||||||
pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
|
pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
|
||||||
size++;
|
size++;
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -28,9 +29,13 @@ import java.util.*;
|
||||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
||||||
|
|
||||||
private MendelianViolation mendelianViolation = null;
|
private MendelianViolation mendelianViolation = null;
|
||||||
private String motherId;
|
private Set<Trio> trios;
|
||||||
private String fatherId;
|
|
||||||
private String childId;
|
private class Trio {
|
||||||
|
String motherId;
|
||||||
|
String fatherId;
|
||||||
|
String childId;
|
||||||
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( mendelianViolation == null ) {
|
if ( mendelianViolation == null ) {
|
||||||
|
|
@ -38,17 +43,27 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
||||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
|
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||||
boolean hasAppropriateGenotypes = vc.hasGenotype(motherId) && vc.getGenotype(motherId).hasLikelihoods() &&
|
//double pNoMV = 1.0;
|
||||||
vc.hasGenotype(fatherId) && vc.getGenotype(fatherId).hasLikelihoods() &&
|
double maxMVLR = Double.MIN_VALUE;
|
||||||
vc.hasGenotype(childId) && vc.getGenotype(childId).hasLikelihoods();
|
for ( Trio trio : trios ) {
|
||||||
if ( hasAppropriateGenotypes )
|
boolean hasAppropriateGenotypes = vc.hasGenotype(trio.motherId) && vc.getGenotype(trio.motherId).hasLikelihoods() &&
|
||||||
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc,motherId,fatherId,childId));
|
vc.hasGenotype(trio.fatherId) && vc.getGenotype(trio.fatherId).hasLikelihoods() &&
|
||||||
|
vc.hasGenotype(trio.childId) && vc.getGenotype(trio.childId).hasLikelihoods();
|
||||||
|
if ( hasAppropriateGenotypes ) {
|
||||||
|
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.motherId,trio.fatherId,trio.childId);
|
||||||
|
maxMVLR = likR > maxMVLR ? likR : maxMVLR;
|
||||||
|
//pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//double pSomeMV = 1.0-pNoMV;
|
||||||
|
//toRet.put("MVLR",Math.log10(pSomeMV)-Math.log10(1.0-pSomeMV));
|
||||||
|
toRet.put("MVLR",maxMVLR);
|
||||||
return toRet;
|
return toRet;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -58,25 +73,24 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||||
|
|
||||||
private boolean checkAndSetSamples(SampleDB db){
|
private boolean checkAndSetSamples(SampleDB db){
|
||||||
|
trios = new HashSet<Trio>();
|
||||||
Set<String> families = db.getFamilyIDs();
|
Set<String> families = db.getFamilyIDs();
|
||||||
if(families.size() != 1)
|
for ( String familyString : families ) {
|
||||||
return false;
|
Set<Sample> family = db.getFamily(familyString);
|
||||||
|
Iterator<Sample> sampleIterator = family.iterator();
|
||||||
Set<Sample> family = db.getFamily(families.iterator().next());
|
|
||||||
if(family.size() != 3)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
Iterator<Sample> sampleIter = family.iterator();
|
|
||||||
Sample sample;
|
Sample sample;
|
||||||
for(sample = sampleIter.next();sampleIter.hasNext();sample=sampleIter.next()){
|
for ( sample = sampleIterator.next(); sampleIterator.hasNext(); sample=sampleIterator.next()) {
|
||||||
if(sample.getParents().size()==2){
|
if ( sample.getParents().size() == 2 ) {
|
||||||
motherId = sample.getMaternalID();
|
Trio trio = new Trio();
|
||||||
fatherId = sample.getPaternalID();
|
trio.childId = sample.getID();
|
||||||
childId = sample.getID();
|
trio.fatherId = sample.getFather().getID();
|
||||||
return true;
|
trio.motherId = sample.getMother().getID();
|
||||||
|
trios.add(trio);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
}
|
||||||
|
|
||||||
|
return trios.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -136,10 +136,6 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
||||||
*/
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
// TODO -- remove me after the 2.1 release
|
|
||||||
if ( getToolkit().getArguments().numberOfThreads > 1 )
|
|
||||||
throw new UserException("We have temporarily disabled the ability to run BaseRecalibrator multi-threaded for performance reasons. We hope to have this fixed for the next GATK release (2.2) and apologize for the inconvenience.");
|
|
||||||
|
|
||||||
// check for unsupported access
|
// check for unsupported access
|
||||||
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
|
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
|
||||||
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
|
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
|
||||||
|
|
|
||||||
|
|
@ -297,7 +297,7 @@ public class VariantDataManager {
|
||||||
case SNP:
|
case SNP:
|
||||||
return evalVC.isSNP() || evalVC.isMNP();
|
return evalVC.isSNP() || evalVC.isMNP();
|
||||||
case INDEL:
|
case INDEL:
|
||||||
return evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic();
|
return evalVC.isStructuralIndel() || evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic();
|
||||||
case BOTH:
|
case BOTH:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
|
|
|
||||||
|
|
@ -322,6 +322,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(fullName="justRead", doc="If true, we won't actually write the output file. For efficiency testing only", required=false)
|
@Argument(fullName="justRead", doc="If true, we won't actually write the output file. For efficiency testing only", required=false)
|
||||||
private boolean justRead = false;
|
private boolean justRead = false;
|
||||||
|
|
||||||
|
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
|
||||||
|
private int maxIndelSize = Integer.MAX_VALUE;
|
||||||
|
|
||||||
|
|
||||||
/* Private class used to store the intermediate variants in the integer random selection process */
|
/* Private class used to store the intermediate variants in the integer random selection process */
|
||||||
private static class RandomVariantStructure {
|
private static class RandomVariantStructure {
|
||||||
|
|
@ -541,6 +544,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
if (!selectedTypes.contains(vc.getType()))
|
if (!selectedTypes.contains(vc.getType()))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if ( badIndelSize(vc) )
|
||||||
|
continue;
|
||||||
|
|
||||||
VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS);
|
VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS);
|
||||||
|
|
||||||
if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) {
|
if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) {
|
||||||
|
|
@ -572,6 +578,20 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean badIndelSize(final VariantContext vc) {
|
||||||
|
if ( vc.getReference().length() > maxIndelSize ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( Allele a : vc.getAlternateAlleles() ) {
|
||||||
|
if ( a.length() > maxIndelSize ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
private boolean hasPLs(final VariantContext vc) {
|
private boolean hasPLs(final VariantContext vc) {
|
||||||
for ( Genotype g : vc.getGenotypes() ) {
|
for ( Genotype g : vc.getGenotypes() ) {
|
||||||
if ( g.hasLikelihoods() )
|
if ( g.hasLikelihoods() )
|
||||||
|
|
|
||||||
|
|
@ -76,47 +76,11 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
||||||
private List<String> famOrder = new ArrayList<String>();
|
private List<String> famOrder = new ArrayList<String>();
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
vv.variantCollection = variantCollection;
|
initializeValidator();
|
||||||
vv.dbsnp = dbsnp;
|
writeBedHeader();
|
||||||
vv.DO_NOT_VALIDATE_FILTERED = true;
|
Map<String,Map<String,String>> sampleMetaValues = parseMetaData();
|
||||||
vv.type = ValidateVariants.ValidationType.REF;
|
|
||||||
// create temporary output streams and buffers
|
// create temporary output streams and buffers
|
||||||
|
|
||||||
// write magic bits into the ped file
|
|
||||||
try {
|
|
||||||
outBed.write(new byte[] { (byte) 0x6c, (byte) 0x1b, 0x0});
|
|
||||||
// ultimately, the bed will be in individual-major mode
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new ReviewedStingException("error writing to output file.");
|
|
||||||
}
|
|
||||||
// write to the fam file, the first six columns of the standard ped file
|
|
||||||
// first, load data from the input meta data file
|
|
||||||
Map<String,Map<String,String>> metaValues = new HashMap<String,Map<String,String>>();
|
|
||||||
logger.debug("Reading in metadata...");
|
|
||||||
try {
|
|
||||||
if ( metaDataFile.getAbsolutePath().endsWith(".fam") ) {
|
|
||||||
for ( String line : new XReadLines(metaDataFile) ) {
|
|
||||||
String[] famSplit = line.split("\\t");
|
|
||||||
String sid = famSplit[1];
|
|
||||||
outFam.printf("%s%n",line);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for ( String line : new XReadLines(metaDataFile) ) {
|
|
||||||
logger.debug(line);
|
|
||||||
String[] split = line.split("\\t");
|
|
||||||
String sampleID = split[0];
|
|
||||||
String keyVals = split[1];
|
|
||||||
HashMap<String,String> values = new HashMap<String, String>();
|
|
||||||
for ( String kvp : keyVals.split(";") ) {
|
|
||||||
String[] kvp_split = kvp.split("=");
|
|
||||||
values.put(kvp_split[0],kvp_split[1]);
|
|
||||||
}
|
|
||||||
metaValues.put(sampleID,values);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new UserException("Meta data file not found: "+metaDataFile.getAbsolutePath(),e);
|
|
||||||
}
|
|
||||||
// family ID, individual ID, Paternal ID, Maternal ID, Sex, Phenotype
|
// family ID, individual ID, Paternal ID, Maternal ID, Sex, Phenotype
|
||||||
int dummyID = 0; // increments for dummy parental and family IDs used
|
int dummyID = 0; // increments for dummy parental and family IDs used
|
||||||
// want to be especially careful to maintain order here
|
// want to be especially careful to maintain order here
|
||||||
|
|
@ -126,7 +90,8 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for ( String sample : header.getValue().getGenotypeSamples() ) {
|
for ( String sample : header.getValue().getGenotypeSamples() ) {
|
||||||
Map<String,String> mVals = metaValues.get(sample);
|
if ( ! metaDataFile.getAbsolutePath().endsWith(".fam") ) {
|
||||||
|
Map<String,String> mVals = sampleMetaValues.get(sample);
|
||||||
if ( mVals == null ) {
|
if ( mVals == null ) {
|
||||||
throw new UserException("No metadata provided for sample "+sample);
|
throw new UserException("No metadata provided for sample "+sample);
|
||||||
}
|
}
|
||||||
|
|
@ -139,8 +104,9 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
||||||
String sex = mVals.containsKey("sex") ? mVals.get("sex") : "3";
|
String sex = mVals.containsKey("sex") ? mVals.get("sex") : "3";
|
||||||
String pheno = mVals.get("phenotype");
|
String pheno = mVals.get("phenotype");
|
||||||
outFam.printf("%s\t%s\t%s\t%s\t%s\t%s%n",fid,sample,pid,mid,sex,pheno);
|
outFam.printf("%s\t%s\t%s\t%s\t%s\t%s%n",fid,sample,pid,mid,sex,pheno);
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
File temp = File.createTempFile(sample, ".tmp");
|
File temp = File.createTempFile("VariantsToBPed_"+sample, ".tmp");
|
||||||
printMap.put(sample,new PrintStream(temp));
|
printMap.put(sample,new PrintStream(temp));
|
||||||
tempFiles.put(sample,temp);
|
tempFiles.put(sample,temp);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
|
@ -216,6 +182,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
||||||
// reset the buffer for this sample
|
// reset the buffer for this sample
|
||||||
genotypeBuffer.put(sample,new byte[BUFFER_SIZE]);
|
genotypeBuffer.put(sample,new byte[BUFFER_SIZE]);
|
||||||
}
|
}
|
||||||
|
byteCount = 0;
|
||||||
}
|
}
|
||||||
genotypeCount = 0;
|
genotypeCount = 0;
|
||||||
}
|
}
|
||||||
|
|
@ -337,4 +304,69 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
||||||
throw new UserException("Allele frequency appears to be neither String nor Double. Please check the header of your VCF.");
|
throw new UserException("Allele frequency appears to be neither String nor Double. Please check the header of your VCF.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void initializeValidator() {
|
||||||
|
vv.variantCollection = variantCollection;
|
||||||
|
vv.dbsnp = dbsnp;
|
||||||
|
vv.DO_NOT_VALIDATE_FILTERED = true;
|
||||||
|
vv.type = ValidateVariants.ValidationType.REF;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void writeBedHeader() {
|
||||||
|
// write magic bits into the ped file
|
||||||
|
try {
|
||||||
|
outBed.write(new byte[] { (byte) 0x6c, (byte) 0x1b, 0x0});
|
||||||
|
// ultimately, the bed will be in individual-major mode
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ReviewedStingException("error writing to output file.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String,Map<String,String>> parseMetaData() {
|
||||||
|
// write to the fam file, the first six columns of the standard ped file
|
||||||
|
// first, load data from the input meta data file
|
||||||
|
Map<String,Map<String,String>> metaValues = new HashMap<String,Map<String,String>>();
|
||||||
|
logger.debug("Reading in metadata...");
|
||||||
|
try {
|
||||||
|
if ( metaDataFile.getAbsolutePath().endsWith(".fam") ) {
|
||||||
|
for ( String line : new XReadLines(metaDataFile) ) {
|
||||||
|
String[] famSplit = line.split("\\s+");
|
||||||
|
if ( famSplit.length != 6 ) {
|
||||||
|
throw new UserException("Line of the fam file is malformatted. Expected 6 entries. Line is "+line);
|
||||||
|
}
|
||||||
|
String sid = famSplit[1];
|
||||||
|
String fid = famSplit[0];
|
||||||
|
String mom = famSplit[2];
|
||||||
|
String dad = famSplit[3];
|
||||||
|
String sex = famSplit[4];
|
||||||
|
String pheno = famSplit[5];
|
||||||
|
HashMap<String,String> values = new HashMap<String, String>();
|
||||||
|
values.put("mom",mom);
|
||||||
|
values.put("dad",dad);
|
||||||
|
values.put("fid",fid);
|
||||||
|
values.put("sex",sex);
|
||||||
|
values.put("phenotype",pheno);
|
||||||
|
metaValues.put(sid,values);
|
||||||
|
outFam.printf("%s%n",line);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for ( String line : new XReadLines(metaDataFile) ) {
|
||||||
|
logger.debug(line);
|
||||||
|
String[] split = line.split("\\s+");
|
||||||
|
String sampleID = split[0];
|
||||||
|
String keyVals = split[1];
|
||||||
|
HashMap<String,String> values = new HashMap<String, String>();
|
||||||
|
for ( String kvp : keyVals.split(";") ) {
|
||||||
|
String[] kvp_split = kvp.split("=");
|
||||||
|
values.put(kvp_split[0],kvp_split[1]);
|
||||||
|
}
|
||||||
|
metaValues.put(sampleID,values);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new UserException("Meta data file not found: "+metaDataFile.getAbsolutePath(),e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return metaValues;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.utils.variantcontext;
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
|
import org.apache.commons.math.stat.descriptive.rank.Max;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
|
|
@ -178,9 +179,8 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class VariantContext implements Feature { // to enable tribble integration
|
public class VariantContext implements Feature { // to enable tribble integration
|
||||||
private final static boolean WARN_ABOUT_BAD_END = true;
|
private final static boolean WARN_ABOUT_BAD_END = true;
|
||||||
|
private final static long MAX_ALLELE_SIZE_FOR_NON_SV = 150;
|
||||||
final protected static Logger logger = Logger.getLogger(VariantContext.class);
|
final protected static Logger logger = Logger.getLogger(VariantContext.class);
|
||||||
|
|
||||||
|
|
||||||
private boolean fullyDecoded = false;
|
private boolean fullyDecoded = false;
|
||||||
protected CommonInfo commonInfo = null;
|
protected CommonInfo commonInfo = null;
|
||||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||||
|
|
@ -458,6 +458,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
SNP,
|
SNP,
|
||||||
MNP, // a multi-nucleotide polymorphism
|
MNP, // a multi-nucleotide polymorphism
|
||||||
INDEL,
|
INDEL,
|
||||||
|
STRUCTURAL_INDEL,
|
||||||
SYMBOLIC,
|
SYMBOLIC,
|
||||||
MIXED,
|
MIXED,
|
||||||
}
|
}
|
||||||
|
|
@ -530,6 +531,18 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
return getType() == Type.SYMBOLIC;
|
return getType() == Type.SYMBOLIC;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isStructuralIndel() {
|
||||||
|
return getType() == Type.STRUCTURAL_INDEL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return true if the variant is symbolic or a large indel
|
||||||
|
*/
|
||||||
|
public boolean isSymbolicOrSV() {
|
||||||
|
return isSymbolic() || isStructuralIndel();
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isMNP() {
|
public boolean isMNP() {
|
||||||
return getType() == Type.MNP;
|
return getType() == Type.MNP;
|
||||||
}
|
}
|
||||||
|
|
@ -1250,6 +1263,14 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
// performs a pairwise comparison of a single alternate allele against the reference allele (whereas the MIXED type
|
// performs a pairwise comparison of a single alternate allele against the reference allele (whereas the MIXED type
|
||||||
// is reserved for cases of multiple alternate alleles of different types). Therefore, if we've reached this point
|
// is reserved for cases of multiple alternate alleles of different types). Therefore, if we've reached this point
|
||||||
// in the code (so we're not a SNP, MNP, or symbolic allele), we absolutely must be an INDEL.
|
// in the code (so we're not a SNP, MNP, or symbolic allele), we absolutely must be an INDEL.
|
||||||
|
|
||||||
|
// Because a number of structural variation callers write the whole alternate allele into the VCF where possible,
|
||||||
|
// this can result in insertion/deletion alleles of structural variant size, e.g. 151+. As of July 2012, we now
|
||||||
|
// classify these as structural events, rather than indel events, as we think differently about the mechanism,
|
||||||
|
// representation, and handling of these events. Check for this case here:
|
||||||
|
if ( ref.length() > MAX_ALLELE_SIZE_FOR_NON_SV || allele.length() > MAX_ALLELE_SIZE_FOR_NON_SV )
|
||||||
|
return Type.STRUCTURAL_INDEL;
|
||||||
|
|
||||||
return Type.INDEL;
|
return Type.INDEL;
|
||||||
|
|
||||||
// old incorrect logic:
|
// old incorrect logic:
|
||||||
|
|
|
||||||
|
|
@ -104,7 +104,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||||
after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||||
after.setCigarString("10M");
|
after.setCigarString("10M");
|
||||||
|
|
||||||
List<SAMRecord> reads = Arrays.asList(before,during,after);
|
List<SAMRecord> reads = Arrays.asList(before, during, after);
|
||||||
|
|
||||||
// create the iterator by state with the fake reads and fake records
|
// create the iterator by state with the fake reads and fake records
|
||||||
li = makeLTBS(reads,readAttributes);
|
li = makeLTBS(reads,readAttributes);
|
||||||
|
|
@ -134,9 +134,9 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||||
// create a test version of the Reads object
|
// create a test version of the Reads object
|
||||||
ReadProperties readAttributes = createTestReadProperties();
|
ReadProperties readAttributes = createTestReadProperties();
|
||||||
|
|
||||||
SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,firstLocus,76);
|
SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
|
||||||
indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
|
indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||||
indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
|
||||||
indelOnlyRead.setCigarString("76I");
|
indelOnlyRead.setCigarString("76I");
|
||||||
|
|
||||||
List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
|
List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
|
||||||
|
|
@ -148,10 +148,10 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||||
// and considers it to be an indel-containing read.
|
// and considers it to be an indel-containing read.
|
||||||
Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled");
|
Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled");
|
||||||
AlignmentContext alignmentContext = li.next();
|
AlignmentContext alignmentContext = li.next();
|
||||||
Assert.assertEquals(alignmentContext.getLocation().getStart(),firstLocus,"Base pileup is at incorrect location.");
|
Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location.");
|
||||||
ReadBackedPileup basePileup = alignmentContext.getBasePileup();
|
ReadBackedPileup basePileup = alignmentContext.getBasePileup();
|
||||||
Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size");
|
Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size");
|
||||||
Assert.assertSame(basePileup.getReads().get(0),indelOnlyRead,"Read in pileup is incorrect");
|
Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -168,7 +168,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||||
leadingRead.setCigarString("1M75I");
|
leadingRead.setCigarString("1M75I");
|
||||||
|
|
||||||
SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
|
SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
|
||||||
indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
|
indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
|
||||||
indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||||
indelOnlyRead.setCigarString("76I");
|
indelOnlyRead.setCigarString("76I");
|
||||||
|
|
||||||
|
|
@ -177,7 +177,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||||
fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||||
fullMatchAfterIndel.setCigarString("75I1M");
|
fullMatchAfterIndel.setCigarString("75I1M");
|
||||||
|
|
||||||
List<SAMRecord> reads = Arrays.asList(leadingRead,indelOnlyRead,fullMatchAfterIndel);
|
List<SAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);
|
||||||
|
|
||||||
// create the iterator by state with the fake reads and fake records
|
// create the iterator by state with the fake reads and fake records
|
||||||
li = makeLTBS(reads, createTestReadProperties());
|
li = makeLTBS(reads, createTestReadProperties());
|
||||||
|
|
@ -204,7 +204,36 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||||
numAlignmentContextsFound++;
|
numAlignmentContextsFound++;
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(numAlignmentContextsFound,2,"Found incorrect number of alignment contexts");
|
Assert.assertEquals(numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test to make sure that reads supporting only an indel (example cigar string: 76I) do
|
||||||
|
* not negatively influence the ordering of the pileup.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testWholeIndelReadTest2() {
|
||||||
|
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||||
|
|
||||||
|
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header,"read",0,secondLocus,1);
|
||||||
|
read.setReadBases(Utils.dupBytes((byte) 'A', 1));
|
||||||
|
read.setBaseQualities(Utils.dupBytes((byte) '@', 1));
|
||||||
|
read.setCigarString("1I");
|
||||||
|
|
||||||
|
List<SAMRecord> reads = Arrays.asList(read);
|
||||||
|
|
||||||
|
// create the iterator by state with the fake reads and fake records
|
||||||
|
li = makeLTBS(reads, createTestReadProperties());
|
||||||
|
|
||||||
|
while(li.hasNext()) {
|
||||||
|
AlignmentContext alignmentContext = li.next();
|
||||||
|
ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||||
|
Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||||
|
PileupElement pe = p.iterator().next();
|
||||||
|
Assert.assertTrue(pe.isBeforeInsertion());
|
||||||
|
Assert.assertFalse(pe.isAfterInsertion());
|
||||||
|
Assert.assertEquals(pe.getEventBases(), "A");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ReadProperties createTestReadProperties() {
|
private static ReadProperties createTestReadProperties() {
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
* Integration tests for the Depth of Coverage walker
|
||||||
*
|
*
|
||||||
* @Author chartl
|
* @Author chartl
|
||||||
* @Date Feb 25, 2010
|
* @Date Feb 25, 2010
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,92 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: 8/20/12
|
||||||
|
* Time: 9:57 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class VariantsToBinaryPedIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
|
public static final String VTBP_DATA_DIR = "/humgen/gsa-hpprojects/GATK/data/Validation_Data/VariantsToBinaryPed/";
|
||||||
|
|
||||||
|
public static String baseTestString(String inputVCF, String inputMetaData, int gq) {
|
||||||
|
return "-T VariantsToBinaryPed -R " + b37KGReference +
|
||||||
|
" -V " + VTBP_DATA_DIR+inputVCF + " -m "+VTBP_DATA_DIR+inputMetaData + String.format(" -mgq %d",gq) +
|
||||||
|
" -bim %s -fam %s -bed %s";
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNA12878Alone() {
|
||||||
|
String testName = "testNA12878Alone";
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString("NA12878.subset.vcf", "CEUTrio.NA12878.fam",10),
|
||||||
|
3,
|
||||||
|
Arrays.asList("411ef932095728bfa5e509c2c0e4cfa8","8e8bc0b5e69f22c54c0960f13c25d26c","02f1c462ebc8576e399d0e94f729fd95")
|
||||||
|
);
|
||||||
|
|
||||||
|
executeTest(testName, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNA12878AloneMetaData() {
|
||||||
|
String testName = "testNA12878AloneMetaData";
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString("NA12878.subset.vcf", "CEUTrio.NA12878.metadata.txt",10),
|
||||||
|
3,
|
||||||
|
Arrays.asList("411ef932095728bfa5e509c2c0e4cfa8","7251ca4e8a515b698e7e7d25cff91978","02f1c462ebc8576e399d0e94f729fd95")
|
||||||
|
);
|
||||||
|
|
||||||
|
executeTest(testName, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCEUTrio() {
|
||||||
|
String testName = "testCEUTrio";
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString("CEUTrio.subset.vcf", "CEUTrio.fam",10),
|
||||||
|
3,
|
||||||
|
Arrays.asList("59b93fbb4bb31309b3adc83ba96dd1a2","900f22c6d49a6ba0774466e99592e51d","7887d2e0bf605dbcd0688c552cdb99d5")
|
||||||
|
);
|
||||||
|
|
||||||
|
executeTest(testName, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCEUTrioMetaData() {
|
||||||
|
String testName = "testCEUTrioMetaData";
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString("CEUTrio.subset.vcf", "CEUTrio.metadata.txt",10),
|
||||||
|
3,
|
||||||
|
Arrays.asList("59b93fbb4bb31309b3adc83ba96dd1a2","2113d2cc0a059e35b1565196b7c5d98f","7887d2e0bf605dbcd0688c552cdb99d5")
|
||||||
|
);
|
||||||
|
|
||||||
|
executeTest(testName, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMalformedFam() {
|
||||||
|
String testName = "testMalformedFam";
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString("CEUTrio.subset.vcf", "CEUTrio.malformed.fam",10),
|
||||||
|
3,
|
||||||
|
UserException.class
|
||||||
|
);
|
||||||
|
|
||||||
|
executeTest(testName, spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -52,8 +52,8 @@ class QSettings {
|
||||||
@Argument(fullName="job_environment_name", shortName="jobEnv", doc="Environment names for the job runner.", required=false)
|
@Argument(fullName="job_environment_name", shortName="jobEnv", doc="Environment names for the job runner.", required=false)
|
||||||
var jobEnvironmentNames: Seq[String] = Nil
|
var jobEnvironmentNames: Seq[String] = Nil
|
||||||
|
|
||||||
@Argument(fullName="memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false)
|
@Argument(fullName="memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes. If not set defaults to 2GB.", required=false)
|
||||||
var memoryLimit: Option[Double] = None
|
var memoryLimit: Option[Double] = Some(2)
|
||||||
|
|
||||||
@Argument(fullName="memory_limit_threshold", shortName="memLimitThresh", doc="After passing this threshold stop increasing memory limit for jobs, in gigabytes.", required=false)
|
@Argument(fullName="memory_limit_threshold", shortName="memLimitThresh", doc="After passing this threshold stop increasing memory limit for jobs, in gigabytes.", required=false)
|
||||||
var memoryLimitThreshold: Option[Double] = None
|
var memoryLimitThreshold: Option[Double] = None
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue