1) GATKArgumentCollection has a command to turn off randomization if setting the seed isn't enough. Right now it's only hooked into RankSumTest.
 2) RankSumTest now can be passed a boolean telling it whether to use a dithering or non-randomizing comparator. Unit tested.
 3) VariantsToBinaryPed can now output in both individual-major and SNP-major mode. Integration test.
 4) Updates to PlinkBed-handling python scripts and utilities.
 5) Tool for calculating (LD-corrected) GRMs put under version control. This is analysis for T2D, but I don't want to lose it should something happen to my computer.
This commit is contained in:
Christopher Hartl 2012-10-03 16:02:42 -04:00
parent ac87ed47bb
commit 1be8a88909
4 changed files with 107 additions and 15 deletions

View File

@ -140,6 +140,9 @@ public class GATKArgumentCollection {
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
public boolean nonDeterministicRandomSeed = false;
@Argument(fullName = "disableRandomization",doc="Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.")
public boolean disableRandomization = false;
// --------------------------------------------------------------------------------------------------------------
//
// Downsampling Arguments

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -10,6 +11,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsC
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MannWhitneyU;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -19,10 +21,7 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
@ -30,6 +29,7 @@ import java.util.Map;
*/
public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation {
static final boolean DEBUG = false;
private boolean useDithering = true;
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
@ -70,7 +70,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
if (refQuals.isEmpty() && altQuals.isEmpty())
return null;
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
final MannWhitneyU mannWhitneyU = new MannWhitneyU(useDithering);
for (final Double qual : altQuals) {
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
}
@ -131,4 +131,15 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); // need the unBAQed quality score here
}
/**
* Initialize the rank sum test annotation using walker and engine information. Right now this checks to see if
* engine randomization is turned off, and if so does not dither.
* @param walker
* @param toolkit
* @param headerLines
*/
public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
useDithering = ! toolkit.getArguments().disableRandomization;
}
}

View File

@ -66,6 +66,9 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
"(in which case it will be copied to the file you provide as fam output).")
File metaDataFile;
@Input(shortName="mode",fullName="outputMode",required=false,doc="The output file mode (SNP major or individual major)")
OutputMode mode = OutputMode.INDIVIDUAL_MAJOR;
@Output(shortName="bed",fullName = "bed",required=true,doc="output ped file")
PrintStream outBed;
@ -81,6 +84,8 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
@Argument(fullName="majorAlleleFirst",required=false,doc="Sets the major allele to be 'reference' for the bim file, rather than the ref allele")
boolean majorAlleleFirst = false;
enum OutputMode { INDIVIDUAL_MAJOR,SNP_MAJOR }
private static double APPROX_CM_PER_BP = 1000000.0/750000.0;
private static final byte HOM_REF = 0x0;
@ -138,14 +143,18 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
throw new UserException("No metadata provided for sample "+sample);
}
}
try {
File temp = File.createTempFile("VariantsToBPed_"+sample, ".tmp");
printMap.put(sample,new PrintStream(temp));
tempFiles.put(sample,temp);
} catch (IOException e) {
throw new ReviewedStingException("Error creating temporary file",e);
if ( mode == OutputMode.INDIVIDUAL_MAJOR ) {
// only need to instantiate the files and buffers if in individual major.
// Cut down on memory.
try {
File temp = File.createTempFile("VariantsToBPed_"+sample, ".tmp");
printMap.put(sample,new PrintStream(temp));
tempFiles.put(sample,temp);
} catch (IOException e) {
throw new ReviewedStingException("Error creating temporary file",e);
}
genotypeBuffer.put(sample,new byte[BUFFER_SIZE]);
}
genotypeBuffer.put(sample,new byte[BUFFER_SIZE]);
famOrder.add(sample);
}
}
@ -195,6 +204,17 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
// write an entry into the map file
outBim.printf("%s\t%s\t%.2f\t%d\t%s\t%s%n",vc.getChr(),getID(vc),APPROX_CM_PER_BP*vc.getStart(),vc.getStart(),
refOut,altOut);
if ( mode == OutputMode.INDIVIDUAL_MAJOR ) {
writeIndividualMajor(vc,altMajor);
} else {
writeSNPMajor(vc,altMajor);
}
return 1;
}
public void writeIndividualMajor(VariantContext vc, boolean altMajor) {
// store genotypes per sample into the buffer
for ( Genotype g : vc.getGenotypes() ) {
String sample = g.getSampleName();
@ -202,6 +222,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
byte enc = getEncoding(g,genotypeCount,altMajor);
samBuf[byteCount] |= enc;
}
genotypeCount++;
if ( genotypeCount % 4 == 0 ) {
byteCount++;
@ -222,8 +243,29 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
}
genotypeCount = 0;
}
}
return 1;
public void writeSNPMajor(VariantContext vc, boolean altMajor) {
// for each sample, write the genotype into the bed file, in the
// order of the fam file
genotypeCount = 0;
byteCount = 0;
byte[] bytes = new byte[(3+famOrder.size())/4]; // this exploits java integer fractions, which round down by default (1-4) -> 1, (5-8) -> 2
for ( Genotype g : vc.getGenotypesOrderedBy(famOrder) ) {
byte enc = getEncoding(g,genotypeCount,altMajor);
bytes[byteCount] |= enc;
genotypeCount++;
if ( genotypeCount % 4 == 0 ) {
byteCount++;
genotypeCount = 0;
}
}
try {
outBed.write(bytes);
} catch (IOException e) {
throw new ReviewedStingException("Error writing to output bed file",e);
}
}
public Integer reduce(Integer m, Integer r) {
@ -236,6 +278,14 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
public void onTraversalDone(Integer numSites) {
logger.info(String.format("%d sites processed!",numSites));
if ( mode == OutputMode.INDIVIDUAL_MAJOR ) {
mergeGenotypeTempFiles(numSites);
}
}
private void mergeGenotypeTempFiles(int numSites) {
// push out the remaining genotypes and close stream
for ( String sample : printMap.keySet() ) {
try {
@ -278,7 +328,6 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
throw new ReviewedStingException("Error reading form temp file for input.",e);
}
}
}
private byte getEncoding(Genotype g, int offset, boolean altMajor) {
@ -355,7 +404,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
private void writeBedHeader() {
// write magic bits into the ped file
try {
outBed.write(new byte[] { (byte) 0x6c, (byte) 0x1b, 0x0});
outBed.write(new byte[] { (byte) 0x6c, (byte) 0x1b, (byte) (mode == OutputMode.INDIVIDUAL_MAJOR ? 0x0 : 0x1)});
// ultimately, the bed will be in individual-major mode
} catch (IOException e) {
throw new ReviewedStingException("error writing to output file.");

View File

@ -28,6 +28,13 @@ public class VariantsToBinaryPedIntegrationTest extends WalkerTest {
}
public static String baseTestString(String inputVCF, String inputMetaData, int gq, String mode) {
return "-T VariantsToBinaryPed -R " + b37KGReference + " -mode "+mode +
" -V " + VTBP_DATA_DIR+inputVCF + " -m "+VTBP_DATA_DIR+inputMetaData + String.format(" -mgq %d",gq) +
" -bim %s -fam %s -bed %s";
}
@Test
public void testNA12878Alone() {
String testName = "testNA12878Alone";
@ -52,6 +59,18 @@ public class VariantsToBinaryPedIntegrationTest extends WalkerTest {
executeTest(testName, spec);
}
@Test
public void testNA12878AloneSNPMajor() {
String testName = "testNA12878AloneSNPMajor";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("NA12878.subset.vcf", "CEUTrio.NA12878.metadata.txt",10,"SNP_MAJOR"),
3,
Arrays.asList("411ef932095728bfa5e509c2c0e4cfa8","7251ca4e8a515b698e7e7d25cff91978","ada1acc475d096012b921b3219c3a446")
);
executeTest(testName, spec);
}
@Test
public void testNA12878HighGQ() {
String testName = "testNA12878HighGQ";
@ -86,6 +105,16 @@ public class VariantsToBinaryPedIntegrationTest extends WalkerTest {
);
}
@Test
public void test1000GWithIndelsSNPMajor() {
String testName = "test1000GWithIndelsSNPMajor";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("1000G_selected_allVariants.vcf", "1000G_selected_allVariants.md.txt",0,"SNP_MAJOR"),
3,
Arrays.asList("3c98112434d9948dc47da72ad14e8d84","4a0ba3d0594b06306aa6459e4e28ec9a","451498ceff06c1649890900fa994f1af")
);
}
@Test
public void test1000G_Symbolic() {
String testName = "test1000G_Symbolic";