AnalyzeAnnotations can now optionally restrict processing to variants that are found in the sample matching the -sampleName argument. The x-axis of each plot no longer uses annoying scientific notation.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2684 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-25 20:52:11 +00:00
parent 022601b1a5
commit 24d4082925
3 changed files with 16 additions and 5 deletions

View File

@ -28,7 +28,8 @@ xmax = max(d$value)
outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax));
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n");
axis(1,axTicks(1), format(axTicks(1), scientific=F))
m = weighted.mean(all$value,all$numVariants/sum(all$numVariants))
ma = all[all$value > m,]
mb = all[all$value < m,]
@ -45,7 +46,8 @@ dev.off()
outfile = paste(outputDir, "binnedTiTv_log.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax));
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n");
axis(1,axTicks(1), format(axTicks(1), scientific=F))
abline(v=m,lty=2)
abline(v=m75,lty=2)
abline(v=m25,lty=2)
@ -58,5 +60,6 @@ dev.off()
outfile = paste(outputDir, "binnedTiTv_hist.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h");
plot(all$value,all$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h",xaxt="n");
axis(1,axTicks(1), format(axTicks(1), scientific=F))
dev.off()

View File

@ -56,6 +56,8 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
private int MIN_VARIANTS_PER_BIN = 1000;
@Argument(fullName = "max_variants_per_bin", shortName = "maxBinSize", doc = "The maximum number of variants in a bin.", required = false)
private int MAX_VARIANTS_PER_BIN = 20000;
@Argument(fullName = "sampleName", shortName = "sampleName", doc = "Only process variants for this sample.", required = false)
private String SAMPLE_NAME = null;
/////////////////////////////
@ -95,7 +97,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
if( rod != null && rod instanceof RodVCF ) {
RodVCF variant = (RodVCF) rod;
if( variant.isSNP() ) {
dataManager.addAnnotations( variant );
dataManager.addAnnotations( variant, SAMPLE_NAME );
}
}
}

View File

@ -53,7 +53,13 @@ public class AnnotationDataManager {
dataTruthSet = new HashMap<String, TreeSet<AnnotationDatum>>();
}
public void addAnnotations( RodVCF variant ) {
public void addAnnotations( RodVCF variant, String sampleName ) {
if( sampleName != null ) { // only process variants that are found in the sample with this sampleName
if( variant.getGenotype(sampleName).isNoCall() ) { // this variant isn't found in this sample so break out
return;
}
} // else, process all samples
// Loop over each annotation in the vcf record
final Map<String,String> infoField = variant.getInfoValues();