From d5cda76c8b8ea383e17ac36c35365b85628b7747 Mon Sep 17 00:00:00 2001 From: chartl Date: Mon, 30 Nov 2009 16:25:07 +0000 Subject: [PATCH] A new subversion repository for matlab scripts, so they don't go away in case of catastrophic Yang failure. Might be useful if you want to learn matlab or make use of matlab scripts. It does have some advantages over R (and some disadvantages as well) This script loads data from the FHS directory listing the average DoC per exon per gene per pool and makes a gene-by-gene box plot of the resulting data, broken up between pilot and production. It will also automatically save the resulting figures if you un-comment two lines. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2181 348d0f76-0448-11de-a6fe-93d51630548a --- matlab/make_gene_coverage_box_plot.m | 69 ++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 matlab/make_gene_coverage_box_plot.m diff --git a/matlab/make_gene_coverage_box_plot.m b/matlab/make_gene_coverage_box_plot.m new file mode 100755 index 000000000..e626442d6 --- /dev/null +++ b/matlab/make_gene_coverage_box_plot.m @@ -0,0 +1,69 @@ +close all +%load mean_coverage_per_exon_per_pool.mat <-- doesn't work from +%/sting/matlab -- but this does + +imp = importdata('/humgen/gsa-hphome1/projects/FHS/results/production_averageDoCByExonAndGene.txt'); +data = imp.data; +textdata = imp.textdata; + +pilot_pos = ismember(textdata(:,2),'CEPH3')+ismember(textdata(:,2),'CEPH2')+ismember(textdata(:,2),'CEPH1'); +pilot_data = data( find(pilot_pos == 1), : ); +prod_data = data( find(pilot_pos == 0) , :); +pilot_text = textdata( find ( pilot_pos == 1) , : ); +prod_text = textdata( find ( pilot_pos == 0 ), : ); + +lim = length(data); + +% get unique names + +gnames = [textdata(1,3)]; +prevn = 'hello doofus'; +for ii = 2 : lim/23 % first 1/23 of the file contains all the gene names (rest are repeats) + n = textdata{ii,3}; + if ( length(prevn) == length(n) && prevn(1)==n(1) && prevn(end)==n(end) ) + % quick test to check if gene is same as previous + else + if ( ~ ismember(gnames,n) ) + gnames = [gnames; textdata(ii,3)]; + % more exhaustive test to see if gene name is novel + end + prevn = n; + end +end + +% plot the groups +num_genes = size(gnames) % yes we want to print this + +to_plot = 40; +plotno = 0; +filenamebaseprod = 'FHS_production_gene_cvg_boxplot_'; +filenamebasepilot = 'FHS_pilot_gene_cvg_boxplot_'; +for genes = 1 : to_plot : num_genes + plotno = plotno + 1; + pilot_positions = []; + prod_positions = []; + for g = genes:genes+to_plot + %n = gnames{g,1} + if ( g < length(gnames) ) + pilot_positions = [pilot_positions; find(ismember(pilot_text(:,3),gnames{g,1}) == 1)]; + prod_positions = [prod_positions; find(ismember(prod_text(:,3),gnames{g,1}) == 1 )]; + end + end + depths_prod = prod_data(prod_positions,2); + depths_pilot = pilot_data(pilot_positions,2); + grenes_prod = prod_text(prod_positions,3); + grenes_pilot = pilot_text(pilot_positions,3); + h = figure + hp = subplot(1,1,1) + boxplot(depths_prod,grenes_prod,'plotstyle','compact','orientation','vertical','datalim',[0,10000],'whisker',0,'symbol','r+'), title('Production') + set(hp,'YLim',[-1000,11000]) + y = figure + yp = subplot(1,1,1) + boxplot(depths_pilot,grenes_pilot,'plotstyle','compact','orientation','vertical','datalim',[0,10000],'whisker',0,'symbol','r+'), title('Pilot') + set(yp,'YLim',[-1000,11000]) + + % -- uncomment these lines to save the files -- + %saveas(h,strcat(filenamebaseprod,num2str(plotno)),'psc2'); + %saveas(y,strcat(filenamebasepilot,num2str(plotno)),'psc2'); +end +