A new subversion repository for matlab scripts, so they don't go away in case of catastrophic Yang failure.

Might be useful if you want to learn matlab or make use of matlab scripts. It does have some advantages over R (and some disadvantages as well)

This script loads data from the FHS directory listing the average DoC per exon per gene per pool and makes a gene-by-gene box plot of the resulting data, broken up between pilot and production. It will also automatically save the resulting figures if you un-comment two lines.




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2181 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-11-30 16:25:07 +00:00
parent af22ca1b47
commit d5cda76c8b
1 changed files with 69 additions and 0 deletions

View File

@ -0,0 +1,69 @@
close all
%load mean_coverage_per_exon_per_pool.mat <-- doesn't work from
%/sting/matlab -- but this does
imp = importdata('/humgen/gsa-hphome1/projects/FHS/results/production_averageDoCByExonAndGene.txt');
data = imp.data;
textdata = imp.textdata;
pilot_pos = ismember(textdata(:,2),'CEPH3')+ismember(textdata(:,2),'CEPH2')+ismember(textdata(:,2),'CEPH1');
pilot_data = data( find(pilot_pos == 1), : );
prod_data = data( find(pilot_pos == 0) , :);
pilot_text = textdata( find ( pilot_pos == 1) , : );
prod_text = textdata( find ( pilot_pos == 0 ), : );
lim = length(data);
% get unique names
gnames = [textdata(1,3)];
prevn = 'hello doofus';
for ii = 2 : lim/23 % first 1/23 of the file contains all the gene names (rest are repeats)
n = textdata{ii,3};
if ( length(prevn) == length(n) && prevn(1)==n(1) && prevn(end)==n(end) )
% quick test to check if gene is same as previous
else
if ( ~ ismember(gnames,n) )
gnames = [gnames; textdata(ii,3)];
% more exhaustive test to see if gene name is novel
end
prevn = n;
end
end
% plot the groups
num_genes = size(gnames) % yes we want to print this
to_plot = 40;
plotno = 0;
filenamebaseprod = 'FHS_production_gene_cvg_boxplot_';
filenamebasepilot = 'FHS_pilot_gene_cvg_boxplot_';
for genes = 1 : to_plot : num_genes
plotno = plotno + 1;
pilot_positions = [];
prod_positions = [];
for g = genes:genes+to_plot
%n = gnames{g,1}
if ( g < length(gnames) )
pilot_positions = [pilot_positions; find(ismember(pilot_text(:,3),gnames{g,1}) == 1)];
prod_positions = [prod_positions; find(ismember(prod_text(:,3),gnames{g,1}) == 1 )];
end
end
depths_prod = prod_data(prod_positions,2);
depths_pilot = pilot_data(pilot_positions,2);
grenes_prod = prod_text(prod_positions,3);
grenes_pilot = pilot_text(pilot_positions,3);
h = figure
hp = subplot(1,1,1)
boxplot(depths_prod,grenes_prod,'plotstyle','compact','orientation','vertical','datalim',[0,10000],'whisker',0,'symbol','r+'), title('Production')
set(hp,'YLim',[-1000,11000])
y = figure
yp = subplot(1,1,1)
boxplot(depths_pilot,grenes_pilot,'plotstyle','compact','orientation','vertical','datalim',[0,10000],'whisker',0,'symbol','r+'), title('Pilot')
set(yp,'YLim',[-1000,11000])
% -- uncomment these lines to save the files --
%saveas(h,strcat(filenamebaseprod,num2str(plotno)),'psc2');
%saveas(y,strcat(filenamebasepilot,num2str(plotno)),'psc2');
end