A new subversion repository for matlab scripts, so they don't go away in case of catastrophic Yang failure.
Might be useful if you want to learn matlab or make use of matlab scripts. It does have some advantages over R (and some disadvantages as well) This script loads data from the FHS directory listing the average DoC per exon per gene per pool and makes a gene-by-gene box plot of the resulting data, broken up between pilot and production. It will also automatically save the resulting figures if you un-comment two lines. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2181 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
af22ca1b47
commit
d5cda76c8b
|
|
@ -0,0 +1,69 @@
|
|||
close all
|
||||
%load mean_coverage_per_exon_per_pool.mat <-- doesn't work from
|
||||
%/sting/matlab -- but this does
|
||||
|
||||
imp = importdata('/humgen/gsa-hphome1/projects/FHS/results/production_averageDoCByExonAndGene.txt');
|
||||
data = imp.data;
|
||||
textdata = imp.textdata;
|
||||
|
||||
pilot_pos = ismember(textdata(:,2),'CEPH3')+ismember(textdata(:,2),'CEPH2')+ismember(textdata(:,2),'CEPH1');
|
||||
pilot_data = data( find(pilot_pos == 1), : );
|
||||
prod_data = data( find(pilot_pos == 0) , :);
|
||||
pilot_text = textdata( find ( pilot_pos == 1) , : );
|
||||
prod_text = textdata( find ( pilot_pos == 0 ), : );
|
||||
|
||||
lim = length(data);
|
||||
|
||||
% get unique names
|
||||
|
||||
gnames = [textdata(1,3)];
|
||||
prevn = 'hello doofus';
|
||||
for ii = 2 : lim/23 % first 1/23 of the file contains all the gene names (rest are repeats)
|
||||
n = textdata{ii,3};
|
||||
if ( length(prevn) == length(n) && prevn(1)==n(1) && prevn(end)==n(end) )
|
||||
% quick test to check if gene is same as previous
|
||||
else
|
||||
if ( ~ ismember(gnames,n) )
|
||||
gnames = [gnames; textdata(ii,3)];
|
||||
% more exhaustive test to see if gene name is novel
|
||||
end
|
||||
prevn = n;
|
||||
end
|
||||
end
|
||||
|
||||
% plot the groups
|
||||
num_genes = size(gnames) % yes we want to print this
|
||||
|
||||
to_plot = 40;
|
||||
plotno = 0;
|
||||
filenamebaseprod = 'FHS_production_gene_cvg_boxplot_';
|
||||
filenamebasepilot = 'FHS_pilot_gene_cvg_boxplot_';
|
||||
for genes = 1 : to_plot : num_genes
|
||||
plotno = plotno + 1;
|
||||
pilot_positions = [];
|
||||
prod_positions = [];
|
||||
for g = genes:genes+to_plot
|
||||
%n = gnames{g,1}
|
||||
if ( g < length(gnames) )
|
||||
pilot_positions = [pilot_positions; find(ismember(pilot_text(:,3),gnames{g,1}) == 1)];
|
||||
prod_positions = [prod_positions; find(ismember(prod_text(:,3),gnames{g,1}) == 1 )];
|
||||
end
|
||||
end
|
||||
depths_prod = prod_data(prod_positions,2);
|
||||
depths_pilot = pilot_data(pilot_positions,2);
|
||||
grenes_prod = prod_text(prod_positions,3);
|
||||
grenes_pilot = pilot_text(pilot_positions,3);
|
||||
h = figure
|
||||
hp = subplot(1,1,1)
|
||||
boxplot(depths_prod,grenes_prod,'plotstyle','compact','orientation','vertical','datalim',[0,10000],'whisker',0,'symbol','r+'), title('Production')
|
||||
set(hp,'YLim',[-1000,11000])
|
||||
y = figure
|
||||
yp = subplot(1,1,1)
|
||||
boxplot(depths_pilot,grenes_pilot,'plotstyle','compact','orientation','vertical','datalim',[0,10000],'whisker',0,'symbol','r+'), title('Pilot')
|
||||
set(yp,'YLim',[-1000,11000])
|
||||
|
||||
% -- uncomment these lines to save the files --
|
||||
%saveas(h,strcat(filenamebaseprod,num2str(plotno)),'psc2');
|
||||
%saveas(y,strcat(filenamebasepilot,num2str(plotno)),'psc2');
|
||||
end
|
||||
|
||||
Loading…
Reference in New Issue