Adding a script for diff'ing the output of samtools and the GATK for the whole genome and each individual chromosome.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@882 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-02 21:19:39 +00:00
parent 3998085e4b
commit 056fcdc31c
1 changed files with 66 additions and 0 deletions

View File

@ -0,0 +1,66 @@
#!/usr/bin/csh
# ##############################################################################
# this file can be used to diff the flagstat values for a whole genome,
# along with sub-diffs for each chromosome. This may be helpful if you're
# debugging a specific reads problem, and you want to know the differences
# between the output of the GATK and samtools
# ##############################################################################
if ($#argv != 3) then
echo "usage readStatsDiff <bamfile.bam> <referenceSequence.fasta> <outputFile>"; exit 1
endif
if (! -f $1) then
echo bam file $2 does not seem to exist; exit 1
endif
if (! -f $2) then
echo reference file $2 does not seem to exist; exit 1
endif
if ( -e $3) then
echo output file $3 already exists!; exit 1
endif
# set the bam and the reference index
set bam = $1
set seq = $2
set out = $3
# diff the whole chromosome
java -ea -Xmx4096m \
-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8015 \
-jar dist/GenomeAnalysisTK.jar \
-S SILENT \
-T FlagStatRead \
-I $bam \
-R $seq \
-oe 1.out
samtools flagstat $bam > 2.out
echo "\n" >> $out
echo results for whole bam file >> $out
echo "\n" >> $out
diff 1.out 2.out >> $out
rm 1.out
rm 2.out
foreach chromosome (chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX)
java -ea -Xmx4096m \
-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8015 \
-jar dist/GenomeAnalysisTK.jar \
-S SILENT \
-T FlagStatRead \
-I $bam \
-R $seq \
-oe 1.out \
-L $chromosome
samtools view -h -b $bam $chromosome > temp.bam
samtools flagstat temp.bam > 2.out
echo results for $chromosome >> $out
echo "\n" >> $out
diff 1.out 2.out >> $out
rm temp.bam
rm 1.out
rm 2.out
end