From a26096dd750711dd58b90638b18b551058182d61 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 5 Nov 2011 10:56:25 -0400 Subject: [PATCH] pair two single-end SAMs --- pairsam.pl | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 pairsam.pl diff --git a/pairsam.pl b/pairsam.pl new file mode 100755 index 0000000..bb901bb --- /dev/null +++ b/pairsam.pl @@ -0,0 +1,69 @@ +#!/usr/bin/perl -w + +use strict; +use warnings; +use Getopt::Std; + +my %opts = (a=>700, b=>100); +getopts('a:b:', \%opts); +die("Usage: pairsam.pl [-a $opts{a}] \n") if (@ARGV < 2); + +my ($fh0, $fh1, $l0, $l1); + +open($fh0, $ARGV[0] =~ /\.gz$/? "gzip -dc $ARGV[0] |" : $ARGV[0]) || die; +open($fh1, $ARGV[1] =~ /\.gz$/? "gzip -dc $ARGV[1] |" : $ARGV[1]) || die; + +while ($l0 = <$fh0>) { last if $l0 !~ /^@/; print $l0 } +while ($l1 = <$fh1>) { last if $l1 !~ /^@/ } + +while (defined($l0) && defined($l1)) { + my ($r0, $r1) = &pair_line(\$l0, \$l1, $opts{a}, $opts{b}); + while ($l0 = <$fh0>) { last if $l0 !~ /^$r0/; print $l0; } + while ($l1 = <$fh1>) { last if $l1 !~ /^$r1/; print $l1; } +} + +close($fh0); close($fh1); + +sub pair_line { + my ($l0, $l1, $max_ins, $min_ins) = @_; + my @t0 = split("\t", $$l0); + my @t1 = split("\t", $$l1); + my ($n0, $n1) = ($t0[0], $t1[0]); + my ($cigar, $a0, $a1, $p0, $p1); + # length in alignment + $cigar = $t0[5]; $a0 = 0; $cigar =~ s/(\d+)[MI]/$a0 += $1/eg; + $cigar = $t1[5]; $a1 = 0; $cigar =~ s/(\d+)[MI]/$a1 += $1/eg; + # 5'-end alignment position on the read + $p0 = $t0[1] == 16? $t0[3] + $a0 : $t0[3]; + $p1 = $t1[1] == 16? $t1[3] + $a1 : $t1[3]; + # adjust mapping quality + if ($t0[2] eq $t1[2] && $t0[1]+$t1[1] == 16) { # on the same chr and forward-reverse + if (abs($p0 - $p1) <= $max_ins && abs($p0 - $p1) >= $min_ins) { # within the right insert size distribution + $t0[1] |= 2; $t1[1] |= 2; # flag as paired + if ($t0[4] < $t1[4]) { # increase mapQ + $t0[4] = $t0[4] + 10 < $t1[4]? $t0[4] + 10 : $t1[4]; + } else { + $t1[4] = $t1[4] + 10 < $t0[4]? $t1[4] + 10 : $t0[4]; + } + } + } + unless ($t0[1]&2) { # decrease mapQ if unpaired + $t0[4] = $t0[4] > 10? $t0[4] - 10 : 0; + $t1[4] = $t1[4] > 10? $t1[4] - 10 : 0; + } + # strip off /[12] + $t0[0] =~ s/\/[12]$//; $t1[0] =~ s/\/[12]$//; + # update FLAG + $t0[1] |= 0x41 | (($t1[1]&16)? 0x20 : 0) | (($t1[1]&4)? 0x8 : 0); + $t1[1] |= 0x81 | (($t0[1]&16)? 0x20 : 0) | (($t0[1]&4)? 0x8 : 0); + # update mate positions + if ($t0[2] eq $t1[2]) { + $t0[6] = $t1[6] = '='; + $t0[8] = $p1 - $p0; $t1[8] = $p0 - $p1; + } else { $t0[6] = $t1[2]; $t1[6] = $t0[2]; } + $t0[7] = $t1[3]; $t1[7] = $t0[3]; + # print out + print join("\t", @t0); + print join("\t", @t1); + return ($n0, $n1); +}