Init

2025-04-17 16:17:44 +08:00 · 2025-04-17 16:17:44 +08:00 · 588abd70f2
parent 433c724211
commit 588abd70f2
475 changed files with 181298 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,11 @@
 # ---> C++
 # Prerequisites
 *.d
+/test
+/.vscode
+/build
+build.sh
+run.sh

 # Compiled Object files
 *.slo
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,10 @@
+cmake_minimum_required(VERSION 3.0)
+project(FastSort)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
+# set(CMAKE_BUILD_TYPE Debug)
+# set(CMAKE_BUILD_TYPE Release)
+add_definitions(-DSHOW_PERF=1)
+add_definitions(-DSPDLOG_COMPILED_LIB)
+add_subdirectory(src)
--- a/ext/argparse/argparse.hpp
+++ b/ext/argparse/argparse.hpp
--- a/ext/htslib/.gitignore
+++ b/ext/htslib/.gitignore
@ -0,0 +1,85 @@
+*.o
+*.pico
+*.obj
+*.dSYM
+*.exe
+*.dll
+*.dll.a
+*.pc.tmp
+*-uninstalled.pc
+config_vars.h
+/version.h
+
+autom4te.cache
+config.cache
+config.guess
+config.h
+config.h.in
+config.log
+config.mk
+config.status
+config.sub
+configure
+install-sh
+
+hfile_*.bundle
+hfile_*.cygdll
+hfile_*.dll
+hfile_*.so
+
+hts-object-files
+htslib_static.mk
+htscodecs.mk
+
+cyg*.dll
+lib*.a
+lib*.dll
+lib*.dylib
+lib*.so
+lib*.so.*
+
+header-exports.txt
+shlib-exports-*.txt
+
+/annot-tsv
+/bgzip
+/htsfile
+/tabix
+/test/*/FAIL*
+/test/bgzf_boundaries/*.tmp.*
+/test/faidx/*.tmp*
+/test/fieldarith
+/test/hfile
+/test/hts_endian
+/test/longrefs/*.tmp.*
+/test/pileup
+/test/pileup_mod
+/test/plugins-dlhts
+/test/sam
+/test/tabix/*.tmp.*
+/test/test-bcf-sr
+/test/test-bcf-translate
+/test/test-bcf_set_variant_type
+/test/test_bgzf
+/test/test_expr
+/test/test_faidx
+/test/test_index
+/test/test_introspection
+/test/test_kfunc
+/test/test_khash
+/test/test_kstring
+/test/test_mod
+/test/test_nibbles
+/test/test-parse-reg
+/test/test_realn
+/test/test-regidx
+/test/test_str2int
+/test/test_time_funcs
+/test/test-vcf-api
+/test/test-vcf-sweep
+/test/test_view
+/test/thrash_threads[1-7]
+/test/*.tmp
+/test/*.tmp.*
+
+/TAGS
--- a/ext/htslib/INSTALL
+++ b/ext/htslib/INSTALL
@ -0,0 +1,316 @@
+                    Building and Installing HTSlib
+                    ==============================
+
+Requirements
+============
+
+Building HTSlib requires a few programs and libraries to be present.
+See the "System Specific Details" below for guidance on how to install
+these.
+
+At least the following are required:
+
+    GNU make
+    C compiler (e.g. gcc or clang)
+
+In addition, building the configure script requires:
+
+    autoheader
+    autoconf
+    autoreconf
+
+Running the configure script uses awk, along with a number of
+standard UNIX tools (cat, cp, grep, mv, rm, sed, among others).  Almost
+all installations will have these already.
+
+Running the test harness (make test) uses:
+
+    bash
+    perl
+
+HTSlib uses the following external libraries.  Building requires both the
+library itself, and include files needed to compile code that uses functions
+from the library.  Note that some Linux distributions put include files in
+a development ('-dev' or '-devel') package separate from the main library.
+
+    zlib       (required)
+    libbz2     (required, unless configured with --disable-bz2)
+    liblzma    (required, unless configured with --disable-lzma)
+    libcurl    (optional, but strongly recommended)
+    libcrypto  (optional for Amazon S3 support; not needed on MacOS)
+    libdeflate (optional, but strongly recommended for faster gzip)
+
+Disabling libbzip2 and liblzma will make some CRAM files unreadable, so
+is not recommended.
+
+Using libcurl provides HTSlib with network protocol support, for
+example it enables the use of ftp://, http://, and https:// URLs.
+It is also required if direct access to Amazon S3 or Google Cloud
+Storage is enabled.
+
+Amazon S3 support requires an HMAC function to calculate a message
+authentication code.  On MacOS, the CCHmac function from the standard
+library is used.  Systems that do not have CCHmac will get this from
+libcrypto.  libcrypto is part of OpenSSL or one of its derivatives (LibreSSL
+or BoringSSL).
+
+On Microsoft Windows we recommend use of Mingw64/Msys2.  Whilst the
+code may work on Windows with other environments, these have not been
+verified.  Use of the configure script is a requirement too.
+
+Update htscodecs submodule
+==========================
+
+Note that this section only applies to git checkouts.  If you're building
+from a release tar file, you can skip this section.
+
+Some parts of HTSlib are provided by the external "htscodecs" project.  This
+is included as a submodule.  When building from the git repository,
+either clone the project using "git clone --recurse-submodules", or run:
+
+    git submodule update --init --recursive
+
+to ensure the correct version of the submodule is present.
+
+It is also possible to link against an external libhtscodecs library
+by using the '--with-external-htscodecs' configure option.  When
+this is used, the submodule files will be ignored.
+
+Building Configure
+==================
+
+This step is only needed if configure.ac has been changed, or if configure
+does not exist (for example, when building from a git clone).  The
+configure script and config.h.in can be built by running:
+
+    autoreconf -i
+
+Basic Installation
+==================
+
+To build and install HTSlib, 'cd' to the htslib-1.x directory containing
+the package's source and type the following commands:
+
+    ./configure
+    make
+    make install
+
+The './configure' command checks your build environment and allows various
+optional functionality to be enabled (see Configuration below).  If you
+don't want to select any optional functionality, you may wish to omit
+configure and just type 'make; make install' as for previous versions
+of HTSlib.  However if the build fails you should run './configure' as
+it can diagnose the common reasons for build failures.
+
+The 'make' command builds the HTSlib library and various useful
+utilities: bgzip, htsfile, and tabix.  If compilation fails you should
+run './configure' as it can diagnose problems with your build environment
+that cause build failures.
+
+The 'make install' command installs the libraries, library header files,
+utilities, several manual pages, and a pkgconfig file to /usr/local.
+The installation location can be changed by configuring with --prefix=DIR
+or via 'make prefix=DIR install' (see Installation Locations below).
+Shared library permissions can be set via e.g. 'make install LIB_PERM=755'.
+
+
+Configuration
+=============
+
+By default, './configure' examines your build environment, checking for
+requirements such as the zlib development files, and arranges for a plain
+HTSlib build.  The following configure options can be used to enable
+various features and specify further optional external requirements:
+
+--enable-plugins
+    Use plugins to implement exotic file access protocols and other
+    specialised facilities.  This enables such facilities to be developed
+    and packaged outwith HTSlib, and somewhat isolates HTSlib-using programs
+    from their library dependencies.  By default (or with --disable-plugins),
+    any enabled pluggable facilities (such as libcurl file access) are built
+    directly within HTSlib.
+
+    Programs that are statically linked to a libhts.a with plugins enabled
+    need to be linked using -rdynamic or a similar linker option.
+
+    The <https://github.com/samtools/htslib-plugins> repository contains
+    several additional plugins, including the iRODS (<http://irods.org/>)
+    file access plugin previously distributed with HTSlib.
+
+--with-plugin-dir=DIR
+    Specifies the directory into which plugins built while building HTSlib
+    should be installed; by default, LIBEXECDIR/htslib.
+
+--with-plugin-path=DIR:DIR:DIR...
+    Specifies the list of directories that HTSlib will search for plugins.
+    By default, only the directory specified via --with-plugin-dir will be
+    searched; you can use --with-plugin-path='DIR:$(plugindir):DIR' and so
+    on to cause additional directories to be searched.
+
+--with-external-htscodecs
+    Build and link against an external copy of the htscodecs library
+    instead of using the source files in the htscodecs directory.
+
+--enable-libcurl
+    Use libcurl (<http://curl.se/>) to implement network access to
+    remote files via FTP, HTTP, HTTPS, etc.  By default or with
+    --enable-libcurl=check, configure will probe for libcurl and include
+    this functionality if libcurl is available.  Use --disable-libcurl
+    to prevent this.
+
+--enable-gcs
+    Implement network access to Google Cloud Storage.  By default or with
+    --enable-gcs=check, this is enabled when libcurl is enabled.
+
+--enable-s3
+    Implement network access to Amazon AWS S3.  By default or with
+    --enable-s3=check, this is enabled when libcurl is enabled.
+
+--disable-bz2
+    Bzip2 is an optional compression codec format for CRAM, included
+    in HTSlib by default.  It can be disabled with --disable-bz2, but
+    be aware that not all CRAM files may be possible to decode.
+
+--disable-lzma
+    LZMA is an optional compression codec for CRAM, included in HTSlib
+    by default.  It can be disabled with --disable-lzma, but be aware
+    that not all CRAM files may be possible to decode.
+
+--with-libdeflate
+    Libdeflate is a heavily optimized library for DEFLATE-based compression
+    and decompression.  It also includes a fast crc32 implementation.
+    By default, ./configure will probe for libdeflate and use it if
+    available.  To prevent this, use --without-libdeflate.
+
+Each --enable-FEATURE/--disable-FEATURE/--with-PACKAGE/--without-PACKAGE
+option listed also has an opposite, e.g., --without-external-htscodecs
+or --disable-plugins.  However, apart from those options for which the
+default is to probe for related facilities, using these opposite options
+is mostly unnecessary as they just select the default configure behaviour.
+
+The configure script also accepts the usual options and environment variables
+for tuning installation locations and compilers: type './configure --help'
+for details.  For example,
+
+    ./configure CC=icc --prefix=/opt/icc-compiled
+
+would specify that HTSlib is to be built with icc and installed into bin,
+lib, etc subdirectories under /opt/icc-compiled.
+
+If dependencies have been installed in non-standard locations (i.e. not on
+the normal include and library search paths) then the CPPFLAGS and LDFLAGS
+environment variables can be used to set the options needed to find them.
+For example, NetBSD users may use:
+
+    ./configure CPPFLAGS=-I/usr/pkg/include \
+                LDFLAGS='-L/usr/pkg/lib -Wl,-R/usr/pkg/lib'
+
+to allow compiling and linking against dependencies installed via the ports
+collection.
+
+Installation Locations
+======================
+
+By default, 'make install' installs HTSlib libraries under /usr/local/lib,
+HTSlib header files under /usr/local/include, utility programs under
+/usr/local/bin, etc.  (To be precise, the header files are installed within
+a fixed 'htslib' subdirectory under the specified .../include location.)
+
+You can specify a different location to install HTSlib by configuring
+with --prefix=DIR or specify locations for particular parts of HTSlib by
+configuring with --libdir=DIR and so on.  Type './configure --help' for
+the full list of such install directory options.
+
+Alternatively you can specify different locations at install time by
+typing 'make prefix=DIR install' or 'make libdir=DIR install' and so on.
+Consult the list of prefix/exec_prefix/etc variables near the top of the
+Makefile for the full list of such variables that can be overridden.
+
+You can also specify a staging area by typing 'make DESTDIR=DIR install',
+possibly in conjunction with other --prefix or prefix=DIR settings.
+For example,
+
+    make DESTDIR=/tmp/staging prefix=/opt install
+
+would install into bin, lib, etc subdirectories under /tmp/staging/opt.
+
+
+System Specific Details
+=======================
+
+Installing the prerequisites is system dependent and there is more
+than one correct way of satisfying these, including downloading them
+from source, compiling and installing them yourself.
+
+For people with super-user access, we provide an example set of commands
+below for installing the dependencies on a variety of operating system
+distributions.  Note these are not specific recommendations on distribution,
+compiler or SSL implementation.  It is assumed you already have the core set
+of packages for the given distribution - the lists may be incomplete if
+this is not the case.
+
+Debian / Ubuntu
+---------------
+
+sudo apt-get update  # Ensure the package list is up to date
+sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libdeflate-dev
+
+Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev.
+
+RedHat / CentOS
+---------------
+
+sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel libdeflate-devel
+
+Note: On some versions perl FindBin will need to be installed to make the tests work.
+
+sudo yum install perl-FindBin
+
+Alpine Linux
+------------
+
+doas apk update  # Ensure the package list is up to date
+doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev openssl-dev
+
+Ideally also install a copy of libdeflate-dev for faster (de)compression.
+This can be found in the Alpine community repository.
+
+Note: some older Alpine versions use libressl-dev rather than openssl-dev.
+
+OpenSUSE
+--------
+
+sudo zypper install autoconf automake make gcc perl zlib-devel libbz2-devel xz-devel libcurl-devel libopenssl-devel
+
+Also install libdeflate-devel, available on OpenSUSE Leap 15.4 onwards
+or directly via git releases above.
+
+Windows MSYS2/MINGW64
+---------------------
+
+The configure script must be used as without it the compilation will
+likely fail.
+
+Follow MSYS2 installation instructions at
+https://www.msys2.org/wiki/MSYS2-installation/
+
+Then relaunch the MSYS2 shell using the "MSYS2 MinGW x64" executable.
+Once in that environment (check $MSYSTEM equals "MINGW64") install the
+compilers using pacman -S and the following package list:
+
+base-devel mingw-w64-x86_64-toolchain
+mingw-w64-x86_64-libdeflate mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2
+mingw-w64-x86_64-xz mingw-w64-x86_64-curl mingw-w64-x86_64-autotools
+mingw-w64-x86_64-tools-git
+
+(The last is only needed for building libraries compatible with MSVC.)
+
+HP-UX
+-----
+
+HP-UX requires that shared libraries have execute permission.  The
+default for HTSlib is to install with permission 644 (read-write for
+owner and read-only for group / other). This can be overridden by
+setting the LIB_PERM variable at install time with:
+
+    make install LIB_PERM=755
--- a/ext/htslib/LICENSE
+++ b/ext/htslib/LICENSE
@ -0,0 +1,69 @@
+[Files in this distribution outwith the cram/ subdirectory are distributed
+according to the terms of the following MIT/Expat license.]
+
+The MIT/Expat License
+
+Copyright (C) 2012-2024 Genome Research Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+
+[Files within the cram/ subdirectory in this distribution are distributed
+according to the terms of the following Modified 3-Clause BSD license.]
+
+The Modified-BSD License
+
+Copyright (C) 2012-2024 Genome Research Ltd.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the names Genome Research Ltd and Wellcome Trust Sanger Institute
+   nor the names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR ITS CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+[The use of a range of years within a copyright notice in this distribution
+should be interpreted as being equivalent to a list of years including the
+first and last year specified and all consecutive years between them.
+
+For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009,
+2011-2012" should be interpreted as being identical to a notice that reads
+"Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice
+that reads "Copyright (C) 2005-2012" should be interpreted as being identical
+to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010,
+2011, 2012".]
--- a/ext/htslib/Makefile
+++ b/ext/htslib/Makefile
--- a/ext/htslib/NEWS
+++ b/ext/htslib/NEWS
--- a/ext/htslib/README
+++ b/ext/htslib/README
@ -0,0 +1,27 @@
+HTSlib is an implementation of a unified C library for accessing common file
+formats, such as SAM, CRAM, VCF, and BCF, used for high-throughput sequencing
+data.  It is the core library used by samtools and bcftools.
+
+See INSTALL for building and installation instructions.
+
+Please cite this paper when using HTSlib for your publications:
+
+HTSlib: C library for reading/writing high-throughput sequencing data
+James K Bonfield, John Marshall, Petr Danecek, Heng Li, Valeriu Ohan, Andrew Whitwham, Thomas Keane, Robert M Davies
+GigaScience, Volume 10, Issue 2, February 2021, giab007, https://doi.org/10.1093/gigascience/giab007
+
+@article{10.1093/gigascience/giab007,
+    author = {Bonfield, James K and Marshall, John and Danecek, Petr and Li, Heng and Ohan, Valeriu and Whitwham, Andrew and Keane, Thomas and Davies, Robert M},
+    title = "{HTSlib: C library for reading/writing high-throughput sequencing data}",
+    journal = {GigaScience},
+    volume = {10},
+    number = {2},
+    year = {2021},
+    month = {02},
+    abstract = "{Since the original publication of the VCF and SAM formats, an explosion of software tools have been created to process these data files. To facilitate this a library was produced out of the original SAMtools implementation, with a focus on performance and robustness. The file formats themselves have become international standards under the jurisdiction of the Global Alliance for Genomics and Health. We present a software library for providing programmatic access to sequencing alignment and variant formats. It was born out of the widely used SAMtools and BCFtools applications. Considerable improvements have been made to the original code plus many new features including newer access protocols, the addition of the CRAM file format, better indexing and iterators, and better use of threading. Since the original Samtools release, performance has been considerably improved, with a BAM read-write loop running 5 times faster and BAM to SAM conversion 13 times faster (both using 16 threads, compared to Samtools 0.1.19). Widespread adoption has seen HTSlib downloaded more than 1 million times from GitHub and conda. The C library has been used directly by an estimated 900 GitHub projects and has been incorporated into Perl, Python, Rust, and R, significantly expanding the number of uses via other languages. HTSlib is open source and is freely available from htslib.org under MIT/BSD license.}",
+    issn = {2047-217X},
+    doi = {10.1093/gigascience/giab007},
+    url = {https://doi.org/10.1093/gigascience/giab007},
+    note = {giab007},
+    eprint = {https://academic.oup.com/gigascience/article-pdf/10/2/giab007/36332285/giab007.pdf},
+}
--- a/ext/htslib/README.large_positions.md
+++ b/ext/htslib/README.large_positions.md
@ -0,0 +1,234 @@
+# HTSlib 64 bit reference positions
+
+HTSlib version 1.10 onwards internally uses 64 bit reference positions.  This
+is to support analysis of species like axolotl, tulip and marbled lungfish
+which have, or are expected to have, chromosomes longer than two gigabases.
+
+# File format support
+
+Currently 64 bit positions can only be stored in SAM and VCF format files.
+Binary BAM, CRAM and BCF cannot be used due to limitations in the formats
+themselves.  As SAM and VCF are text formats, they have no limit on the
+size of numeric values. Note that while 64 bit positions are supported by
+default for SAM, for VCF they must be enabled explicitly at compile time
+by editing Makefile and adding -DVCF_ALLOW_INT64=1 to CFLAGS.
+
+# Compatibility issues to check
+
+Various data structure members, function parameters, and return values have
+been expanded from 32 to 64 bits.  As a result, some changes may be needed to
+code that uses the library, even if it does not support long references.
+
+## Variadic functions taking format strings
+
+The type of various structure members (e.g. `bam1_core_t::pos`) and return
+values from some functions (e.g. `bam_cigar2rlen()`) have been changed to
+`hts_pos_t`, which is a 64-bit signed integer.  Using these in 32-bit
+code will generally work (as long as the stored positions are within range),
+however care needs to be taken when these values are passed directly
+to functions like `printf()` which take a variable-length argument list and
+a format string.
+
+Header file `htslib/hts.h` defines macro `PRIhts_pos` which can be
+used in `printf()` format strings to get the correct format specifier for
+an `hts_pos_t` value.  Code that needs to print positions should be
+changed from:
+
+```c
+printf("Position is %d\n", bam->core.pos);
+```
+
+to:
+
+```c
+printf("Position is %"PRIhts_pos"\n", bam->core.pos);
+```
+
+If for some reason compatibility with older versions of HTSlib (which do
+not have `hts_pos_t` or `PRIhts_pos`) is needed, the value can be cast to
+`int64_t` and printed as an explicitly 64-bit value:
+
+```c
+#include <inttypes.h> // For PRId64 and int64_t
+
+printf("Position is %" PRId64 "\n", (int64_t) bam->core.pos);
+```
+
+Passing incorrect types to variadic functions like `printf()` can lead
+to incorrect behaviour and security risks, so it is important to track down
+and fix all of the places where this may happen.  Modern C compilers like
+gcc (version 3.0 onwards) and clang can check `printf()` and `scanf()`
+parameter types for compatibility against the format string.  To
+enable this, build code with `-Wall` or `-Wformat` and fix all the
+reported warnings.
+
+Where functions that take `printf`-style format strings are implemented,
+they should use the appropriate gcc attributes to enable format string
+checking.  `htslib/hts_defs.h` includes macros `HTS_FORMAT` and
+`HTS_PRINTF_FMT` which can be used to provide the attribute declaration
+in a portable way.  For example, `test/sam.c` uses them for a function
+that prints error messages:
+
+```
+void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) fail(const char *fmt, ...) { /* ... */ }
+```
+
+## Implicit type conversions
+
+Conversion of signed `int` or `int32_t` to `hts_pos_t` will always work.
+
+Conversion of `hts_pos_t` to `int` or `int32_t` will work as long as the value
+converted is within the range that can be stored in the destination.
+
+Code that casts unsigned `uint32_t` values to signed with the expectation
+that the result may be negative will no longer work as `hts_pos_t` can store
+values over UINT32_MAX.  Such code should be changed to use signed values.
+
+Functions hts_parse_region() and hts_parse_reg64() return special value
+`HTS_POS_MAX` for regions which extend to the end of the reference.
+This value is slightly smaller than INT64_MAX, but should be larger than
+any reference that is likely to be used.  When cast to `int32_t` the
+result should be `INT32_MAX`.
+
+# Upgrading code to work with 64 bit positions
+
+Variables used to store reference positions should be changed to
+type `hts_pos_t`.  Use `PRIhts_pos` in format strings when printing them.
+
+When converting positions stored in strings, use `strtoll()` in place of
+`atoi()` or `strtol()` (which produces a 32 bit value on 64-bit Windows and
+all 32-bit platforms).
+
+Programs which need to look up a reference sequence length from a `sam_hdr_t`
+structure should use `sam_hdr_tid2len()` instead of the old
+`sam_hdr_t::target_len` array (which is left as 32-bit for reasons of
+compatibility).  `sam_hdr_tid2len()` returns `hts_pos_t`, so works correctly
+for large references.
+
+Various functions which take pointer arguments have new versions which
+support `hts_pos_t *` arguments.  Code supporting 64-bit positions should
+use the new versions.  These are:
+
+Original function  | 64-bit version
+------------------ | --------------------
+fai_fetch()        | fai_fetch64()
+fai_fetchqual()    | fai_fetchqual64()
+faidx_fetch_seq()  | faidx_fetch_seq64()
+faidx_fetch_qual() | faidx_fetch_qual64()
+hts_parse_reg()    | hts_parse_reg64() or hts_parse_region()
+bam_plp_auto()     | bam_plp64_auto()
+bam_plp_next()     | bam_plp64_next()
+bam_mplp_auto()    | bam_mplp64_auto()
+
+Limited support has been added for 64-bit INFO values in VCF files, for large
+values in structural variant END tags.  New functions `bcf_update_info_int64()`
+and `bcf_get_info_int64()` can be used to set and fetch 64-bit INFO values.
+They both take arrays of `int64_t`.  `bcf_int64_missing` and
+`bcf_int64_vector_end` can be used to set missing and vector end values in
+these arrays.  The INFO data is stored in the minimum size needed, so there
+is no harm in using these functions to store smaller integer values.
+
+# Structure members that have changed size
+
+```
+File htslib/hts.h:
+   hts_pair32_t::begin
+   hts_pair32_t::end
+
+   (typedef hts_pair_pos_t is provided as a better-named replacement for hts_pair32_t)
+
+   hts_reglist_t::min_beg
+   hts_reglist_t::max_end
+
+   hts_itr_t::beg
+   hts_itr_t::end
+   hts_itr_t::curr_beg
+   hts_itr_t::curr_end
+
+File htslib/regidx.h:
+   reg_t::start
+   reg_t::end
+
+File htslib/sam.h:
+   bam1_core_t::pos
+   bam1_core_t::mpos
+   bam1_core_t::isize
+
+File htslib/synced_bcf_reader.h:
+   bcf_sr_regions_t::start
+   bcf_sr_regions_t::end
+   bcf_sr_regions_t::prev_start
+
+File htslib/vcf.h:
+   bcf_idinfo_t::info
+
+   bcf_info_t::v1::i
+
+   bcf1_t::pos
+   bcf1_t::rlen
+```
+
+# Functions where parameters or the return value have changed size
+
+Functions are annotated as follows:
+
+* `[new]`  The function has been added since version 1.9
+* `[parameters]` Function parameters have changed size
+* `[return]` Function return value has changed size
+
+```
+File htslib/faidx.h:
+
+   [new]        fai_fetch64()
+   [new]        fai_fetchqual64()
+   [new]        faidx_fetch_seq64()
+   [new]        faidx_fetch_qual64()
+   [new]        fai_parse_region()
+
+File htslib/hts.h:
+
+   [parameters] hts_idx_push()
+   [new]        hts_parse_reg64()
+   [parameters] hts_itr_query()
+   [parameters] hts_reg2bin()
+
+File htslib/kstring.h:
+
+   [new]        kputll()
+
+File htslib/regidx.h:
+
+   [parameters] regidx_overlap()
+
+File htslib/sam.h:
+
+   [new]        sam_hdr_tid2len()
+   [return]     bam_cigar2qlen()
+   [return]     bam_cigar2rlen()
+   [return]     bam_endpos()
+   [parameters] bam_itr_queryi()
+   [parameters] sam_itr_queryi()
+   [new]        bam_plp64_next()
+   [new]        bam_plp64_auto()
+   [new]        bam_mplp64_auto()
+   [parameters] sam_cap_mapq()
+   [parameters] sam_prob_realn()
+
+File htslib/synced_bcf_reader.h:
+
+   [parameters] bcf_sr_seek()
+   [parameters] bcf_sr_regions_overlap()
+
+File htslib/tbx.h:
+
+   [parameters] tbx_readrec()
+
+File htslib/vcf.h:
+
+   [parameters] bcf_readrec()
+   [new]        bcf_update_info_int64()
+   [new]        bcf_get_info_int64()
+   [return]     bcf_dec_int1()
+   [return]     bcf_dec_typed_int1()
+
+```
--- a/ext/htslib/annot-tsv.1
+++ b/ext/htslib/annot-tsv.1
@ -0,0 +1,259 @@
+'\" t
+.TH annot-tsv 1 "12 September 2024" "htslib-1.21" "Bioinformatics tools"
+.\"
+.\" Copyright (C) 2015, 2017-2018, 2023-2024 Genome Research Ltd.
+.\"
+.\" Author: Petr Danecek
+.\"
+.\" Permission is hereby granted, free of charge, to any person obtaining a
+.\" copy of this software and associated documentation files (the "Software"),
+.\" to deal in the Software without restriction, including without limitation
+.\" the rights to use, copy, modify, merge, publish, distribute, sublicense,
+.\" and/or sell copies of the Software, and to permit persons to whom the
+.\" Software is furnished to do so, subject to the following conditions:
+.\"
+.\" The above copyright notice and this permission notice shall be included in
+.\" all copies or substantial portions of the Software.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+.\" DEALINGS IN THE SOFTWARE.
+.\"
+.
+.\" For code blocks and examples (cf groff's Ultrix-specific man macros)
+.de EX
+
+.  in +\\$1
+.  nf
+.  ft CR
+..
+.de EE
+.  ft
+.  fi
+.  in
+
+..
+.SH NAME
+annot\-tsv \- transfer annotations from one TSV (tab\-separated values) file into another
+.SH SYNOPSIS
+.PP
+.B annot-tsv
+.RI [ OPTIONS ]
+.SH DESCRIPTION
+The program finds overlaps in two sets of genomic regions (for example two CNV call sets) and annotates regions of the target file
+.RB ( \-t ", " \-\-target\-file )
+with information from overlapping regions of the source file
+.RB ( \-s ", " \-\-source\-file ).
+
+It can transfer one or multiple columns
+.RB ( \-f ", " \-\-transfer )
+and the transfer can be conditioned on requiring matching values in one or more columns
+.RB ( \-m ", " \-\-match ).
+In addition to column transfer
+.RB ( \-f )
+and special annotations
+.RB ( \-a ", " \-\-annotate ),
+the program can operate in a simple grep-like mode and print matching lines (when neither
+.B \-f
+nor
+.B \-a
+are given) or drop matching lines
+.RB ( \-x ", " \-\-drop-overlaps ).
+
+All indexes and coordinates are 1-based and inclusive.
+.SH OPTIONS
+.SS "Common Options"
+.PP
+.BR \-c ", " \-\-core " SRC:TGT"
+.RS 4
+List of names of the core columns, in the order of chromosome, start and end positions, irrespective of the header name and order in which they appear in source or target files (for example "chr,beg,end:CHROM,START,END").
+If both files use the same header names, the TGT names can be omitted (for example "chr,beg,end").
+If SRC or TGT file has no header, 1-based indexes can be given instead (for example "chr,beg,end:3,1,2").
+Note that regions are not required, the program can work with a list of positions (for example "chr,beg,end:CHROM,POS,POS").
+.RE
+.PP
+.BR \-f ", " \-\-transfer " SRC:TGT"
+.RS 4
+Comma-separated list of columns to transfer. If the SRC column does not exist, interpret it as the default value to fill in when a match is found or a dot (".") when a match is not found. If the TGT column does not exist, a new column is created. If the TGT column already exists, its values will be overwritten when overlap is found and left as is otherwise.
+.RE
+.PP
+.BR \-m ", " \-\-match " SRC:TGT"
+.RS 4
+The columns required to be identical
+.RE
+.PP
+.BR \-o ", " \-\-output " FILE"
+.RS 4
+Output file name, by default the result is printed on standard output
+.RE
+.PP
+.BR \-s ", " \-\-source\-file " FILE"
+.RS 4
+Source file with annotations to transfer
+.RE
+.PP
+.BR \-t ", " \-\-target\-file " FILE"
+.RS 4
+Target file to be extended with annotations from
+.BR \-s ", " \-\-source\-file
+.RE
+.SS "Other options"
+.PP
+.B \-\-allow\-dups
+.RS 4
+Add the same annotations multiple times if multiple overlaps are found
+.RE
+.PP
+.B \-\-help
+.RS 4
+This help message
+.RE
+.PP
+.BR \-\-max\-annots " INT"
+.RS 4
+Add at most INT annotations per column to save time when many overlaps are found with a single region
+.RE
+.PP
+.B \-\-version
+.RS 4
+Print version string and exit
+.RE
+.PP
+.BR \-a ", " \-\-annotate " LIST"
+.RS 4
+Add one or more special annotations and their target names separated by ':'. If no target name is given, the special annotation's name will be used in the output header.
+.PP
+.I cnt
+.RS 4
+number of overlapping regions
+.RE
+.PP
+.I frac
+.RS 4
+fraction of the target region with an overlap
+.RE
+.PP
+.I nbp
+.RS 4
+number of source base pairs in the overlap
+.RE
+.RE
+.PP
+.BR \-d ", " \-\-delim " SRC:TGT"
+.RS 4
+Column delimiter in the source and the target file. For example, if both files are comma-delimited, run with
+"--delim ,:," or simply "--delim ,". If the source file is comma-delimited and the target file is tab-delimited,
+run with "-d $',:\\t'".
+.RE
+.PP
+.BR \-h ", " \-\-headers " SRC:TGT"
+.RS 4
+Line number of the header row with column names. By default the first line is interpreted as header if it starts with the comment
+character ("#"), otherwise expects numeric indices. However, if the first line does not start with "#" but still
+contains the column names, use "--headers 1:1". To ignore existing header (skip comment lines) and use numeric indices,
+use "--headers 0:0" which is equivalent to "--ignore-headers". When a negative value is given, it is interpreted as the number of
+lines from the end of the comment block. Specifically, "--headers -1" takes the column names from the last line of
+the comment block (e.g., the "#CHROM" line in the VCF format).
+.RE
+.PP
+.BR \-H ", " \-\-ignore\-headers
+.RS 4
+Ignore the headers completely and use numeric indexes even when a header exists
+.RE
+.PP
+.BR \-I ", " \-\-no\-hdr\-idx
+.RS 4
+Suppress index numbers in the printed header. If given twice, drop the entire header.
+.RE
+.PP
+.BR \-O ", " \-\-overlap " FLOAT,[FLOAT]"
+.RS 4
+Minimum overlap as a fraction of region length in SRC and TGT, respectively (with two numbers), or in
+at least one of the overlapping regions (with a single number). If also
+.BR \-r ", " \-\-reciprocal
+is given, require at least
+.I FLOAT
+overlap with respect to both regions. Two identical numbers are equivalent to running with
+.BR \-r ", " \-\-reciprocal
+.RE
+.PP
+.BR \-r ", " \-\-reciprocal
+.RS 4
+Require the
+.BR \-O ", " \-\-overlap
+with respect to both overlapping regions
+.RE
+.PP
+.BR \-x ", " \-\-drop-overlaps
+.RS 4
+Drop overlapping regions (cannot be combined with
+.BR \-f ", " \-\-transfer )
+.RE
+.SH EXAMPLE
+
+Both SRC and TGT input files must be tab-delimited files with or without a header; their columns can be named differently and can appear in arbitrary order. For example, consider the source file
+
+.EX
+#chr   beg   end   sample   type   qual
+chr1   100   200   smpl1    DEL    10
+chr1   300   400   smpl2    DUP    30
+.EE
+and the target file
+.EX
+150   200   chr1   smpl1
+150   200   chr1   smpl2
+350   400   chr1   smpl1
+350   400   chr1   smpl2
+.EE
+In the first example we transfer type and quality but only for regions with matching sample. Notice that the header is present in SRC but not in TGT, therefore we use column indexes for the latter
+.EX
+annot-tsv -s src.txt.gz -t tgt.txt.gz -c chr,beg,end:3,1,2 -m sample:4 -f type,qual
+150   200   chr1   smpl1   DEL   10
+150   200   chr1   smpl2   .     .
+350   400   chr1   smpl1   .     .
+350   400   chr1   smpl2   DUP   30
+.EE
+The next example demonstrates the special annotations nbp and cnt,
+with target name as pair,count.
+In this case we use a target file with headers so that column names will
+be copied to the output:
+.EX
+#from	to	chrom	sample
+150	200	chr1	smpl1
+150	200	chr1	smpl2
+350	400	chr1	smpl1
+350	400	chr1	smpl2
+.EE
+
+.EX
+annot-tsv -s src.txt.gz -t tgt_hdr.txt.gz -c chr,beg,end:chrom,from,to -m sample -f type,qual -a nbp,cnt:pair,count
+#[1]from	[2]to	[3]chrom	[4]sample	[5]type	[6]qual	[7]pair	[8]count
+150	200	chr1	smpl1	DEL	10	51	1
+150	200	chr1	smpl2	.	.	0	0
+350	400	chr1	smpl1	.	.	0	0
+350	400	chr1	smpl2	DUP	30	51	1
+.EE
+Either the SRC or the TGT file can be streamed from stdin
+.EX
+cat src.txt | annot\-tsv \-t tgt.txt \-c chr,beg,end:3,1,2 \-m sample:4 \-f type,qual \-o output.txt
+cat tgt.txt | annot\-tsv \-s src.txt \-c chr,beg,end:3,1,2 \-m sample:4 \-f type,qual \-o output.txt
+.EE
+
+The program can be used in a grep-like mode to print only matching regions of the target file without modifying the records
+
+.EX
+annot\-tsv \-s src.txt \-t tgt.txt \-c chr,beg,end:3,1,2 \-m sample:4
+150   200   chr1   smpl1
+350   400   chr1   smpl2
+.EE
+
+.SH AUTHORS
+The program was written by Petr Danecek and was originally published on github as annot\-regs
+.SH COPYING
+The MIT/Expat License, see the LICENSE document for details.
+.br
+Copyright (c) Genome Research Ltd.
--- a/ext/htslib/annot-tsv.c
+++ b/ext/htslib/annot-tsv.c
--- a/ext/htslib/bcf_sr_sort.c
+++ b/ext/htslib/bcf_sr_sort.c
@ -0,0 +1,707 @@
+/*
+    Copyright (C) 2017-2021 Genome Research Ltd.
+
+    Author: Petr Danecek <pd3@sanger.ac.uk>
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+*/
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <assert.h>
+#include <strings.h>
+
+#include "bcf_sr_sort.h"
+#include "htslib/khash_str2int.h"
+#include "htslib/kbitset.h"
+
+// Simplified variant type bits; the VCF_* types of each variant are translated
+// to these in bcf_sr_sort_set()
+#define SR_REF   1
+#define SR_SNP   2
+#define SR_INDEL 4
+#define SR_OTHER 8
+// Pairing score of type `a` with type `b`: `a` forms the high four bits and `b`
+// the low four bits of the index into srt->score
+#define SR_SCORE(srt,a,b) (srt)->score[((a)<<4)|(b)]
+
+// Logical AND: returns 1 if the two bitsets share at least one set bit and
+// 0 otherwise. Only the first bs1->n elements of both bitsets are examined.
+static inline int kbs_logical_and(kbitset_t *bs1, kbitset_t *bs2)
+{
+    // For bitsets of unequal size the limit would have to be
+    //  bs1->n < bs2->n ? bs1->n : bs2->n
+    const int nwords = bs1->n;
+    int iword = 0;
+
+    while ( iword < nwords )
+    {
+        if ( (bs1->b[iword] & bs2->b[iword]) != 0 ) return 1;
+        iword++;
+    }
+    return 0;
+}
+
+// Bitwise OR of src into dst; dst is updated in place, src is only read
+static inline void kbs_bitwise_or(kbitset_t *dst, kbitset_t *src)
+{
+    int iword = 0;
+    while ( iword < dst->n )
+    {
+        dst->b[iword] = dst->b[iword] | src->b[iword];
+        iword++;
+    }
+}
+
+
+// Fill the 256-element pairing score table srt->score according to the pairing
+// logic requested in srt->pair
+static void bcf_sr_init_scores(sr_sort_t *srt)
+{
+    int idx;
+
+    // The bigger the score, the higher the priority; zero means the pairing is forbidden
+
+    // BCF_SR_PAIR_ANY switches on all of the more specific pairing modes as well
+    if ( srt->pair & BCF_SR_PAIR_ANY )
+        srt->pair |= (BCF_SR_PAIR_SNPS | BCF_SR_PAIR_INDELS | BCF_SR_PAIR_SNP_REF | BCF_SR_PAIR_INDEL_REF);
+
+    // pairs of the same type
+    if ( srt->pair & BCF_SR_PAIR_SNPS )
+        SR_SCORE(srt,SR_SNP,SR_SNP) = 3;
+    if ( srt->pair & BCF_SR_PAIR_INDELS )
+        SR_SCORE(srt,SR_INDEL,SR_INDEL) = 3;
+
+    // a variant paired with a reference-only record, in both directions
+    if ( srt->pair & BCF_SR_PAIR_SNP_REF )
+    {
+        SR_SCORE(srt,SR_REF,SR_SNP) = 2;
+        SR_SCORE(srt,SR_SNP,SR_REF) = 2;
+    }
+    if ( srt->pair & BCF_SR_PAIR_INDEL_REF )
+    {
+        SR_SCORE(srt,SR_REF,SR_INDEL) = 2;
+        SR_SCORE(srt,SR_INDEL,SR_REF) = 2;
+    }
+
+    // with BCF_SR_PAIR_ANY every remaining combination is allowed with the lowest priority
+    if ( srt->pair & BCF_SR_PAIR_ANY )
+    {
+        for (idx=0; idx<256; idx++)
+        {
+            if ( srt->score[idx] ) continue;
+            srt->score[idx] = 1;
+        }
+    }
+
+    // Entries still unset take the best score found among the pairs of
+    // single type bits which are set in the index
+    for (idx=0; idx<256; idx++)
+    {
+        if ( srt->score[idx] ) continue;      // already set
+
+        int best = 0, hi, lo;
+        for (hi=SR_REF; hi<=SR_OTHER; hi<<=1)        // type bit in the high four bits
+        {
+            if ( !(idx & (hi<<4)) ) continue;
+            for (lo=SR_REF; lo<=SR_OTHER; lo<<=1)    // type bit in the low four bits
+            {
+                if ( !(idx & lo) ) continue;
+                if ( best < SR_SCORE(srt,hi,lo) ) best = SR_SCORE(srt,hi,lo);
+            }
+        }
+        srt->score[idx] = best;
+    }
+}
+// Returns 1 when avar and bvar have the same number of alternate alleles, the
+// same string length, and every comma-separated allele of avar is also found,
+// compared case-insensitively, among the alleles of bvar. Returns 0 otherwise.
+static int multi_is_exact(var_t *avar, var_t *bvar)
+{
+    if ( avar->nalt != bvar->nalt ) return 0;
+
+    int alen = strlen(avar->str);
+    int blen = strlen(bvar->str);
+    if ( alen != blen ) return 0;
+
+    const char *atok, *aend;
+    for (atok = avar->str; *atok; atok = *aend ? aend+1 : aend)
+    {
+        // find the end of the current allele of avar
+        for (aend = atok; *aend && *aend!=','; aend++) ;
+
+        // look it up among the alleles of bvar
+        int found = 0;
+        const char *btok = bvar->str;
+        while ( *btok && !found )
+        {
+            const char *bend = btok;
+            while ( *bend && *bend!=',' ) bend++;
+            if ( bend - btok == aend - atok && strncasecmp(atok,btok,bend-btok)==0 )
+                found = 1;
+            else
+                btok = *bend ? bend+1 : bend;
+        }
+        if ( !found ) return 0;
+    }
+    return 1;
+}
+// Returns 1 as soon as any comma-separated allele of avar is found, compared
+// case-insensitively, among the alleles of bvar; returns 0 when none is shared
+static int multi_is_subset(var_t *avar, var_t *bvar)
+{
+    const char *atok = avar->str;
+    while ( *atok )
+    {
+        // find the end of the current allele of avar
+        const char *aend = atok;
+        while ( *aend && *aend!=',' ) aend++;
+
+        // compare it with each allele of bvar in turn
+        const char *btok, *bend;
+        for (btok = bvar->str; *btok; btok = *bend ? bend+1 : bend)
+        {
+            for (bend = btok; *bend && *bend!=','; bend++) ;
+            if ( bend - btok != aend - atok ) continue;
+            if ( strncasecmp(atok,btok,bend-btok)==0 ) return 1;
+        }
+        atok = *aend ? aend+1 : aend;
+    }
+    return 0;
+}
+// Score how well the variant sets ivset and jvset can be paired.
+//
+// Returns UINT32_MAX when a pair of variants with identical strings or shared
+// alleles is found, 0 when the sets must not be paired, and otherwise a value
+// built from the lowest SR_SCORE found (in the high bits) and the number of
+// VCFs in both sets (in the low bits).
+static uint32_t pairing_score(sr_sort_t *srt, int ivset, int jvset)
+{
+    varset_t *iv = &srt->vset[ivset];
+    varset_t *jv = &srt->vset[jvset];
+
+    // Restrictive logic: the strictest type from a group is selected,
+    // so that, for example, snp+ref does not lead to the inclusion of an indel
+    int i,j;
+    uint32_t min = UINT32_MAX;
+    for (i=0; i<iv->nvar; i++)
+    {
+        var_t *ivar = &srt->var[iv->var[i]];
+        for (j=0; j<jv->nvar; j++)
+        {
+            var_t *jvar = &srt->var[jv->var[j]];
+            // With BCF_SR_PAIR_EXACT only identical variants can pair, type scores are not consulted
+            if ( srt->pair & BCF_SR_PAIR_EXACT )
+            {
+                if ( ivar->type != jvar->type ) continue;
+                if ( !strcmp(ivar->str,jvar->str) ) return UINT32_MAX;  // exact match, best possibility
+                if ( multi_is_exact(ivar,jvar) ) return UINT32_MAX; // identical alleles
+                continue;
+            }
+            if ( ivar->type==jvar->type && !strcmp(ivar->str,jvar->str) ) return UINT32_MAX;  // exact match, best possibility
+            if ( ivar->type & jvar->type && multi_is_subset(ivar,jvar) ) return UINT32_MAX; // one of the alleles is identical
+
+            uint32_t score = SR_SCORE(srt,ivar->type,jvar->type);
+            if ( !score ) return 0;     // some of the varsets in the two groups are not compatible, will not pair
+            if ( min>score ) min = score;
+        }
+    }
+    // No identical variant was found above, so there is nothing to pair in the exact mode
+    if ( srt->pair & BCF_SR_PAIR_EXACT ) return 0;
+
+    // At least one pair of variants must have been scored by now
+    assert( min!=UINT32_MAX );
+
+    // Total number of VCFs in both sets, added below to the score
+    uint32_t cnt = 0;
+    for (i=0; i<iv->nvar; i++) cnt += srt->var[iv->var[i]].nvcf;
+    for (j=0; j<jv->nvar; j++) cnt += srt->var[jv->var[j]].nvcf;
+
+    return (1u<<(28+min)) + cnt;
+}
+// Remove the variant set jvset from the list: the sets following it, their rows
+// of the pairing matrix and their counts are all shifted down by one
+static void remove_vset(sr_sort_t *srt, int jvset)
+{
+    int ntail = srt->nvset - jvset - 1;     // number of sets following jvset
+    if ( ntail > 0 )
+    {
+        // The removed element is not overwritten but moved to the last slot
+        varset_t removed = srt->vset[jvset];
+        memmove(srt->vset + jvset, srt->vset + jvset + 1, sizeof(varset_t)*ntail);
+        srt->vset[srt->nvset-1] = removed;
+
+        int *row = srt->pmat + jvset*srt->ngrp;
+        memmove(row, row + srt->ngrp, sizeof(int)*ntail*srt->ngrp);
+
+        memmove(srt->cnt + jvset, srt->cnt + jvset + 1, sizeof(int)*ntail);
+    }
+    srt->nvset--;
+}
+// Merge two variant sets into the one with the lower index and remove the other.
+// Returns the index of the merged set.
+static int merge_vsets(sr_sort_t *srt, int ivset, int jvset)
+{
+    // make ivset the lower of the two indexes
+    if ( ivset > jvset ) { int swap = ivset; ivset = jvset; jvset = swap; }
+
+    varset_t *dst = &srt->vset[ivset];
+    varset_t *src = &srt->vset[jvset];
+
+    kbs_bitwise_or(dst->mask,src->mask);
+
+    // append the variants of src to dst
+    int k, nold = dst->nvar;
+    dst->nvar += src->nvar;
+    hts_expand(int, dst->nvar, dst->mvar, dst->var);
+    for (k=0; k<src->nvar; k++) dst->var[nold+k] = src->var[k];
+
+    // add up the rows of the pairing matrix and the counts
+    int *drow = srt->pmat + ivset*srt->ngrp;
+    int *srow = srt->pmat + jvset*srt->ngrp;
+    for (k=0; k<srt->ngrp; k++) drow[k] += srow[k];
+    srt->cnt[ivset] += srt->cnt[jvset];
+
+    remove_vset(srt, jvset);
+
+    return ivset;
+}
+
+// Append one row to the output buffers: every reader gets a new slot which is
+// set to the reader's record from the variant set ivset, or to NULL if the
+// reader has none. The set is then removed from the list. Always returns 0.
+static int push_vset(sr_sort_t *srt, int ivset)
+{
+    varset_t *vset = &srt->vset[ivset];
+    int ireader, ivar, k;
+
+    // open a new, empty slot at the end of each reader's buffer
+    for (ireader=0; ireader<srt->sr->nreaders; ireader++)
+    {
+        vcf_buf_t *buf = srt->vcf_buf + ireader;
+        int islot = buf->nrec++;
+        hts_expand(bcf1_t*,buf->nrec,buf->mrec,buf->rec);
+        buf->rec[islot] = NULL;
+    }
+
+    // fill the slot of the readers which carry a variant from this set
+    for (ivar=0; ivar<vset->nvar; ivar++)
+    {
+        var_t *var = srt->var + vset->var[ivar];
+        for (k=0; k<var->nvcf; k++)
+        {
+            vcf_buf_t *buf = srt->vcf_buf + var->vcf[k];
+            buf->rec[buf->nrec-1] = var->rec[k];
+        }
+    }
+
+    remove_vset(srt, ivset);
+    return 0; // FIXME: check for errs in this function
+}
+
+// qsort() comparison function for an array of C strings, ordered by strcmp()
+static int cmpstringp(const void *p1, const void *p2)
+{
+    const char *lhs = * (char * const *) p1;
+    const char *rhs = * (char * const *) p2;
+    return strcmp(lhs, rhs);
+}
+
+#define DEBUG_VSETS 0
+#if DEBUG_VSETS
+// Debugging aid: print each variant set to stderr - the elements of its
+// bitmask followed by its variant strings, each with the list of its VCF indexes
+void debug_vsets(sr_sort_t *srt)
+{
+    int iset;
+    for (iset=0; iset<srt->nvset; iset++)
+    {
+        varset_t *vset = &srt->vset[iset];
+        int iword, ivar, ivcf;
+
+        fprintf(stderr,"dbg_vset %d:", iset);
+        for (iword=0; iword<vset->mask->n; iword++)
+            fprintf(stderr,"%c%lu", iword ? ':' : ' ', vset->mask->b[iword]);
+        fprintf(stderr,"\t");
+        for (ivar=0; ivar<vset->nvar; ivar++)
+        {
+            var_t *var = &srt->var[vset->var[ivar]];
+            fprintf(stderr,"\t%s",var->str);
+            for (ivcf=0; ivcf<var->nvcf; ivcf++)
+                fprintf(stderr,"%c%d", ivcf ? ',' : ':', var->vcf[ivcf]);
+        }
+        fprintf(stderr,"\n");
+    }
+}
+#endif
+
+#define DEBUG_VBUF 0
+#if DEBUG_VBUF
+// Debugging aid: for each buffered row print to stderr pos+1 of every reader's
+// record, or 0 for readers which have no record in the row
+void debug_vbuf(sr_sort_t *srt)
+{
+    int irow, ireader;
+    for (irow=0; irow<srt->vcf_buf[0].nrec; irow++)
+    {
+        fprintf(stderr,"dbg_vbuf %d:\t", irow);
+        for (ireader=0; ireader<srt->sr->nreaders; ireader++)
+        {
+            bcf1_t *rec = srt->vcf_buf[ireader].rec[irow];
+            fprintf(stderr,"\t%"PRIhts_pos, rec ? rec->pos+1 : 0);
+        }
+        fprintf(stderr,"\n");
+    }
+}
+#endif
+
+// Create the key identifying the current group of variants: the ';'-separated
+// variant strings collected in srt->str are sorted and joined again with ';'.
+//
+// Note that srt->str.s is modified, the separators preceding the offsets in
+// srt->off are overwritten with NUL bytes. The returned string is newly
+// allocated and is never NULL; the program is terminated if the allocation
+// fails, the same way as on the kbs_resize() failures elsewhere in this file.
+static char *grp_create_key(sr_sort_t *srt)
+{
+    if ( !srt->str.l )
+    {
+        // no variants, the key is an empty string
+        char *empty = strdup("");
+        if ( !empty )
+        {
+            fprintf(stderr, "[%s:%d %s] strdup failed\n", __FILE__,__LINE__,__func__);
+            exit(1);
+        }
+        return empty;
+    }
+    int i;
+    hts_expand(char*,srt->noff,srt->mcharp,srt->charp);
+    for (i=0; i<srt->noff; i++)
+    {
+        // split the buffer into separate NUL-terminated strings
+        srt->charp[i] = srt->str.s + srt->off[i];
+        if ( i>0 ) srt->charp[i][-1] = 0;
+    }
+    qsort(srt->charp, srt->noff, sizeof(*srt->charp), cmpstringp);
+    char *ret = (char*) malloc(srt->str.l + 1), *ptr = ret;
+    if ( !ret )
+    {
+        // previously the NULL pointer was passed straight to memcpy() below
+        fprintf(stderr, "[%s:%d %s] malloc failed\n", __FILE__,__LINE__,__func__);
+        exit(1);
+    }
+    for (i=0; i<srt->noff; i++)
+    {
+        int len = strlen(srt->charp[i]);
+        memcpy(ptr, srt->charp[i], len);
+        ptr += len + 1;
+        ptr[-1] = i+1==srt->noff ? 0 : ';';    // separator, or the terminator after the last string
+    }
+    return ret;
+}
+// Make the reader idx the only active reader. Always returns 0.
+int bcf_sr_sort_set_active(sr_sort_t *srt, int idx)
+{
+    hts_expand(int,idx+1,srt->mactive,srt->active);
+    srt->active[0] = idx;
+    srt->nactive   = 1;
+    return 0; // FIXME: check for errs in this function
+}
+// Append the reader idx to the list of active readers. Always returns 0.
+int bcf_sr_sort_add_active(sr_sort_t *srt, int idx)
+{
+    // The new element is stored at index srt->nactive, so the array must hold
+    // at least nactive+1 elements. Sizing it by idx+1 alone, as was done
+    // previously, is not enough when idx is smaller than the number of readers
+    // already active. The idx+1 minimum is kept for backward compatibility.
+    int need = srt->nactive + 1;
+    if ( need < idx + 1 ) need = idx + 1;
+    hts_expand(int,need,srt->mactive,srt->active);
+    srt->active[srt->nactive++] = idx;
+    return 0; // FIXME: check for errs in this function
+}
+// Fill the output buffers srt->vcf_buf for the position (chr,min_pos).
+//
+// The records of all active readers at this position are collected and turned
+// into variants (srt->var), the readers are grouped by the combination of
+// variants they carry (srt->grp), and the variants are then merged into
+// variant sets (srt->vset) which are pushed to the output buffers one row at a
+// time. Always returns 0.
+static int bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, hts_pos_t min_pos)
+{
+    if ( !srt->grp_str2int )
+    {
+        // first time here, initialize
+        if ( !srt->pair )
+        {
+            if ( readers->collapse==COLLAPSE_NONE ) readers->collapse = BCF_SR_PAIR_EXACT;
+            bcf_sr_set_opt(readers, BCF_SR_PAIR_LOGIC, readers->collapse);
+        }
+        bcf_sr_init_scores(srt);
+        srt->grp_str2int = khash_str2int_init();
+        srt->var_str2int = khash_str2int_init();
+    }
+    // Free the keys left in the hashes by the previous call, they are allocated
+    // below by strdup() and grp_create_key()
+    int k;
+    khash_t(str2int) *hash;
+    hash = srt->grp_str2int;
+    for (k=0; k < kh_end(hash); k++)
+        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
+    hash = srt->var_str2int;
+    for (k=0; k < kh_end(hash); k++)
+        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
+    kh_clear(str2int, srt->grp_str2int);
+    kh_clear(str2int, srt->var_str2int);
+    srt->ngrp = srt->nvar = srt->nvset = 0;
+
+    // Scratch group being filled for the current reader; its contents are
+    // handed over to srt->grp when the group turns out to be new
+    grp_t grp;
+    memset(&grp,0,sizeof(grp_t));
+
+    // group VCFs into groups, each with a unique combination of variants in the duplicate lines
+    int ireader,ivar,irec,igrp,ivset,iact;
+    for (ireader=0; ireader<readers->nreaders; ireader++) srt->vcf_buf[ireader].nrec = 0;
+    for (iact=0; iact<srt->nactive; iact++)
+    {
+        ireader = srt->active[iact];
+        bcf_sr_t *reader = &readers->readers[ireader];
+        int rid   = bcf_hdr_name2id(reader->header, chr);
+        grp.nvar  = 0;
+        hts_expand(int,reader->nbuffer,srt->moff,srt->off);
+        srt->noff  = 0;
+        srt->str.l = 0;
+        // The buffered records are at indexes 1..nbuffer; stop at the first
+        // record which is not at the requested position
+        for (irec=1; irec<=reader->nbuffer; irec++)
+        {
+            bcf1_t *line = reader->buffer[irec];
+            if ( line->rid!=rid || line->pos!=min_pos ) break;
+
+            // Append the variant string "REF>ALT[,REF>ALT..]" of this record to
+            // srt->str and remember its offset
+            if ( srt->str.l ) kputc(';',&srt->str);
+            srt->off[srt->noff++] = srt->str.l;
+            size_t beg  = srt->str.l;
+            int end_pos = -1;
+            for (ivar=1; ivar<line->n_allele; ivar++)
+            {
+                if ( ivar>1 ) kputc(',',&srt->str);
+                kputs(line->d.allele[0],&srt->str);
+                kputc('>',&srt->str);
+                kputs(line->d.allele[ivar],&srt->str);
+
+                // If symbolic allele, check also the END tag in case there are multiple events,
+                // such as <DEL>s, starting at the same positions
+                if ( line->d.allele[ivar][0]=='<' )
+                {
+                    // end_pos: -1 not looked up yet, 0 no END tag
+                    if ( end_pos==-1 )
+                    {
+                        bcf_info_t *end_info = bcf_get_info(reader->header,line,"END");
+                        if ( end_info )
+                            end_pos = (int)end_info->v1.i;  // this is only to create a unique id, we don't mind a potential int64 overflow
+                        else
+                            end_pos = 0;
+                    }
+                    if ( end_pos )
+                    {
+                        kputc('/',&srt->str);
+                        kputw(end_pos, &srt->str);
+                    }
+                }
+            }
+            // no alternate alleles, the string is "REF>."
+            if ( line->n_allele==1 )
+            {
+                kputs(line->d.allele[0],&srt->str);
+                kputsn(">.",2,&srt->str);
+            }
+
+            // Create new variant or attach to existing one. But careful, there can be duplicate
+            // records with the same POS,REF,ALT (e.g. in dbSNP-b142)
+            char *var_str = beg + srt->str.s;
+            int ret, var_idx = 0, var_end = srt->str.l;
+            while ( 1 )
+            {
+                ret = khash_str2int_get(srt->var_str2int, var_str, &ivar);
+                if ( ret==-1 ) break;
+
+                var_t *var = &srt->var[ivar];
+                if ( var->vcf[var->nvcf-1] != ireader ) break;
+
+                // This reader has been attached to the variant already: append
+                // a running number to the string and look it up again
+                srt->str.l = var_end;
+                kputw(var_idx, &srt->str);
+                var_str = beg + srt->str.s;
+                var_idx++;
+            }
+            if ( ret==-1 )
+            {
+                ivar = srt->nvar++;
+                hts_expand0(var_t,srt->nvar,srt->mvar,srt->var);
+                srt->var[ivar].nvcf = 0;
+                khash_str2int_set(srt->var_str2int, strdup(var_str), ivar);
+                free(srt->var[ivar].str);   // possible left-over from the previous position
+            }
+            var_t *var = &srt->var[ivar];
+            var->nalt = line->n_allele - 1;
+            var->type = bcf_get_variant_types(line);
+            // Cut the string at the end of the variant proper, so that var->str
+            // is stored without the running number used in the hash key
+            srt->str.s[var_end] = 0;
+            if ( ret==-1 )
+                var->str = strdup(var_str);
+
+            // Attach the reader and its record to the variant; var->rec is
+            // resized whenever the allocated size of var->vcf changes
+            // NOTE(review): var->vcf stores int reader indexes while the element
+            // type given to hts_expand0() is int* - confirm this is intended.
+            // NOTE(review): the realloc() result is not checked.
+            int mvcf = var->mvcf;
+            var->nvcf++;
+            hts_expand0(int*, var->nvcf, var->mvcf, var->vcf);
+            if ( mvcf != var->mvcf ) var->rec = (bcf1_t **) realloc(var->rec,sizeof(bcf1_t*)*var->mvcf);
+            var->vcf[var->nvcf-1] = ireader;
+            var->rec[var->nvcf-1] = line;
+
+            // NOTE(review): grp.var is assigned int variant indexes while the
+            // element type given to hts_expand() is var_t - confirm this is intended
+            grp.nvar++;
+            hts_expand(var_t,grp.nvar,grp.mvar,grp.var);
+            grp.var[grp.nvar-1] = ivar;
+        }
+        // Find the group with this combination of variants or create a new one
+        char *grp_key = grp_create_key(srt);
+        int ret = khash_str2int_get(srt->grp_str2int, grp_key, &igrp);
+        if ( ret==-1 )
+        {
+            igrp = srt->ngrp++;
+            hts_expand0(grp_t, srt->ngrp, srt->mgrp, srt->grp);
+            free(srt->grp[igrp].var);
+            srt->grp[igrp] = grp;
+            srt->grp[igrp].key = grp_key;
+            khash_str2int_set(srt->grp_str2int, grp_key, igrp);
+            memset(&grp,0,sizeof(grp_t));   // the contents are now owned by srt->grp[igrp]
+        }
+        else
+            free(grp_key);
+        srt->grp[igrp].nvcf++;
+    }
+    free(grp.var);
+
+    // initialize bitmask - which groups is the variant present in
+    for (ivar=0; ivar<srt->nvar; ivar++)
+    {
+        if ( kbs_resize(&srt->var[ivar].mask, srt->ngrp) < 0 )
+        {
+            fprintf(stderr, "[%s:%d %s] kbs_resize failed\n", __FILE__,__LINE__,__func__);
+            exit(1);
+        }
+        kbs_clear(srt->var[ivar].mask);
+    }
+    for (igrp=0; igrp<srt->ngrp; igrp++)
+    {
+        for (ivar=0; ivar<srt->grp[igrp].nvar; ivar++)
+        {
+            int i = srt->grp[igrp].var[ivar];
+            kbs_insert(srt->var[i].mask, igrp);
+        }
+    }
+
+    // create the initial list of variant sets
+    for (ivar=0; ivar<srt->nvar; ivar++)
+    {
+        ivset = srt->nvset++;
+        hts_expand0(varset_t, srt->nvset, srt->mvset, srt->vset);
+
+        // each set starts with a single variant
+        // NOTE(review): vset->var is assigned int variant indexes while the
+        // element type given to hts_expand0() is var_t - confirm this is intended
+        varset_t *vset = &srt->vset[ivset];
+        vset->nvar = 1;
+        hts_expand0(var_t, vset->nvar, vset->mvar, vset->var);
+        vset->var[vset->nvar-1] = ivar;
+        var_t *var  = &srt->var[ivar];
+        vset->cnt   = var->nvcf;
+        if ( kbs_resize(&vset->mask, srt->ngrp) < 0 )
+        {
+            fprintf(stderr, "[%s:%d %s] kbs_resize failed\n", __FILE__,__LINE__,__func__);
+            exit(1);
+        }
+        kbs_clear(vset->mask);
+        kbs_bitwise_or(vset->mask, var->mask);
+
+        // translate the VCF_* variant types to the SR_* bits used with SR_SCORE
+        int type = 0;
+        if ( var->type==VCF_REF ) type |= SR_REF;
+        else
+        {
+            if ( var->type & VCF_SNP ) type |= SR_SNP;
+            if ( var->type & VCF_MNP ) type |= SR_SNP;
+            if ( var->type & VCF_INDEL ) type |= SR_INDEL;
+            if ( var->type & VCF_OTHER ) type |= SR_OTHER;
+        }
+        var->type = type;
+    }
+#if DEBUG_VSETS
+    debug_vsets(srt);
+#endif
+
+    // initialize the pairing matrix
+    hts_expand(int, srt->ngrp*srt->nvset, srt->mpmat, srt->pmat);
+    hts_expand(int, srt->nvset, srt->mcnt, srt->cnt);
+    memset(srt->pmat, 0, sizeof(*srt->pmat)*srt->ngrp*srt->nvset);
+    for (ivset=0; ivset<srt->nvset; ivset++)
+    {
+        varset_t *vset = &srt->vset[ivset];
+        for (igrp=0; igrp<srt->ngrp; igrp++) srt->pmat[ivset*srt->ngrp+igrp] = 0;
+        srt->cnt[ivset] = vset->cnt;
+    }
+
+    // pair the lines
+    while ( srt->nvset )
+    {
+#if DEBUG_VSETS
+    fprintf(stderr,"\n");
+    debug_vsets(srt);
+#endif
+
+        // start with the set which has the highest count
+        int imax = 0;
+        for (ivset=1; ivset<srt->nvset; ivset++)
+            if ( srt->cnt[imax] < srt->cnt[ivset] ) imax = ivset;
+
+        // find the best scoring set among those which share no group with imax
+        int ipair = -1;
+        uint32_t max_score = 0;
+        for (ivset=0; ivset<srt->nvset; ivset++)
+        {
+            if ( kbs_logical_and(srt->vset[imax].mask,srt->vset[ivset].mask) ) continue;   // cannot be merged
+            uint32_t score = pairing_score(srt, imax, ivset);
+            // fprintf(stderr,"score: %d %d, logic=%d \t..\t %u\n", imax,ivset,srt->pair,score);
+            if ( max_score < score ) { max_score = score; ipair = ivset; }
+        }
+
+        // merge rows creating a new variant set this way
+        if ( ipair!=-1 && ipair!=imax )
+        {
+            imax = merge_vsets(srt, imax, ipair);
+            continue;
+        }
+
+        // nothing more to merge with, output the set as one row
+        push_vset(srt, imax);
+    }
+
+    // remember the position so that bcf_sr_sort_next() can tell if the buffers are current
+    srt->chr = chr;
+    srt->pos = min_pos;
+
+    return 0;  // FIXME: check for errs in this function
+}
+
+// Move the next row of records at the position (chr,min_pos) to buffer[0] of
+// the readers and set readers->has_line accordingly.
+//
+// With a single active reader its first buffered record is returned directly
+// and the return value is 1. Otherwise the output buffers are rebuilt by
+// bcf_sr_sort_set() whenever the position has changed, and the function
+// returns the number of readers which have a record in the returned row, or 0
+// when no rows are left at this position.
+//
+// The program is terminated if the per-reader buffers cannot be allocated, the
+// same way as on the kbs_resize() failures elsewhere in this file.
+int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, hts_pos_t min_pos)
+{
+    int i,j;
+    assert( srt->nactive>0 );
+
+    if ( srt->nsr != readers->nreaders )
+    {
+        // the number of readers has changed since the last call
+        srt->sr = readers;
+        if ( srt->nsr < readers->nreaders )
+        {
+            // Go through a temporary pointer: previously the realloc() result
+            // was used unchecked and a failure led to memset() on an invalid
+            // pointer and to the loss of the original buffer
+            vcf_buf_t *vcf_buf = (vcf_buf_t*) realloc(srt->vcf_buf,readers->nreaders*sizeof(vcf_buf_t));
+            if ( !vcf_buf )
+            {
+                fprintf(stderr, "[%s:%d %s] realloc failed\n", __FILE__,__LINE__,__func__);
+                exit(1);
+            }
+            srt->vcf_buf = vcf_buf;
+            memset(srt->vcf_buf + srt->nsr, 0, sizeof(vcf_buf_t)*(readers->nreaders - srt->nsr));
+            if ( srt->msr < srt->nsr ) srt->msr = srt->nsr;
+        }
+        srt->nsr = readers->nreaders;
+        srt->chr = NULL;    // forces bcf_sr_sort_set() to be run below
+    }
+    if ( srt->nactive == 1 )
+    {
+        // Single active reader, no pairing is needed: rotate the first buffered
+        // record to buffer[0] and move the old buffer[0] past the end
+        if ( readers->nreaders>1 )
+            memset(readers->has_line, 0, readers->nreaders*sizeof(*readers->has_line));
+        bcf_sr_t *reader = &readers->readers[srt->active[0]];
+        assert( reader->buffer[1]->pos==min_pos );
+        bcf1_t *tmp = reader->buffer[0];
+        for (j=1; j<=reader->nbuffer; j++) reader->buffer[j-1] = reader->buffer[j];
+        reader->buffer[ reader->nbuffer ] = tmp;
+        reader->nbuffer--;
+        readers->has_line[srt->active[0]] = 1;
+        return 1;
+    }
+    if ( !srt->chr || srt->pos!=min_pos || strcmp(srt->chr,chr) ) bcf_sr_sort_set(readers, srt, chr, min_pos);
+
+    if ( !srt->vcf_buf[0].nrec ) return 0;
+
+#if DEBUG_VBUF
+    debug_vbuf(srt);
+#endif
+
+    int nret = 0;
+    for (i=0; i<srt->sr->nreaders; i++)
+    {
+        vcf_buf_t *buf = &srt->vcf_buf[i];
+
+        if ( buf->rec[0] )
+        {
+            // locate the record in the reader's buffer
+            bcf_sr_t *reader = &srt->sr->readers[i];
+            for (j=1; j<=reader->nbuffer; j++)
+                if ( reader->buffer[j] == buf->rec[0] ) break;
+
+            assert( j<=reader->nbuffer );
+
+            // move it to buffer[0], close the gap and keep the old buffer[0] past the end
+            bcf1_t *tmp = reader->buffer[0];
+            reader->buffer[0] = reader->buffer[j++];
+            for (; j<=reader->nbuffer; j++) reader->buffer[j-1] = reader->buffer[j];
+            reader->buffer[ reader->nbuffer ] = tmp;
+            reader->nbuffer--;
+
+            nret++;
+            srt->sr->has_line[i] = 1;
+        }
+        else
+            srt->sr->has_line[i] = 0;
+
+        // drop the first row from the output buffer
+        buf->nrec--;
+        if ( buf->nrec > 0 )
+            memmove(buf->rec, &buf->rec[1], buf->nrec*sizeof(bcf1_t*));
+    }
+    return nret;
+}
+// Release the output buffer of the reader i and close the gap in srt->vcf_buf
+void bcf_sr_sort_remove_reader(bcf_srs_t *readers, sr_sort_t *srt, int i)
+{
+    // vcf_buf is allocated only in bcf_sr_sort_next(). Without this check a call
+    // to bcf_sr_add_reader() followed immediately by bcf_sr_remove_reader()
+    // would crash the program here.
+    if ( !srt->vcf_buf ) return;
+
+    int ilast = srt->nsr - 1;
+    free(srt->vcf_buf[i].rec);
+    if ( i < ilast )
+        memmove(srt->vcf_buf + i, srt->vcf_buf + i + 1, (ilast - i)*sizeof(vcf_buf_t));
+    // the last element is now unused
+    memset(srt->vcf_buf + ilast, 0, sizeof(vcf_buf_t));
+}
+// Initialize the sorter: a structure passed by the caller is zeroed and
+// returned, with NULL a new zeroed structure is allocated (NULL is returned if
+// the allocation fails)
+sr_sort_t *bcf_sr_sort_init(sr_sort_t *srt)
+{
+    if ( srt )
+    {
+        memset(srt,0,sizeof(sr_sort_t));
+        return srt;
+    }
+    return calloc(1,sizeof(sr_sort_t));
+}
+// Forget the cached chromosome. With more than one active reader this makes
+// the next bcf_sr_sort_next() call run bcf_sr_sort_set() again.
+void bcf_sr_sort_reset(sr_sort_t *srt)
+{
+    srt->chr = NULL;
+}
+// Free all memory held by the sorter and zero the structure. The structure
+// itself is not freed.
+void bcf_sr_sort_destroy(sr_sort_t *srt)
+{
+    int k;
+
+    free(srt->active);
+    if ( srt->var_str2int ) khash_str2int_destroy_free(srt->var_str2int);
+    if ( srt->grp_str2int ) khash_str2int_destroy_free(srt->grp_str2int);
+
+    // per-reader output buffers
+    for (k=0; k<srt->nsr; k++) free(srt->vcf_buf[k].rec);
+    free(srt->vcf_buf);
+
+    // variants; all allocated elements are visited, not only those in use
+    for (k=0; k<srt->mvar; k++)
+    {
+        var_t *var = &srt->var[k];
+        free(var->str);
+        free(var->vcf);
+        free(var->rec);
+        kbs_destroy(var->mask);
+    }
+    free(srt->var);
+
+    // groups
+    for (k=0; k<srt->mgrp; k++) free(srt->grp[k].var);
+    free(srt->grp);
+
+    // variant sets
+    for (k=0; k<srt->mvset; k++)
+    {
+        varset_t *vset = &srt->vset[k];
+        kbs_destroy(vset->mask);
+        free(vset->var);
+    }
+    free(srt->vset);
+
+    // work buffers
+    free(srt->str.s);
+    free(srt->off);
+    free(srt->charp);
+    free(srt->cnt);
+    free(srt->pmat);
+
+    memset(srt,0,sizeof(*srt));
+}
+
--- a/ext/htslib/bcf_sr_sort.h
+++ b/ext/htslib/bcf_sr_sort.h
@ -0,0 +1,108 @@
+/*
+    Copyright (C) 2017 Genome Research Ltd.
+
+    Author: Petr Danecek <pd3@sanger.ac.uk>
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+*/
+
+/*
+    Reorder duplicate lines so that compatible variant types are
+    returned together by bcf_sr_next_line()
+
+    - readers grouped by variants. Even with many readers there will be
+      typically only several groups
+
+*/
+
+#ifndef BCF_SR_SORT_H
+#define BCF_SR_SORT_H
+
+#include "htslib/synced_bcf_reader.h"
+#include "htslib/kbitset.h"
+
+// Buffered records of a single reader (one vcf_buf_t per reader in sr_sort_t.vcf_buf)
+typedef struct
+{
+    int nrec, mrec;     // nrec: records currently held in rec; mrec: presumably the allocated size of rec - confirm in bcf_sr_sort.c
+    bcf1_t **rec;       // array of record pointers; the array is freed by bcf_sr_sort_destroy()
+}
+vcf_buf_t;
+
+// One variant (allele string) together with the readers and records that carry it
+typedef struct
+{
+    char *str;      // "A>C" for biallelic records or "A>C,A>CC" for multiallelic records
+    int type;       // VCF_SNP, VCF_REF, etc.
+    int nalt;       // number of alternate alleles in this record
+    int nvcf, mvcf, *vcf;   // the list of readers with the same variants
+    bcf1_t **rec;           // list of VCF records in the readers
+    kbitset_t *mask;        // which groups contain the variant
+}
+var_t;
+
+// A group of readers that carry the same variants
+typedef struct
+{
+    char *key;              // only for debugging
+    int nvar, mvar, *var;   // the variants and their type
+    int nvcf;               // number of readers with the same variants
+}
+grp_t;
+
+// A set of mutually compatible variants that can be output on the same line
+typedef struct
+{
+    int nvar, mvar, *var;   // list of compatible variants that can be output together
+    int cnt;                // number of readers in this group
+    kbitset_t *mask;        // which groups are populated in this set (replace with expandable bitmask)
+}
+varset_t;
+
+// Complete working state of the record sorter.
+// Initialised by bcf_sr_sort_init() and released by bcf_sr_sort_destroy().
+typedef struct
+{
+    uint8_t score[256];
+    int nvar, mvar;         // presumably used/allocated entries of var; bcf_sr_sort_destroy() releases entries 0..mvar-1
+    var_t *var;             // list of all variants from all readers
+    int nvset, mvset;       // as above for vset; bcf_sr_sort_destroy() releases entries 0..mvset-1
+    int mpmat, *pmat;       // pairing matrix, i-th vset and j-th group accessible as i*ngrp+j
+    int ngrp, mgrp;         // as above for grp; bcf_sr_sort_destroy() releases entries 0..mgrp-1
+    int mcnt, *cnt;         // number of VCF covered by a varset
+    grp_t *grp;             // list of VCF representatives, each with a unique combination of duplicate lines
+    varset_t *vset;         // list of variant sets - combinations of compatible variants across multiple groups ready for output
+    vcf_buf_t *vcf_buf;     // records sorted in output order, for each VCF
+    bcf_srs_t *sr;          // the synced reader; its has_line[] flags are set by the sorter
+    void *grp_str2int;      // khash string->int map, released with khash_str2int_destroy_free()
+    void *var_str2int;      // khash string->int map, released with khash_str2int_destroy_free()
+    kstring_t str;          // str.s is freed by bcf_sr_sort_destroy()
+    int moff, noff, *off, mcharp;   // off is freed by bcf_sr_sort_destroy()
+    char **charp;           // freed by bcf_sr_sort_destroy()
+    const char *chr;        // cleared by bcf_sr_sort_reset(); not freed by bcf_sr_sort_destroy()
+    hts_pos_t pos;
+    int nsr, msr;           // nsr: number of vcf_buf slots walked by destroy/remove_reader
+    int pair;
+    int nactive, mactive, *active;  // list of readers with lines at the current pos
+}
+sr_sort_t;
+
+// Zero an existing sr_sort_t, or allocate a zero-filled one when srt is NULL
+sr_sort_t *bcf_sr_sort_init(sr_sort_t *srt);
+// Clear the remembered chromosome (srt->chr)
+void bcf_sr_sort_reset(sr_sort_t *srt);
+int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, hts_pos_t pos);
+int bcf_sr_sort_set_active(sr_sort_t *srt, int i);
+int bcf_sr_sort_add_active(sr_sort_t *srt, int i);
+// Free everything owned by srt and zero the structure; srt itself is not freed
+void bcf_sr_sort_destroy(sr_sort_t *srt);
+// Free the record buffer of the i-th reader and close the gap in srt->vcf_buf
+void bcf_sr_sort_remove_reader(bcf_srs_t *readers, sr_sort_t *srt, int i);
+
+#endif
--- a/ext/htslib/bgzf.c
+++ b/ext/htslib/bgzf.c
--- a/ext/htslib/bgzip.1
+++ b/ext/htslib/bgzip.1
@ -0,0 +1,206 @@
+.TH bgzip 1 "12 September 2024" "htslib-1.21" "Bioinformatics tools"
+.SH NAME
+.PP
+bgzip \- Block compression/decompression utility
+.\"
+.\" Copyright (C) 2009-2011 Broad Institute.
+.\" Copyright (C) 2018, 2021-2024 Genome Research Limited.
+.\"
+.\" Author: Heng Li <lh3@sanger.ac.uk>
+.\"
+.\" Permission is hereby granted, free of charge, to any person obtaining a
+.\" copy of this software and associated documentation files (the "Software"),
+.\" to deal in the Software without restriction, including without limitation
+.\" the rights to use, copy, modify, merge, publish, distribute, sublicense,
+.\" and/or sell copies of the Software, and to permit persons to whom the
+.\" Software is furnished to do so, subject to the following conditions:
+.\"
+.\" The above copyright notice and this permission notice shall be included in
+.\" all copies or substantial portions of the Software.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+.\" DEALINGS IN THE SOFTWARE.
+.\"
+.
+.\" For code blocks and examples (cf groff's Ultrix-specific man macros)
+.de EX
+
+.  in +\\$1
+.  nf
+.  ft CR
+..
+.de EE
+.  ft
+.  fi
+.  in
+
+..
+.SH SYNOPSIS
+.PP
+.B bgzip
+.RB [ -cdfghikrt ]
+.RB [ -b
+.IR virtualOffset ]
+.RB [ -I
+.IR index_name ]
+.RB [ -l
+.IR compression_level ]
+.RB [ -o
+.IR outfile ]
+.RB [ -s
+.IR size ]
+.RB [ -@
+.IR threads ]
+.RI [ file " ...]"
+.PP
+.SH DESCRIPTION
+.PP
+Bgzip compresses files in a similar manner to, and compatible with, gzip(1).
+The file is compressed into a series of small (less than 64K) 'BGZF' blocks.
+This allows indexes to be built against the compressed file and used to
+retrieve portions of the data without having to decompress the entire file. 
+
+If no files are specified on the command line, bgzip will compress (or
+decompress if the -d option is used) standard input to standard output.
+If a file is specified, it will be compressed (or decompressed with -d).
+If the -c option is used, the result will be written to standard output,
+otherwise when compressing bgzip will write to a new file with a .gz
+suffix and remove the original.  When decompressing the input file must
+have a .gz suffix, which will be removed to make the output name.  Again
+after decompression completes the input file will be removed. When multiple
+files are given as input, the operation is performed on all of them. The
+access and modification times of the input file are copied to the output file.
+Note that the system may still update the access time when it deems appropriate.
+
+.SH OPTIONS
+.TP 10
+.B "--binary"
+Bgzip will attempt to ensure BGZF blocks end on a newline when the
+input is a text file.  The exception to this is where a single line is
+larger than a BGZF block (64Kb).  This can aid tools that use the
+index to perform random access on the compressed stream, as the start
+of a block is likely to also be the start of a text record.
+
+This option processes text files as if they were binary content,
+ignoring the location of newlines.  This also restores the behaviour
+for text files to bgzip version 1.15 and earlier.
+.TP
+.BI "-b, --offset " INT
+Decompress to standard output from virtual file position (0-based uncompressed
+offset).
+Implies -c and -d.
+.TP
+.B "-c, --stdout"
+Write to standard output, keep original files unchanged.
+.TP
+.B "-d, --decompress"
+Decompress.
+.TP
+.B "-f, --force"
+Overwrite files without asking, or decompress files that don't have a known
+compression filename extension (e.g., \fI.gz\fR) without asking.
+Use \fB--force\fR twice to do both without asking.
+.TP
+.B "-g, --rebgzip"
+Try to use an existing index to create a compressed file with matching
+block offsets.  The index must be specified using the \fB-I
+\fIfile.gzi\fR option.
+Note that this assumes that the same compression library and level are in use
+as when making the original file.
+Don't use it unless you know what you're doing.
+.TP
+.B "-h, --help"
+Displays a help message.
+.TP
+.B "-i, --index"
+Create a BGZF index while compressing.
+Unless the -I option is used, this will have the name of the compressed
+file with .gzi appended to it.
+.TP
+.BI "-I, --index-name " FILE
+Index file name.
+.TP
+.B "-k, --keep"
+Do not delete input file during operation.
+.TP
+.BI "-l, --compress-level " INT
+Compression level to use when compressing.
+From 0 to 9, or -1 for the default level set by the compression library. [-1]
+.TP
+.BI "-o, --output " FILE
+Write the output to FILE, keeping the original files unchanged.
+An existing FILE is overwritten.
+.TP
+.B "-r, --reindex"
+Rebuild the index on an existing compressed file.
+.TP
+.BI "-s, --size " INT
+Decompress INT bytes (uncompressed size) to standard output.
+Implies -c.
+.TP
+.B "-t, --test"
+Test the integrity of the compressed file.
+.TP
+.BI "-@, --threads " INT
+Number of threads to use [1].
+.PP
+
+.SH BGZF FORMAT
+The BGZF format written by bgzip is described in the SAM format specification
+available from http://samtools.github.io/hts-specs/SAMv1.pdf.
+
+It makes use of a gzip feature which allows compressed files to be
+concatenated.
+The input data is divided into blocks which are no larger than 64 kilobytes
+both before and after compression (including compression headers).
+Each block is compressed into a gzip file.
+The gzip header includes an extra sub-field with identifier 'BC' and the length
+of the compressed block, including all headers.
+
+.SH GZI FORMAT
+The index format is a binary file listing pairs of compressed and
+uncompressed offsets in a BGZF file.
+Each compressed offset points to the start of a BGZF block.
+The uncompressed offset is the corresponding location in the uncompressed
+data stream.
+
+All values are stored as little-endian 64-bit unsigned integers.
+
+The file contents are:
+.EX 4
+uint64_t number_entries
+.EE
+followed by number_entries pairs of:
+.EX 4
+uint64_t compressed_offset
+uint64_t uncompressed_offset
+.EE
+
+.SH EXAMPLES
+.EX 4
+# Compress stdin to stdout
+bgzip < /usr/share/dict/words > /tmp/words.gz
+
+# Make a .gzi index
+bgzip -r /tmp/words.gz
+
+# Extract part of the data using the index
+bgzip -b 367635 -s 4 /tmp/words.gz 
+
+# Uncompress the whole file, removing the compressed copy
+bgzip -d /tmp/words.gz
+.EE
+
+.SH AUTHOR
+.PP
+The BGZF library was originally implemented by Bob Handsaker and modified
+by Heng Li for remote file access and in-memory caching.
+
+.SH SEE ALSO
+.IR gzip (1),
+.IR tabix (1)
--- a/ext/htslib/bgzip.c
+++ b/ext/htslib/bgzip.c
@ -0,0 +1,771 @@
+/* bgzip.c -- Block compression/decompression utility.
+
+   Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology
+   Copyright (C) 2010, 2013-2019, 2021-2024 Genome Research Ltd.
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notices and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+*/
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "htslib/bgzf.h"
+#include "htslib/hts.h"
+#include "htslib/hfile.h"
+
+#ifdef _WIN32
+#  define WIN32_LEAN_AND_MEAN
+#  include <windows.h>
+#  include <sys/utime.h>
+#endif
+
+static const int WINDOW_SIZE = BGZF_BLOCK_SIZE;
+
+/* Print a printf-style message to stderr, then terminate the process with
+   EXIT_FAILURE. Never returns. */
+static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) error(const char *format, ...)
+{
+    va_list args;
+
+    va_start(args, format);
+    vfprintf(stderr, format, args);
+    va_end(args);
+
+    exit(EXIT_FAILURE);
+}
+
+/* Read one line from stdin and return 1 when it starts with 'y' or 'Y'.
+   Returns 0 for any other answer, and also when nothing could be read. */
+static int ask_yn(void)
+{
+    char reply[1024];
+
+    if (!fgets(reply, sizeof reply, stdin))
+        return 0;
+
+    switch (reply[0]) {
+    case 'Y':
+    case 'y':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/* Ask the user whether the existing file fn may be overwritten.
+   Returns 1 on a yes answer; 0 on anything else or when stdin is not a
+   terminal (no prompt is shown then). errno is preserved across the call. */
+static int confirm_overwrite(const char *fn)
+{
+    const int saved_errno = errno;
+    int answer = 0;
+
+    if (isatty(STDIN_FILENO)) {
+        fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
+        answer = ask_yn() ? 1 : 0;
+    }
+
+    errno = saved_errno;
+    return answer;
+}
+
+/* Return 1 when ext (given without the leading dot) is one of the recognised
+   compressed-file extensions, compared case-insensitively; 0 otherwise. */
+static int known_extension(const char *ext)
+{
+    static const char *const recognised[] = { "gz", "bgz", "bgzf" };
+    const size_t count = sizeof recognised / sizeof recognised[0];
+    size_t i;
+
+    for (i = 0; i < count; i++) {
+        if (strcasecmp(ext, recognised[i]) == 0)
+            return 1;
+    }
+    return 0;
+}
+
+/* Decide whether a file with an unrecognised extension may be decompressed
+   to name. Each pending force request (*is_forced > 0 after -f) approves one
+   file and is used up. Without one, the user is asked when stdin is a
+   terminal; otherwise the request is declined.
+   Returns non-zero to proceed, 0 to decline. */
+static int confirm_filename(int *is_forced, const char *name, const char *ext)
+{
+    if (*is_forced == 0) {
+        if (!isatty(STDIN_FILENO))
+            return 0;
+
+        fprintf(stderr, "[bgzip] .%s is not a known extension; do you wish to decompress to %s (y or n)? ", ext, name);
+        return ask_yn();
+    }
+
+    *is_forced -= 1;
+    return 1;
+}
+
+/* getfilespec - fetch the file status of path
+   path        - file whose status is wanted
+   status      - receives the stat() result
+   returns 0 on success and -1 on failure (NULL argument or stat() error)
+   The name "-" (stdin/stdout) succeeds without touching *status.
+*/
+static int getfilespec(const char *path, struct stat *status)
+{
+    if (path == NULL || status == NULL)
+        return -1;
+
+    if (strcmp(path, "-") == 0)     // nothing to query for stdin/stdout
+        return 0;
+
+    return stat(path, status) < 0 ? -1 : 0;
+}
+
+/* setfilespec - apply stored file status to path
+   path        - file to update
+   status      - status structure holding the values to apply
+   returns 0 on success and -1 on failure (NULL argument or the time update failed)
+   Only the access and modification times are applied, at one-second
+   resolution. The name "-" (stdin/stdout) succeeds without doing anything.
+*/
+static int setfilespec(const char *path, const struct stat *status)
+{
+    int rc;
+
+    if (path == NULL || status == NULL)
+        return -1;
+
+    if (strcmp(path, "-") == 0)     // nothing to set for stdin/stdout
+        return 0;
+
+#ifdef _WIN32
+    struct _utimbuf times;
+    times.actime = status->st_atime;
+    times.modtime = status->st_mtime;
+    rc = _utime(path, &times);
+#else
+    struct timeval times[2];
+    memset(times, 0, sizeof(times));
+    times[0].tv_sec = status->st_atime;     // access time
+    times[1].tv_sec = status->st_mtime;     // modification time
+    rc = utimes(path, times);
+#endif //_WIN32
+
+    if (rc < 0) {
+        fprintf(stderr, "[bgzip] Failed to set file specifications.\n");
+        return -1;
+    }
+    return 0;
+}
+
+
+/* Split the extension off name (in place) and check that it may be used.
+   name   - input file name; the final '.' is replaced by NUL so that name
+            becomes the output name
+   forced - pending force count, handed on to confirm_filename()
+   returns 0 when decompression may go ahead, 1 when name has no extension
+   and 2 when the extension is unknown and was not confirmed.
+   Note that name stays truncated when 2 is returned. */
+static int check_name_and_extension(char *name, int *forced) {
+    size_t dot = strlen(name);
+    char *suffix;
+
+    // Walk back to the last '.' or '/', whichever comes first from the end
+    while (dot > 0 && name[dot] != '.' && name[dot] != '/')
+        dot--;
+
+    if (dot == 0 || name[dot] != '.') {
+        fprintf(stderr, "[bgzip] can't find an extension in %s -- please rename\n", name);
+        return 1;
+    }
+
+    name[dot] = '\0';
+    suffix = name + dot + 1;
+
+    if (known_extension(suffix))
+        return 0;
+    if (confirm_filename(forced, name, suffix))
+        return 0;
+
+    fprintf(stderr, "[bgzip] unknown extension .%s -- declining to decompress to %s\n", suffix, name);
+    return 2;                            //explicit N, continue and return 2
+}
+
+
+/* Write the version and usage summary to fp and return status unchanged, so
+   that callers can write "return bgzip_main_usage(stream, code);". */
+static int bgzip_main_usage(FILE *fp, int status)
+{
+    static const char *const option_help[] = {
+        "   -b, --offset INT           decompress at virtual file pointer (0-based uncompressed offset)\n",
+        "   -c, --stdout               write on standard output, keep original files unchanged\n",
+        "   -d, --decompress           decompress\n",
+        "   -f, --force                overwrite files without asking\n",
+        "   -g, --rebgzip              use an index file to bgzip a file\n",
+        "   -h, --help                 give this help\n",
+        "   -i, --index                compress and create BGZF index\n",
+        "   -I, --index-name FILE      name of BGZF index file [file.gz.gzi]\n",
+        "   -k, --keep                 don't delete input files during operation\n",
+        "   -l, --compress-level INT   Compression level to use when compressing; 0 to 9, or -1 for default [-1]\n",
+        "   -o, --output FILE          write to file, keep original files unchanged\n",
+        "   -r, --reindex              (re)index compressed file\n",
+        "   -s, --size INT             decompress INT bytes (uncompressed size)\n",
+        "   -t, --test                 test integrity of compressed file\n",
+        "       --binary               Don't align blocks with text lines\n",
+        "   -@, --threads INT          number of compression threads to use [1]\n",
+    };
+    size_t i;
+
+    fputs("\n", fp);
+    fprintf(fp, "Version: %s\n", hts_version());
+    fputs("Usage:   bgzip [OPTIONS] [FILE] ...\n", fp);
+    fputs("Options:\n", fp);
+
+    for (i = 0; i < sizeof option_help / sizeof option_help[0]; i++)
+        fputs(option_help[i], fp);
+
+    return status;
+}
+
+int main(int argc, char **argv)
+{
+    int c, compress, compress_level = -1, pstdout, is_forced, test, index = 0, rebgzip = 0, reindex = 0, keep, binary;
+    BGZF *fp;
+    char *buffer;
+    long start, end, size;
+    struct stat filestat;
+    char *statfilename = NULL;
+    char *index_fname = NULL, *write_fname = NULL;
+    int threads = 1, isstdin = 0, usedstdout = 0, ret = 0, exp_out_open = 0, f_dst = -1;
+
+    static const struct option loptions[] =
+    {
+        {"help", no_argument, NULL, 'h'},
+        {"offset", required_argument, NULL, 'b'},
+        {"stdout", no_argument, NULL, 'c'},
+        {"decompress", no_argument, NULL, 'd'},
+        {"force", no_argument, NULL, 'f'},
+        {"index", no_argument, NULL, 'i'},
+        {"index-name", required_argument, NULL, 'I'},
+        {"compress-level", required_argument, NULL, 'l'},
+        {"reindex", no_argument, NULL, 'r'},
+        {"rebgzip",no_argument,NULL,'g'},
+        {"size", required_argument, NULL, 's'},
+        {"threads", required_argument, NULL, '@'},
+        {"test", no_argument, NULL, 't'},
+        {"version", no_argument, NULL, 1},
+        {"keep", no_argument, NULL, 'k'},
+        {"binary", no_argument, NULL, 2},
+        {"output", required_argument, NULL, 'o'},
+        {NULL, 0, NULL, 0}
+    };
+
+    compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; test = 0; keep = 0; binary = 0;
+    while((c  = getopt_long(argc, argv, "cdh?fb:@:s:iI:l:grtko:",loptions,NULL)) >= 0){
+        switch(c){
+        case 'd': compress = 0; break;
+        case 'c': pstdout = 1; break;
+        case 'b': start = atol(optarg); compress = 0; pstdout = 1; break;
+        case 's': size = atol(optarg); pstdout = 1; break;
+        case 'f': is_forced++; break;
+        case 'i': index = 1; break;
+        case 'I': index_fname = optarg; break;
+        case 'l': compress_level = atol(optarg); break;
+        case 'g': rebgzip = 1; break;
+        case 'r': reindex = 1; compress = 0; break;
+        case '@': threads = atoi(optarg); break;
+        case 't': test = 1; compress = 0; reindex = 0; break;
+        case 'k': keep = 1; break;
+        case 'o': write_fname = optarg; break;
+        case 1:
+            printf(
+"bgzip (htslib) %s\n"
+"Copyright (C) 2024 Genome Research Ltd.\n", hts_version());
+            return EXIT_SUCCESS;
+        case  2:  binary = 1; break;
+        case 'h': return bgzip_main_usage(stdout, EXIT_SUCCESS);
+        case '?': return bgzip_main_usage(stderr, EXIT_FAILURE);
+        }
+    }
+    if (size >= 0) end = start + size;
+    if (end >= 0 && end < start) {
+        fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
+        return 1;
+    }
+
+    if ( (index || reindex) && rebgzip )
+    {
+        fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n");
+        return 1;
+    }
+    if ( rebgzip && !index_fname )
+    {
+        fprintf(stderr, "[bgzip] Index file name expected with rebgzip.  See -I option.\n");
+        return 1;
+    }
+    /* avoid -I / indexfile with multiple inputs while index/reindex. these wont be set during
+    read/decompress and are not considered even if set */
+    if ( (index || reindex) && !write_fname && index_fname && argc - optind > 1) {
+        fprintf(stderr, "[bgzip] Cannot specify index filename with multiple data file on index, reindex.\n");
+        return 1;
+    }
+
+    if (write_fname) {
+        if (pstdout) {
+            fprintf(stderr, "[bgzip] Cannot write to %s and stdout at the same time.\n", write_fname);
+            return 1;
+        } else if (strncmp(write_fname, "-", strlen(write_fname)) == 0) {
+            // stdout has special handling so treat as -c
+            pstdout = 1;
+            write_fname = NULL;
+        }
+    }
+
+    do {
+        isstdin = optind >= argc ? 1 : !strcmp("-", argv[optind]);          //using stdin or not?
+        /* when a named output file is not used, stdout is in use when explicitly
+        selected or when stdin in is in use, it needs to be closed
+        explicitly to get all io errors*/
+
+        if (!write_fname)
+            usedstdout |= isstdin || pstdout || test;
+
+        statfilename = NULL;
+
+        if (compress == 1) {
+            hFILE* f_src = NULL;
+            char out_mode[3] = "w\0";
+            char out_mode_exclusive[4] = "wx\0";
+
+            if (compress_level < -1 || compress_level > 9) {
+                fprintf(stderr, "[bgzip] Invalid compress-level: %d\n", compress_level);
+                return 1;
+            }
+            if (compress_level >= 0) {
+                out_mode[1] = compress_level + '0';
+                out_mode_exclusive[2] = compress_level + '0';
+            }
+            if (!(f_src = hopen(!isstdin ? argv[optind] : "-", "r"))) {
+                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), isstdin ? "stdin" : argv[optind]);
+                return 1;
+            }
+
+            if (write_fname) {
+                if (!exp_out_open) {  // only open this file once for writing, close at the end
+                    if ((fp = bgzf_open(write_fname, out_mode)) == NULL) {
+                        fprintf(stderr, "[bgzip] can't create %s: %s\n", write_fname, strerror(errno));
+                        return 1;
+                    } else {
+                        exp_out_open = 1;
+                    }
+                }
+            } else if ( argc>optind && !isstdin )            //named input file that isn't an explicit "-"
+            {
+                if (pstdout)
+                    fp = bgzf_open("-", out_mode);
+                else
+                {
+                    char *name = malloc(strlen(argv[optind]) + 5);
+                    strcpy(name, argv[optind]);
+                    strcat(name, ".gz");
+                    fp = bgzf_open(name, is_forced? out_mode : out_mode_exclusive);
+                    if (fp == NULL && errno == EEXIST) {
+                        if (confirm_overwrite(name)) {
+                            fp = bgzf_open(name, out_mode);
+                        }
+                        else {
+                            ret = 2;                        //explicit N - no overwrite, continue and return 2
+                            hclose_abruptly(f_src);
+                            free(name);
+                            continue;
+                        }
+                    }
+                    if (fp == NULL) {
+                        fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
+                        free(name);
+                        return 1;
+                    }
+                    statfilename = name;
+                }
+            }
+            else if (!pstdout && isatty(fileno((FILE *)stdout)) )
+                return bgzip_main_usage(stderr, EXIT_FAILURE);
+            else if ( index && !index_fname )
+            {
+                fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
+                return 1;
+            }
+            else
+                fp = bgzf_open("-", out_mode);
+
+            if ( index ) bgzf_index_build_init(fp);
+            if (threads > 1)
+                bgzf_mt(fp, threads, 256);
+
+            buffer = malloc(WINDOW_SIZE);
+            if (!buffer) {
+                if (statfilename) {
+                    free(statfilename);
+                }
+                return 1;
+            }
+            if (rebgzip){
+                if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.%s\n", !isstdin ? argv[optind] : index_fname, !isstdin ? "gzi" : "");
+
+                while ((c = hread(f_src, buffer, WINDOW_SIZE)) > 0)
+                    if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
+            }
+            else {
+                htsFormat fmt;
+                int textual = 0;
+                if (!binary
+                    && hts_detect_format(f_src, &fmt) == 0
+                    && fmt.compression == no_compression) {
+                    switch(fmt.format) {
+                    case text_format:
+                    case sam:
+                    case vcf:
+                    case bed:
+                    case fasta_format:
+                    case fastq_format:
+                    case fai_format:
+                    case fqi_format:
+                        textual = 1;
+                        break;
+                    default: break; // silence clang warnings
+                    }
+                }
+
+                if (binary || !textual) {
+                    // Binary data, either detected or explicit
+                    while ((c = hread(f_src, buffer, WINDOW_SIZE)) > 0)
+                        if (bgzf_write(fp, buffer, c) < 0)
+                            error("Could not write %d bytes: Error %d\n",
+                                c, fp->errcode);
+                } else {
+                    /* Text mode, try a flush after a newline */
+                    int in_header = 1, n = 0, long_line = 0;
+                    while ((c = hread(f_src, buffer+n, WINDOW_SIZE-n)) > 0) {
+                        int c2 = c+n;
+                        int flush = 0;
+                        if (in_header &&
+                            (long_line || buffer[0] == '@' || buffer[0] == '#')) {
+                            // Scan forward to find the last header line.
+                            int last_start = 0;
+                            n = 0;
+                            while (n < c2) {
+                                if (buffer[n++] != '\n')
+                                    continue;
+
+                                last_start = n;
+                                if (n < c2 &&
+                                    !(buffer[n] == '@' || buffer[n] == '#')) {
+                                    in_header = 0;
+                                    break;
+                                }
+                            }
+                            if (!last_start) {
+                                n = c2;
+                                long_line = 1;
+                            } else {
+                                n = last_start;
+                                flush = 1;
+                                long_line = 0;
+                            }
+                        } else {
+                            // Scan backwards to find the last newline.
+                            n += c; // c read plus previous n overflow
+                            while (--n >= 0 && ((char *)buffer)[n] != '\n')
+                                ;
+
+                            if (n >= 0) {
+                                flush = 1;
+                                n++;
+                            } else {
+                                n = c2;
+                            }
+                        }
+
+                        // Pos n is either at the end of the buffer with flush==0,
+                        // or the first byte after a newline and a flush point.
+                        if (bgzf_write(fp, buffer, n) < 0)
+                            error("Could not write %d bytes: Error %d\n",
+                                n, fp->errcode);
+                        if (flush)
+                            if (bgzf_flush_try(fp, 65536) < 0) {// force
+                                if (statfilename) {
+                                    free(statfilename);
+                                }
+                                return -1;
+                            }
+
+                        memmove(buffer, buffer+n, c2-n);
+                        n = c2-n;
+                    }
+
+                    // Trailing data.
+                    if (bgzf_write(fp, buffer, n) < 0)
+                        error("Could not write %d bytes: Error %d\n",
+                            n, fp->errcode);
+                }
+            }
+            if ( index && !write_fname )
+            {
+                if (index_fname) {
+                    if (bgzf_index_dump(fp, index_fname, NULL) < 0)
+                        error("Could not write index to '%s'\n", index_fname);
+                } else if (!isstdin) {
+                    if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0)
+                        error("Could not write index to '%s.gz.gzi'\n", argv[optind]);
+                }
+                else {
+                    //stdin, cant create index file as name is not present "-.gz.gzi" not a valid one!
+                    error("Can not write index for stdin data without index filename, use -I option to set index file.\n");
+                }
+            }
+
+            if (!write_fname) {
+                if (bgzf_close(fp) < 0)
+                    error("Output close failed: Error %d\n", fp->errcode);
+            }
+
+            if (hclose(f_src) < 0)
+                error("Input close failed\n");
+
+            if (statfilename) {
+                //get input file timestamp
+                if (!getfilespec(argv[optind], &filestat)) {
+                    //set output file timestamp
+                    if (setfilespec(statfilename, &filestat) < 0) {
+                        fprintf(stderr, "[bgzip] Failed to set file specification.\n");
+                    }
+                }
+                else {
+                    fprintf(stderr, "[bgzip] Failed to get file specification.\n");
+                }
+                free(statfilename);
+            }
+
+            if (argc > optind && !pstdout && !keep && !isstdin && !write_fname) unlink(argv[optind]);
+
+            free(buffer);
+        }
+        else if ( reindex )
+        {
+            if ( argc>optind && !isstdin )
+            {
+                fp = bgzf_open(argv[optind], "r");
+                if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]);
+            }
+            else
+            {
+                if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n");
+                fp = bgzf_open("-", "r");
+                if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno));
+            }
+
+            buffer = malloc(BGZF_BLOCK_SIZE);
+            bgzf_index_build_init(fp);
+            int ret;
+            while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ;
+            free(buffer);
+            if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n");
+
+            if ( index_fname ) {
+                if (bgzf_index_dump(fp, index_fname, NULL) < 0)
+                    error("Could not write index to '%s'\n", index_fname);
+            } else if (!isstdin) {
+                if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0)
+                    error("Could not write index to '%s.gzi'\n", argv[optind]);
+            }
+            else {
+                // Reading from stdin: there is no input file name to derive an index name from ("-.gzi" is not a valid one).
+                error("Can not write index for stdin data without index filename, use -I option to set index file.\n");
+            }
+
+            if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode);
+        }
+        else
+        {
+            int is_forced_tmp = is_forced;
+
+            if ( argc>optind && !isstdin )
+            {
+                fp = bgzf_open(argv[optind], "r");
+                if (fp == NULL) {
+                    fprintf(stderr, "[bgzip] Could not open %s: %s\n", argv[optind], strerror(errno));
+                    return 1;
+                }
+                if (bgzf_compression(fp) == no_compression) {
+                    fprintf(stderr, "[bgzip] %s: not a compressed file -- ignored\n", argv[optind]);
+                    bgzf_close(fp);
+                    return 1;
+                }
+
+                if (pstdout || test) {
+                    f_dst = fileno(stdout);
+                } else {
+                    const int wrflags = O_WRONLY | O_CREAT | O_TRUNC;
+                    char *name;
+                    int check;
+
+                    if (!(name = strdup(argv[optind]))) {
+                        fprintf(stderr, "[bgzip] unable to allocate memory for output file name.\n");
+                        bgzf_close(fp);
+                        return 1;
+                    }
+
+                    if ((check = check_name_and_extension(name, &is_forced_tmp))) {
+                        bgzf_close(fp);
+
+                        if (check == 1) {
+                            return 1;
+                        } else {
+                            ret = 2;
+                            continue;
+                        }
+                    }
+
+                    if (!exp_out_open) {
+                        if (write_fname) { // only open file once and don't care about overwriting
+                            is_forced_tmp = 1;
+                            exp_out_open = 1;
+                        }
+
+                        f_dst = open(write_fname ? write_fname : name, is_forced_tmp? wrflags : wrflags|O_EXCL, 0666);
+
+                        if (f_dst < 0 && errno == EEXIST) {
+                            if (confirm_overwrite(name)) {
+                                f_dst = open(name, wrflags, 0666);
+                            }
+                            else {
+                                ret = 2;                        //explicit N - no overwrite, continue and return 2
+                                bgzf_close(fp);
+                                free(name);
+                                continue;
+                            }
+                        }
+                        if (f_dst < 0) {
+                            fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
+                            free(name);
+                            return 1;
+                        }
+                    }
+
+                    statfilename = name;
+                }
+            }
+            else if (!pstdout && isatty(fileno((FILE *)stdin)) )
+                return bgzip_main_usage(stderr, EXIT_FAILURE);
+            else
+            {
+                f_dst = fileno(stdout);
+                fp = bgzf_open("-", "r");
+                if (fp == NULL) {
+                    fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
+                    return 1;
+                }
+                if (bgzf_compression(fp) == no_compression) {
+                    fprintf(stderr, "[bgzip] stdin is not compressed -- ignored\n");
+                    bgzf_close(fp);
+                    return 1;
+                }
+
+                if (!write_fname) {
+                    f_dst = fileno(stdout);
+                } else {
+                    if (!exp_out_open) {
+                        exp_out_open = 1;
+
+                        f_dst = open(write_fname, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+
+                        if (f_dst < 0) {
+                            fprintf(stderr, "[bgzip] can't create %s: %s\n", write_fname, strerror(errno));
+                            return 1;
+                        }
+                    }
+                }
+            }
+
+            buffer = malloc(WINDOW_SIZE);
+            if ( start>0 )
+            {
+                if (index_fname) {
+                    if ( bgzf_index_load(fp, index_fname, NULL) < 0 )
+                        error("Could not load index: %s\n", index_fname);
+                } else {
+                    if (optind >= argc || isstdin) {
+                        error("The -b option requires -I when reading from stdin "
+                            "(and stdin must be seekable)\n");
+                    }
+                    if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 )
+                        error("Could not load index: %s.gzi\n", argv[optind]);
+                }
+                if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %ld-th (uncompressd) byte\n", start);
+            }
+
+            if (threads > 1)
+                bgzf_mt(fp, threads, 256);
+
+    #ifdef _WIN32
+            _setmode(f_dst, O_BINARY);
+    #endif
+            long start_reg = start, end_reg = end;
+            while (1) {
+                if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
+                else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
+                if (c == 0) break;
+                if (c < 0) error("Error %d in block starting at offset %" PRId64 "(%" PRIX64 ")\n", fp->errcode, fp->block_address, fp->block_address);
+                start += c;
+                if ( !test && write(f_dst, buffer, c) != c ) {
+    #ifdef _WIN32
+                    if (GetLastError() != ERROR_NO_DATA)
+    #endif
+                    error("Could not write %d bytes\n", c);
+                }
+                if (end >= 0 && start >= end) break;
+            }
+            start = start_reg;
+            end = end_reg;
+            free(buffer);
+            if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
+
+            if (statfilename) {
+                if (!write_fname) {
+                    //get input file timestamp
+                    if (!getfilespec(argv[optind], &filestat)) {
+                        //set output file timestamp
+                        if (setfilespec(statfilename, &filestat) < 0) {
+                            fprintf(stderr, "[bgzip] Failed to set file specification.\n");
+                        }
+                    }
+                    else {
+                        fprintf(stderr, "[bgzip] Failed to get file specification.\n");
+                    }
+                }
+
+                free(statfilename);
+            }
+
+            if (argc > optind && !pstdout && !test && !keep && !isstdin && !write_fname) unlink(argv[optind]);
+            if (!isstdin && !pstdout && !test && !write_fname) {
+                close(f_dst);                               //close output file when it is not stdout
+            }
+        }
+    } while (++optind < argc);
+
+    if (usedstdout && !reindex) {
+        //stdout in use, have to close explicitly to get any pending write errors
+        if (fclose(stdout) != 0 && errno != EBADF) {
+            fprintf(stderr, "[bgzip] Failed to close stdout, errno %d", errno);
+            ret = 1;
+        }
+    } else if (write_fname) {
+        if (compress == 1) { // close explicit output file (this is for compression)
+            if (index) {
+                if (index_fname) {
+                    if (bgzf_index_dump(fp, index_fname, NULL) < 0)
+                        error("Could not write index to '%s'\n", index_fname);
+                } else {
+                    if (bgzf_index_dump(fp, write_fname, ".gzi") < 0)
+                        error("Could not write index to '%s.gzi'\n", write_fname);
+                }
+            }
+
+            if (bgzf_close(fp) < 0)
+                error("Output close failed: Error %d\n", fp->errcode);
+        } else {
+            close(f_dst);
+        }
+    }
+
+
+    return ret;
+}
--- a/ext/htslib/builddir_vars.mk.in
+++ b/ext/htslib/builddir_vars.mk.in
@ -0,0 +1,58 @@
+# Separate build directory Makefile overrides for htslib.
+#
+#    Copyright (C) 2021 University of Glasgow.
+#
+#    Author: John Marshall <jmarshall@hey.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# This is @configure_input@
+#
+# When building from a separate build directory, this file is included by
+# HTSlib's Makefile or htslib.mk instead of htslib_vars.mk. It adjusts
+# variables to account for a separate source directory and then includes
+# the real makefile fragment.
+
+ifneq "$(HTSPREFIX)" ""
+# When included externally via htslib.mk, just update $(HTSSRCDIR) and hence
+# $(HTSPREFIX) to point to the source directory (without using any extra
+# unprefixed variables, which would be in the external project's namespace).
+
+# Set to $(HTSDIR)/$(srcdir) (or just $(srcdir) if that's absolute)
+HTSSRCDIR = @HTSDIRslash_if_relsrcdir@@srcdir@
+
+include $(HTSSRCDIR)/htslib_vars.mk
+
+else
+# When included from HTSlib's Makefile, override $(srcdir) and set VPATH,
+# and make any other adjustments required. ($(HTSPREFIX) remains empty as
+# the items it prefixes will be found via VPATH instead.)
+
+srcdir = @srcdir@
+VPATH  = @srcdir@
+
+srcprefix = $(srcdir)/
+
+# Ensure that htscodecs.c can include its version.h. This -I option must come
+# before -I. so that these targets get this version.h rather than HTSlib's.
+htscodecs/htscodecs/htscodecs.o htscodecs/htscodecs/htscodecs.pico: ALL_CPPFLAGS = -Ihtscodecs/htscodecs -I. $(CPPFLAGS)
+
+include $(srcdir)/htslib_vars.mk
+
+endif
--- a/ext/htslib/config.h.in~
+++ b/ext/htslib/config.h.in~
@ -0,0 +1,180 @@
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* If you use configure, this file provides #defines reflecting your
+   configuration choices.  If you have not run configure, suitable
+   conservative defaults will be used.
+
+   Autoheader adds a number of items to this template file that are not
+   used by HTSlib: STDC_HEADERS and most HAVE_*_H header file defines
+   are immaterial, as we assume standard ISO C headers and facilities;
+   the PACKAGE_* defines are unused and are overridden by the more
+   accurate PACKAGE_VERSION as computed by the Makefile.  */
+
+/* Define if HTSlib should enable GCS support. */
+#undef ENABLE_GCS
+
+/* Define if HTSlib should enable plugins. */
+#undef ENABLE_PLUGINS
+
+/* Define if HTSlib should enable S3 support. */
+#undef ENABLE_S3
+
+/* Define if __attribute__((constructor)) is available. */
+#undef HAVE_ATTRIBUTE_CONSTRUCTOR
+
+/* Define if __attribute__((target(...))) is available. */
+#undef HAVE_ATTRIBUTE_TARGET
+
+/* Defined to 1 if rANS source using AVX2 can be compiled. */
+#undef HAVE_AVX2
+
+/* Defined to 1 if rANS source using AVX512F can be compiled. */
+#undef HAVE_AVX512
+
+/* Defined to 1 if __builtin_cpu_supports("ssse3") works */
+#undef HAVE_BUILTIN_CPU_SUPPORT_SSSE3
+
+/* Define if clock_gettime exists and accepts CLOCK_PROCESS_CPUTIME_ID. */
+#undef HAVE_CLOCK_GETTIME_CPUTIME
+
+/* Define if you have the Common Crypto library. */
+#undef HAVE_COMMONCRYPTO
+
+/* Define to 1 if you have the declaration of '__cpuid_count', and to 0 if you
+   don't. */
+#undef HAVE_DECL___CPUID_COUNT
+
+/* Define to 1 if you have the declaration of '__get_cpuid_max', and to 0 if
+   you don't. */
+#undef HAVE_DECL___GET_CPUID_MAX
+
+/* Define to 1 if you have the 'drand48' function. */
+#undef HAVE_DRAND48
+
+/* Define if using an external libhtscodecs */
+#undef HAVE_EXTERNAL_LIBHTSCODECS
+
+/* Define to 1 if you have the 'fdatasync' function. */
+#undef HAVE_FDATASYNC
+
+/* Define to 1 if you have the 'fsync' function. */
+#undef HAVE_FSYNC
+
+/* Define to 1 if you have the 'getpagesize' function. */
+#undef HAVE_GETPAGESIZE
+
+/* Define to 1 if you have the 'gmtime_r' function. */
+#undef HAVE_GMTIME_R
+
+/* Define if you have libcrypto-style HMAC(). */
+#undef HAVE_HMAC
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the 'bz2' library (-lbz2). */
+#undef HAVE_LIBBZ2
+
+/* Define if libcurl file access is enabled. */
+#undef HAVE_LIBCURL
+
+/* Define if libdeflate is available. */
+#undef HAVE_LIBDEFLATE
+
+/* Define to 1 if you have the 'lzma' library (-llzma). */
+#undef HAVE_LIBLZMA
+
+/* Define to 1 if you have the 'z' library (-lz). */
+#undef HAVE_LIBZ
+
+/* Define to 1 if you have the <lzma.h> header file. */
+#undef HAVE_LZMA_H
+
+/* Define to 1 if you have a working 'mmap' system call. */
+#undef HAVE_MMAP
+
+/* Defined to 1 if rANS source using popcnt can be compiled. */
+#undef HAVE_POPCNT
+
+/* Define to 1 if you have the 'srand48_deterministic' function. */
+#undef HAVE_SRAND48_DETERMINISTIC
+
+/* Defined to 1 if rANS source using SSE4.1 can be compiled. */
+#undef HAVE_SSE4_1
+
+/* Defined to 1 if rANS source using SSSE3 can be compiled. */
+#undef HAVE_SSSE3
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdio.h> header file. */
+#undef HAVE_STDIO_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#undef HAVE_SYS_PARAM_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Platform-dependent plugin filename extension. */
+#undef PLUGIN_EXT
+
+/* Define to 1 if all of the C89 standard headers exist (not just the ones
+   required in a freestanding environment). This macro is provided for
+   backward compatibility; new code need not use it. */
+#undef STDC_HEADERS
+
+
+/* Prevent unaligned access in htscodecs SSE4 rANS codec */
+#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0
+#undef UBSAN
+#endif
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+#undef _FILE_OFFSET_BITS
+
+/* Define to 1 on platforms where this makes off_t a 64-bit type. */
+#undef _LARGE_FILES
+
+/* Number of bits in time_t, on hosts where this is settable. */
+#undef _TIME_BITS
+
+/* Specify X/Open requirements */
+#undef _XOPEN_SOURCE
+
+/* Define to 1 on platforms where this makes time_t a 64-bit type. */
+#undef __MINGW_USE_VC2005_COMPAT
--- a/ext/htslib/config.mk.in
+++ b/ext/htslib/config.mk.in
@ -0,0 +1,120 @@
+#  Optional configure Makefile overrides for htslib.
+#
+#    Copyright (C) 2015-2017, 2019, 2023 Genome Research Ltd.
+#
+#    Author: John Marshall <jm18@sanger.ac.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# This is @configure_input@
+#
+# If you use configure, this file overrides variables and augments rules
+# in the Makefile to reflect your configuration choices.  If you don't run
+# configure, the main Makefile contains suitable conservative defaults.
+
+prefix       = @prefix@
+exec_prefix  = @exec_prefix@
+bindir       = @bindir@
+includedir   = @includedir@
+libdir       = @libdir@
+libexecdir   = @libexecdir@
+datarootdir  = @datarootdir@
+mandir       = @mandir@
+
+CC     = @CC@
+RANLIB = @RANLIB@
+
+CPPFLAGS = @CPPFLAGS@
+CFLAGS   = @CFLAGS@
+LDFLAGS  = @LDFLAGS@
+VERSION_SCRIPT_LDFLAGS = @VERSION_SCRIPT_LDFLAGS@
+LIBS     = @LIBS@
+
+PLATFORM   = @PLATFORM@
+PLUGIN_EXT = @PLUGIN_EXT@
+
+# The default Makefile enables some of the optional files, but we blank
+# them so they can be controlled by configure instead.
+NONCONFIGURE_OBJS =
+
+# Lowercase here indicates these are "local" to config.mk
+plugin_OBJS =
+noplugin_LDFLAGS =
+noplugin_LIBS =
+
+# ifeq/.../endif, +=, and target-specific variables are GNU Make-specific.
+# If you don't have GNU Make, comment out this conditional and note that
+# to enable libcurl you will need to implement the following elsewhere.
+ifeq "libcurl-@libcurl@" "libcurl-enabled"
+
+LIBCURL_LIBS = -lcurl
+
+plugin_OBJS += hfile_libcurl.o
+
+hfile_libcurl$(PLUGIN_EXT): LIBS += $(LIBCURL_LIBS)
+
+noplugin_LIBS += $(LIBCURL_LIBS)
+
+endif
+
+ifeq "gcs-@gcs@" "gcs-enabled"
+plugin_OBJS += hfile_gcs.o
+endif
+
+ifeq "s3-@s3@" "s3-enabled"
+plugin_OBJS += hfile_s3.o
+plugin_OBJS += hfile_s3_write.o
+
+CRYPTO_LIBS = @CRYPTO_LIBS@
+noplugin_LIBS += $(CRYPTO_LIBS)
+hfile_s3$(PLUGIN_EXT): LIBS += $(CRYPTO_LIBS)
+hfile_s3_write$(PLUGIN_EXT): LIBS += $(CRYPTO_LIBS) $(LIBCURL_LIBS)
+endif
+
+ifeq "plugins-@enable_plugins@" "plugins-yes"
+
+plugindir  = @plugindir@
+pluginpath = @pluginpath@
+
+LIBHTS_OBJS += plugin.o
+PLUGIN_OBJS += $(plugin_OBJS)
+
+plugin.o plugin.pico: ALL_CPPFLAGS += -DPLUGINPATH=\"$(pluginpath)\"
+
+# When built as separate plugins, these record their version themselves.
+hfile_gcs.o hfile_gcs.pico: version.h
+hfile_libcurl.o hfile_libcurl.pico: version.h
+hfile_s3.o hfile_s3.pico: version.h
+hfile_s3_write.o hfile_s3_write.pico: version.h
+
+# Windows DLL plugins depend on the import library, built as a byproduct.
+$(plugin_OBJS:.o=.cygdll): cyghts-$(LIBHTS_SOVERSION).dll
+
+else
+
+LIBHTS_OBJS += $(plugin_OBJS)
+LDFLAGS += $(noplugin_LDFLAGS)
+LIBS += $(noplugin_LIBS)
+
+endif
+
+# Extra CFLAGS for specific files
+HTS_CFLAGS_AVX2 = @hts_cflags_avx2@
+HTS_CFLAGS_AVX512 = @hts_cflags_avx512@
+HTS_CFLAGS_SSE4 = @hts_cflags_sse4@
--- a/ext/htslib/configure.ac
+++ b/ext/htslib/configure.ac
@ -0,0 +1,675 @@
+# Configure script for htslib, a C library for high-throughput sequencing data.
+#
+#    Copyright (C) 2015-2024 Genome Research Ltd.
+#
+#    Author: John Marshall <jm18@sanger.ac.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+dnl Process this file with autoconf to produce a configure script
+AC_INIT([HTSlib], m4_esyscmd_s([./version.sh 2>/dev/null]),
+        [samtools-help@lists.sourceforge.net], [], [http://www.htslib.org/])
+AC_PREREQ(2.63)  dnl This version introduced 4-argument AC_CHECK_HEADER
+AC_CONFIG_SRCDIR(hts.c)
+AC_CONFIG_HEADERS(config.h)
+
+m4_include([m4/hts_prog_cc_warnings.m4])
+m4_include([m4/hts_check_compile_flags_needed.m4])
+m4_include([m4/hts_hide_dynamic_syms.m4])
+m4_include([m4/pkg.m4])
+
+dnl Copyright notice to be copied into the generated configure script
+AC_COPYRIGHT([Portions copyright (C) 2020-2024 Genome Research Ltd.
+
+This configure script is free software: you are free to change and
+redistribute it.  There is NO WARRANTY, to the extent permitted by law.])
+
+dnl Notes to be copied (by autoheader) into the generated config.h.in
+AH_TOP([/* If you use configure, this file provides @%:@defines reflecting your
+   configuration choices.  If you have not run configure, suitable
+   conservative defaults will be used.
+
+   Autoheader adds a number of items to this template file that are not
+   used by HTSlib: STDC_HEADERS and most HAVE_*_H header file defines
+   are immaterial, as we assume standard ISO C headers and facilities;
+   the PACKAGE_* defines are unused and are overridden by the more
+   accurate PACKAGE_VERSION as computed by the Makefile.  */])
+
+dnl Variant of AC_MSG_ERROR that ensures subsequent make(1) invocations fail
+dnl until the configuration error is resolved and configure is run again.
+AC_DEFUN([MSG_ERROR],
+  [cat > config.mk <<'EOF'
+ifneq ($(MAKECMDGOALS),distclean)
+$(error Resolve configure error first)
+endif
+EOF
+   AC_MSG_ERROR([$1], [$2])])
+
+AC_PROG_CC
+AC_PROG_RANLIB
+
+dnl Turn on compiler warnings, if possible
+HTS_PROG_CC_WARNINGS
+dnl Flags to treat warnings as errors.  These need to be applied to CFLAGS
+dnl later as they can interfere with some of the tests (notably AC_SEARCH_LIBS)
+HTS_PROG_CC_WERROR(hts_late_cflags)
+
+# HTSlib uses X/Open-only facilities (M_SQRT2 etc, drand48() etc), and
+# various POSIX functions that are provided by various _POSIX_C_SOURCE values
+# or by _XOPEN_SOURCE >= 500. It also uses usleep(), which is removed when
+# _XOPEN_SOURCE >= 700. Additionally, some definitions may require
+# _XOPEN_SOURCE >= 600 on some platforms (snprintf on MinGW,
+# PTHREAD_MUTEX_RECURSIVE on some Linux distributions). Hence we set it to 600.
+
+# Define _XOPEN_SOURCE unless the user has already done so via $CPPFLAGS etc.
+AC_CHECK_DECL([_XOPEN_SOURCE], [],
+  [AC_DEFINE([_XOPEN_SOURCE], [600], [Specify X/Open requirements])],
+  [])
+
+dnl Check that we have cpuid, and if so run the x86 SIMD checks
+AC_CHECK_DECLS([__get_cpuid_max, __cpuid_count], [
+   hts_have_cpuid=yes
+], [
+   hts_have_cpuid=no
+], [[#include <cpuid.h>]])
+
+AS_IF(test "x$hts_have_cpuid" = "xyes", [
+dnl Options for rANS32x16 sse4.1 version - sse4.1
+HTS_CHECK_COMPILE_FLAGS_NEEDED([sse4.1], [-msse4.1 -mssse3 -mpopcnt],
+ [AC_LANG_PROGRAM([[
+    #ifdef __x86_64__
+    #include "x86intrin.h"
+    #endif
+  ]],[[
+    #ifdef __x86_64__
+    __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
+    __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b);
+    return _mm_popcnt_u32(*((char *) &c));
+    #endif
+  ]])], [
+  hts_cflags_sse4="$flags_needed"
+  AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if rANS source using SSSE3 can be compiled.])
+  AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.])
+  AC_DEFINE([HAVE_SSE4_1],1,[Defined to 1 if rANS source using SSE4.1 can be compiled.
+])
+
+dnl Propagate HTSlib's unaligned access preference to htscodecs
+  AH_VERBATIM([UBSAN],[
+/* Prevent unaligned access in htscodecs SSE4 rANS codec */
+#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0
+#undef UBSAN
+#endif])
+  AC_DEFINE([UBSAN],1,[])
+])
+AC_SUBST([hts_cflags_sse4])
+
+dnl Options for rANS32x16 avx2 version
+HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2 -mpopcnt], [AC_LANG_PROGRAM([[
+    #ifdef __x86_64__
+    #include "x86intrin.h"
+    #endif
+  ]],[[
+    #ifdef __x86_64__
+    __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+    __m256i b = _mm256_add_epi32(a, a);
+    long long c = _mm256_extract_epi64(b, 0);
+    return _mm_popcnt_u32((int) c);
+    #endif
+  ]])], [
+  hts_cflags_avx2="$flags_needed"
+  AC_SUBST([hts_cflags_avx2])
+  AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.])
+  AC_DEFINE([HAVE_AVX2],1,[Defined to 1 if rANS source using AVX2 can be compiled.])
+])
+
+dnl Options for rANS32x16 avx512 version
+HTS_CHECK_COMPILE_FLAGS_NEEDED([avx512f], [-mavx512f -mpopcnt],
+ [AC_LANG_PROGRAM([[
+    #ifdef __x86_64__
+    #include "x86intrin.h"
+    #endif
+  ]],[[
+    #ifdef __x86_64__
+    __m512i a = _mm512_set1_epi32(1);
+    __m512i b = _mm512_add_epi32(a, a);
+    __m256i c = _mm512_castsi512_si256(b);
+    __m256i d = _mm512_extracti64x4_epi64(a, 1);
+    return _mm_popcnt_u32(*((char *) &c)) + (*(char *) &d);
+    #endif
+  ]])], [
+  hts_cflags_avx512="$flags_needed"
+  AC_SUBST([hts_cflags_avx512])
+  AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.])
+  AC_DEFINE([HAVE_AVX512],1,[Defined to 1 if rANS source using AVX512F can be compiled.])
+])
+
+dnl Check for working __builtin_cpu_supports (ssse3 is broken on some clangs)
+AC_MSG_CHECKING([for working __builtin_cpu_supports("ssse3")])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([],[
+  if (__builtin_cpu_supports("ssse3")) {
+    return 0;
+  }
+])], [
+  AC_MSG_RESULT([yes])
+  AC_DEFINE([HAVE_BUILTIN_CPU_SUPPORT_SSSE3], 1,
+            [Defined to 1 if __builtin_cpu_supports("ssse3") works])
+], [
+  AC_MSG_RESULT([no])
+])
+
+dnl Check for function attribute used in conjunction with __builtin_cpu_supports
+AC_MSG_CHECKING([for __attribute__((target))])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+  __attribute__((target("ssse3")))
+  int zero(void) {
+    return 0;
+  }
+]], [[zero();]])], [
+  AC_MSG_RESULT([yes])
+  AC_DEFINE([HAVE_ATTRIBUTE_TARGET], 1,
+            [Define if __attribute__((target(...))) is available.])
+], [
+  AC_MSG_RESULT([no])
+])
+
+]) dnl End of AS_IF(hts_have_cpuid)
+
+dnl Avoid chicken-and-egg problem where pkg-config supplies the
+dnl PKG_PROG_PKG_CONFIG macro, but we want to use it to check
+dnl for pkg-config...
+m4_ifdef([PKG_PROG_PKG_CONFIG], [PKG_PROG_PKG_CONFIG], [PKG_CONFIG=""])
+
+need_crypto=no
+pc_requires=
+static_LDFLAGS=$LDFLAGS
+static_LIBS='-lpthread -lz -lm'
+private_LIBS=$LDFLAGS
+
+AC_ARG_ENABLE([versioned-symbols],
+  [AS_HELP_STRING([--disable-versioned-symbols],
+                  [disable versioned symbols in shared library])],
+  [], [enable_versioned_symbols=yes])
+
+AC_ARG_ENABLE([bz2],
+  [AS_HELP_STRING([--disable-bz2],
+                  [omit support for BZ2-compressed CRAM files])],
+  [], [enable_bz2=yes])
+
+AC_ARG_ENABLE([gcs],
+  [AS_HELP_STRING([--enable-gcs],
+                  [support Google Cloud Storage URLs])],
+  [], [enable_gcs=check])
+
+AC_SYS_LARGEFILE
+
+AC_ARG_ENABLE([libcurl],
+  [AS_HELP_STRING([--enable-libcurl],
+                  [enable libcurl-based support for http/https/etc URLs])],
+  [], [enable_libcurl=check])
+
+AC_ARG_ENABLE([lzma],
+  [AS_HELP_STRING([--disable-lzma],
+                  [omit support for LZMA-compressed CRAM files])],
+  [], [enable_lzma=yes])
+
+AC_ARG_ENABLE([plugins],
+  [AS_HELP_STRING([--enable-plugins],
+                  [enable separately-compiled plugins for file access])],
+  [], [enable_plugins=no])
+AC_SUBST(enable_plugins)
+
+AC_ARG_WITH([external-htscodecs],
+  [AS_HELP_STRING([--with-external-htscodecs],
+                  [get htscodecs functions from a shared library])],
+  [], [with_external_htscodecs=no])
+AC_SUBST(with_external_htscodecs)
+
+AC_ARG_WITH([libdeflate],
+  [AS_HELP_STRING([--with-libdeflate],
+                  [use libdeflate for faster crc and deflate algorithms])],
+  [], [with_libdeflate=check])
+
+AC_ARG_WITH([plugin-dir],
+  [AS_HELP_STRING([--with-plugin-dir=DIR],
+                  [plugin installation location [LIBEXECDIR/htslib]])],
+  [case $withval in
+     yes|no) MSG_ERROR([no directory specified for --with-plugin-dir]) ;;
+   esac],
+   [with_plugin_dir='$(libexecdir)/htslib'])
+AC_SUBST([plugindir], $with_plugin_dir)
+
+AC_ARG_WITH([plugin-path],
+  [AS_HELP_STRING([--with-plugin-path=PATH],
+                  [default HTS_PATH plugin search path [PLUGINDIR]])],
+  [case $withval in
+     yes) MSG_ERROR([no path specified for --with-plugin-path]) ;;
+     no)  with_plugin_path= ;;
+   esac],
+  [with_plugin_path=$with_plugin_dir])
+AC_SUBST([pluginpath], $with_plugin_path)
+
+AC_ARG_ENABLE([s3],
+  [AS_HELP_STRING([--enable-s3],
+                  [support Amazon AWS S3 URLs])],
+  [], [enable_s3=check])
+
+dnl Classify the host to pick the shared library flavour.  basic_host is
+dnl $host_alias when that is set and non-empty, otherwise "unknown-" followed
+dnl by the output of uname -s.  Each pattern is listed in two spellings
+dnl (presumably lower-case for host triplets and the capitalised form for
+dnl uname output - confirm).  PLATFORM is substituted into the output files
+dnl here; PLUGIN_EXT is only set as a shell variable at this point and is
+dnl substituted later, inside the plugin-support stanza.
+basic_host=${host_alias:-unknown-`uname -s`}
+AC_MSG_CHECKING([shared library type for $basic_host])
+case $basic_host in
+  *-cygwin* | *-CYGWIN*)
+    host_result="Cygwin DLL"
+    PLATFORM=CYGWIN
+    PLUGIN_EXT=.cygdll
+    ;;
+  *-darwin* | *-Darwin*)
+    host_result="Darwin dylib"
+    PLATFORM=Darwin
+    PLUGIN_EXT=.bundle
+    ;;
+  *-msys* | *-MSYS* | *-mingw* | *-MINGW*)
+    host_result="MSYS dll"
+    PLATFORM=MSYS
+    PLUGIN_EXT=.dll
+    # This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64,
+    # %lld and %z printf formats work.  It also enforces the snprintf to
+    # be C99 compliant so it returns the correct values (in kstring.c).
+
+    # Now set by default, so no need to do it here.
+    # CPPFLAGS="$CPPFLAGS -D_XOPEN_SOURCE=600"
+    ;;
+  *)
+    host_result="plain .so"
+    PLATFORM=default
+    PLUGIN_EXT=.so
+    ;;
+esac
+AC_MSG_RESULT([$host_result])
+AC_SUBST([PLATFORM])
+
+dnl Check for versioned symbol support
+dnl Only try for .so shared libraries as other types won't work
+AS_IF([test x"$PLATFORM" = xdefault && test x"$enable_versioned_symbols" = xyes],
+  [AC_CACHE_CHECK([whether the linker supports versioned symbols],
+    [hts_cv_have_versioned_symbols], [
+      save_LDFLAGS=$LDFLAGS
+      LDFLAGS="-Wl,-version-script,$srcdir/htslib.map $LDFLAGS"
+      AC_LINK_IFELSE([AC_LANG_PROGRAM()],
+                     [hts_cv_have_versioned_symbols=yes],
+                     [hts_cv_have_versioned_symbols=no])
+      LDFLAGS=$save_LDFLAGS
+    ])
+   AS_IF([test "x$hts_cv_have_versioned_symbols" = xyes],[
+     VERSION_SCRIPT_LDFLAGS='-Wl,-version-script,$(srcprefix)htslib.map'
+     AC_SUBST([VERSION_SCRIPT_LDFLAGS])
+   ])
+])
+
+dnl Try to get more control over which symbols are exported in the shared
+dnl library.
+HTS_HIDE_DYNAMIC_SYMBOLS
+
+dnl FIXME This pulls in dozens of standard header checks
+AC_FUNC_MMAP
+AC_CHECK_FUNCS([gmtime_r fsync drand48 srand48_deterministic])
+
+# Darwin has a dubious fdatasync() symbol, but no declaration in <unistd.h>
+AC_CHECK_DECL([fdatasync(int)], [AC_CHECK_FUNCS(fdatasync)])
+
+AC_MSG_CHECKING([for __attribute__((constructor))])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+  static __attribute__((constructor)) void noop(void) {}
+]], [])], [
+  AC_MSG_RESULT([yes])
+  AC_DEFINE([HAVE_ATTRIBUTE_CONSTRUCTOR], 1,
+            [Define if __attribute__((constructor)) is available.])
+], [AC_MSG_RESULT([no])])
+
+AC_MSG_CHECKING([for clock_gettime with CLOCK_PROCESS_CPUTIME_ID])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <time.h>]], [[
+  struct timespec ts;
+  clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
+]])], [
+  AC_MSG_RESULT([yes])
+  AC_DEFINE([HAVE_CLOCK_GETTIME_CPUTIME], 1,
+            [Define if clock_gettime exists and accepts CLOCK_PROCESS_CPUTIME_ID.])
+], [AC_MSG_RESULT([no])])
+
+dnl Plugin support: requires dlsym(), optionally adds -rdynamic to the link
+dnl flags, and publishes ENABLE_PLUGINS and PLUGIN_EXT.  PLUGIN_EXT is both
+dnl substituted into output files and defined as a quoted C string.
+if test $enable_plugins != no; then
+  AC_SEARCH_LIBS([dlsym], [dl], [],
+    [MSG_ERROR([dlsym() not found
+
+Plugin support requires dynamic linking facilities from the operating system.
+Either configure with --disable-plugins or resolve this error to build HTSlib.])])
+  # Check if the compiler understands -rdynamic
+  # TODO Test whether this is required and/or needs tweaking per-platform
+  HTS_TEST_CC_C_LD_FLAG([-rdynamic],[rdynamic_flag])
+  AS_IF([test x"$rdynamic_flag" != "xno"],
+    [LDFLAGS="$LDFLAGS $rdynamic_flag"
+     static_LDFLAGS="$static_LDFLAGS $rdynamic_flag"])
+  dnl Only record dlsym's library for static linking when the search result
+  dnl is an actual -lNAME option; any other cached value is ignored.
+  case "$ac_cv_search_dlsym" in
+    -l*) static_LIBS="$static_LIBS $ac_cv_search_dlsym" ;;
+  esac
+  AC_DEFINE([ENABLE_PLUGINS], 1, [Define if HTSlib should enable plugins.])
+  AC_SUBST([PLUGIN_EXT])
+  AC_DEFINE_UNQUOTED([PLUGIN_EXT], ["$PLUGIN_EXT"],
+                     [Platform-dependent plugin filename extension.])
+fi
+
+AC_SEARCH_LIBS([log], [m], [],
+  [MSG_ERROR([log() not found
+
+HTSLIB requires a working floating-point math library.
+FAILED.  This error must be resolved in order to build HTSlib successfully.])])
+
+zlib_devel=ok
+dnl Set a trivial non-empty INCLUDES to avoid excess default includes tests
+AC_CHECK_HEADER([zlib.h], [], [zlib_devel=missing], [;])
+AC_CHECK_LIB(z, inflate,  [], [zlib_devel=missing])
+
+if test $zlib_devel != ok; then
+  MSG_ERROR([zlib development files not found
+
+HTSlib uses compression routines from the zlib library <http://zlib.net>.
+Building HTSlib requires zlib development files to be installed on the build
+machine; you may need to ensure a package such as zlib1g-dev (on Debian or
+Ubuntu Linux) or zlib-devel (on RPM-based Linux distributions or Cygwin)
+is installed.
+
+FAILED.  This error must be resolved in order to build HTSlib successfully.])
+fi
+
+dnl connect() etc. fns are in libc on linux, but libsocket on illumos/Solaris
+AC_SEARCH_LIBS([recv], [socket ws2_32], [
+if test "$ac_cv_search_recv" != "none required"
+then
+  static_LIBS="$static_LIBS $ac_cv_search_recv"
+fi],
+  dnl on MinGW-i686, checking recv() linking requires an annotated declaration
+  [AC_MSG_CHECKING([for library containing recv using declaration])
+   LIBS="-lws2_32 $LIBS"
+   AC_LINK_IFELSE(
+     [AC_LANG_PROGRAM([[#include <winsock2.h>]], [[recv(0, 0, 0, 0);]])],
+     [AC_MSG_RESULT([-lws2_32])
+      static_LIBS="$static_LIBS -lws2_32"],
+     [AC_MSG_RESULT([no])
+      MSG_ERROR([unable to find the recv() function])])])
+
+if test "$enable_bz2" != no; then
+  bz2_devel=ok
+  AC_CHECK_HEADER([bzlib.h], [], [bz2_devel=missing], [;])
+  AC_CHECK_LIB([bz2], [BZ2_bzBuffToBuffCompress], [], [bz2_devel=missing])
+  if test $bz2_devel != ok; then
+    MSG_ERROR([libbzip2 development files not found
+
+The CRAM format may use bzip2 compression, which is implemented in HTSlib
+by using compression routines from libbzip2 <http://www.bzip.org/>.
+
+Building HTSlib requires libbzip2 development files to be installed on the
+build machine; you may need to ensure a package such as libbz2-dev (on Debian
+or Ubuntu Linux) or bzip2-devel (on RPM-based Linux distributions or Cygwin)
+is installed.
+
+Either configure with --disable-bz2 (which will make some CRAM files
+produced elsewhere unreadable) or resolve this error to build HTSlib.])
+  fi
+dnl Unfortunately the 'bzip2' package-cfg module is not standard.
+dnl Redhat/Fedora has it; Debian/Ubuntu does not.
+  if test -n "$PKG_CONFIG" && "$PKG_CONFIG" --exists bzip2; then
+     pc_requires="$pc_requires bzip2"
+  else
+     private_LIBS="$private_LIBS -lbz2"
+  fi
+  static_LIBS="$static_LIBS -lbz2"
+fi
+
+dnl LZMA support (on unless --disable-lzma).  lzma_devel distinguishes two
+dnl failures: a missing lzma.h sets "header-missing", a missing liblzma sets
+dnl "missing".  Only "missing" aborts configure below, so a missing header
+dnl on its own is tolerated at this point.
+dnl NOTE(review): this uses AC_CHECK_HEADERS (plural), which also defines
+dnl HAVE_LZMA_H; presumably the sources use that to cope without the
+dnl system header - confirm.
+dnl When this block completes, liblzma is added to pc_requires and -llzma
+dnl to static_LIBS unconditionally.
+if test "$enable_lzma" != no; then
+  lzma_devel=ok
+  AC_CHECK_HEADERS([lzma.h], [], [lzma_devel=header-missing], [;])
+  AC_CHECK_LIB([lzma], [lzma_easy_buffer_encode], [], [lzma_devel=missing])
+  if test $lzma_devel = missing; then
+    MSG_ERROR([liblzma development files not found
+
+The CRAM format may use LZMA2 compression, which is implemented in HTSlib
+by using compression routines from liblzma <http://tukaani.org/xz/>.
+
+Building HTSlib requires liblzma development files to be installed on the
+build machine; you may need to ensure a package such as liblzma-dev (on Debian
+or Ubuntu Linux), xz-devel (on RPM-based Linux distributions or Cygwin), or
+xz (via Homebrew on macOS) is installed; or build XZ Utils from source.
+
+Either configure with --disable-lzma (which will make some CRAM files
+produced elsewhere unreadable) or resolve this error to build HTSlib.])
+  fi
+  pc_requires="$pc_requires liblzma"
+  static_LIBS="$static_LIBS -llzma"
+fi
+
+AS_IF([test "x$with_external_htscodecs" != "xno"],
+  [libhtscodecs=ok
+   AC_CHECK_HEADER([htscodecs/rANS_static4x16.h],[],
+                   [libhtscodecs='missing header'],[;])
+   AC_CHECK_LIB([htscodecs],[rans_compress_bound_4x16],
+                [:],[libhtscodecs='missing library'])
+   AS_IF([test "$libhtscodecs" = "ok"],
+     [AC_DEFINE([HAVE_EXTERNAL_LIBHTSCODECS], 1, [Define if using an external libhtscodecs])
+      LIBS="-lhtscodecs $LIBS"
+      private_LIBS="-lhtscodecs $private_LIBS"
+      static_LIBS="-lhtscodecs $static_LIBS"
+      selected_htscodecs_mk="htscodecs_external.mk"],
+      [MSG_ERROR([libhtscodecs development files not found: $libhtscodecs
+
+You asked to use an external htscodecs library, but do not have the
+required header / library files.  You either need to supply these and
+if necessary set CPPFLAGS and LDFLAGS so the compiler can find them;
+or configure using --without-external-htscodecs to build the required
+functions from the htscodecs submodule.
+])])],
+  [AC_MSG_CHECKING([whether htscodecs files are present])
+   AS_IF([test -e "$srcdir/htscodecs/htscodecs/rANS_static4x16.h"],
+     [AC_MSG_RESULT([yes])
+      selected_htscodecs_mk="htscodecs_bundled.mk"],
+     [AC_MSG_RESULT([no])
+      AS_IF([test -e "$srcdir/.git"],
+        [MSG_ERROR([htscodecs submodule files not present.
+
+HTSlib uses some functions from the htscodecs project, which is normally
+included as a submodule.  Try running:
+
+  git submodule update --init --recursive
+
+in  the top-level htslib directory to update it, and then re-run configure.
+])],
+        [MSG_ERROR([htscodecs submodule files not present.
+
+You have an incomplete distribution.  Please try downloading one of the
+official releases from https://www.htslib.org
+])])])])
+
+AS_IF([test "x$with_libdeflate" != "xno"],
+  [libdeflate=ok
+   AC_CHECK_HEADER([libdeflate.h],[],[libdeflate='missing header'],[;])
+   AC_CHECK_LIB([deflate], [libdeflate_deflate_compress],[:],[libdeflate='missing library'])
+   AS_IF([test "$libdeflate" = "ok"],
+    [AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define if libdeflate is available.])
+     LIBS="-ldeflate $LIBS"
+     private_LIBS="$private_LIBS -ldeflate"
+     static_LIBS="$static_LIBS -ldeflate"],
+    [AS_IF([test "x$with_libdeflate" != "xcheck"],
+       [MSG_ERROR([libdeflate development files not found: $libdeflate
+
+You requested libdeflate, but do not have the required header / library
+files.  The source for libdeflate is available from
+<https://github.com/ebiggers/libdeflate>.  You may have to adjust
+search paths in CPPFLAGS and/or LDFLAGS if the header and library
+are not currently on them.
+
+Either configure with --without-libdeflate or resolve this error to build
+HTSlib.])])])])
+
+dnl libcurl support.  The shell variable libcurl ends up as "enabled" or
+dnl "disabled" and is substituted into the output files; the GCS and S3
+dnl stanzas further down test it.  With enable_libcurl=check a failed probe
+dnl only warns; with any other non-"no" value a failed probe is fatal.
+libcurl=disabled
+if test "$enable_libcurl" != no; then
+  libcurl_devel=ok
+  AC_CHECK_HEADER([curl/curl.h], [], [libcurl_devel="headers not found"], [;])
+  dnl curl_easy_pause is the probe for a new enough libcurl: if it is absent
+  dnl but curl_easy_init still links, the library is reported as too old.
+  dnl The explicit [:] success action replaces AC_CHECK_LIB's default
+  dnl action, so -lcurl is not prepended to LIBS here.
+  AC_CHECK_LIB([curl], [curl_easy_pause], [:],
+    [AC_CHECK_LIB([curl], [curl_easy_init],
+       [libcurl_devel="library is too old (7.18+ required)"],
+       [libcurl_devel="library not found"])])
+
+  if test "$libcurl_devel" = ok; then
+    AC_DEFINE([HAVE_LIBCURL], 1, [Define if libcurl file access is enabled.])
+    libcurl=enabled
+  elif test "$enable_libcurl" = check; then
+    AC_MSG_WARN([libcurl not enabled: $libcurl_devel])
+  else
+    MSG_ERROR([libcurl $libcurl_devel
+
+Support for HTTPS and other SSL-based URLs requires routines from the libcurl
+library <http://curl.se/libcurl/>.  Building HTSlib with libcurl enabled
+requires libcurl development files to be installed on the build machine; you
+may need to ensure a package such as libcurl4-{gnutls,nss,openssl}-dev (on
+Debian or Ubuntu Linux) or libcurl-devel (on RPM-based Linux distributions
+or Cygwin) is installed.
+
+Either configure with --disable-libcurl or resolve this error to build HTSlib.])
+  fi
+
+dnl -lcurl is only needed for static linking if hfile_libcurl is not a plugin
+  if test "$libcurl" = enabled ; then
+    if test "$enable_plugins" != yes ; then
+      static_LIBS="$static_LIBS -lcurl"
+    fi
+  fi
+fi
+AC_SUBST([libcurl])
+
+gcs=disabled
+if test "$enable_gcs" != no; then
+  if test $libcurl = enabled; then
+    AC_DEFINE([ENABLE_GCS], 1, [Define if HTSlib should enable GCS support.])
+    gcs=enabled
+  else
+    case "$enable_gcs" in
+      check) AC_MSG_WARN([GCS support not enabled: requires libcurl support]) ;;
+      *) MSG_ERROR([GCS support not enabled
+
+Support for Google Cloud Storage URLs requires libcurl support to be enabled
+in HTSlib.  Configure with --enable-libcurl in order to use GCS URLs.])
+      ;;
+    esac
+  fi
+fi
+AC_SUBST([gcs])
+
+s3=disabled
+if test "$enable_s3" != no; then
+  if test $libcurl = enabled; then
+    s3=enabled
+    need_crypto="$enable_s3"
+  else
+    case "$enable_s3" in
+      check) AC_MSG_WARN([S3 support not enabled: requires libcurl support]) ;;
+      *) MSG_ERROR([S3 support not enabled
+
+Support for Amazon AWS S3 URLs requires libcurl support to be enabled
+in HTSlib.  Configure with --enable-libcurl in order to use S3 URLs.])
+      ;;
+    esac
+  fi
+fi
+
+dnl HMAC provider lookup for S3.  need_crypto carries the S3 request mode:
+dnl the S3 stanza above copies enable_s3 into it when S3 is enabled.
+dnl CCHmac is tried first (defines HAVE_COMMONCRYPTO); otherwise HMAC is
+dnl searched for in libcrypto (defines HAVE_HMAC).  LIBS is saved before the
+dnl search and restored afterwards, so a library that was found is carried
+dnl in CRYPTO_LIBS rather than in LIBS.  If neither function is found,
+dnl "check" mode downgrades s3 to disabled with a warning and any other
+dnl mode is a fatal error.
+dnl NOTE(review): $need_crypto is expanded unquoted in the test below and is
+dnl only assigned in the S3 stanza when S3 is enabled; presumably it is
+dnl initialised to "no" earlier in the file - confirm.
+CRYPTO_LIBS=
+if test $need_crypto != no; then
+  AC_CHECK_FUNC([CCHmac],
+    [AC_DEFINE([HAVE_COMMONCRYPTO], 1,
+               [Define if you have the Common Crypto library.])],
+    [save_LIBS=$LIBS
+     AC_SEARCH_LIBS([HMAC], [crypto],
+       [AC_DEFINE([HAVE_HMAC], 1, [Define if you have libcrypto-style HMAC().])
+        case "$ac_cv_search_HMAC" in
+          -l*) CRYPTO_LIBS=$ac_cv_search_HMAC ;;
+        esac],
+     [case "$need_crypto" in
+     check) AC_MSG_WARN([S3 support not enabled: requires SSL development files])
+         s3=disabled ;;
+     *) MSG_ERROR([SSL development files not found
+
+Support for AWS S3 URLs requires routines from an SSL library.  Building
+HTSlib with libcurl enabled requires SSL development files to be installed
+on the build machine; you may need to ensure a package such as libgnutls-dev,
+libnss3-dev, or libssl-dev (on Debian or Ubuntu Linux, corresponding to the
+libcurl4-*-dev package installed), or openssl-devel (on RPM-based Linux
+distributions or Cygwin) is installed.
+
+Either configure with --disable-s3 or resolve this error to build HTSlib.]) ;;
+       esac])
+     LIBS=$save_LIBS])
+dnl Only need to add to static_LIBS if not building as a plugin
+  if test "$enable_plugins" != yes ; then
+     static_LIBS="$static_LIBS $CRYPTO_LIBS"
+  fi
+fi
+
+dnl Look for regcomp in various libraries (needed on windows/mingw).
+AC_SEARCH_LIBS(regcomp, regex, [libregex=needed], [])
+
+dnl Look for PTHREAD_MUTEX_RECURSIVE.
+dnl This is normally in pthread.h except on some broken glibc implementations.
+dnl Now set by default
+dnl AC_CHECK_DECL(PTHREAD_MUTEX_RECURSIVE, [], [AC_DEFINE([_XOPEN_SOURCE],[600], [Needed for PTHREAD_MUTEX_RECURSIVE])], [[#include <pthread.h>]])
+
+if test "$s3" = enabled ; then
+   AC_DEFINE([ENABLE_S3], 1, [Define if HTSlib should enable S3 support.])
+fi
+
+dnl Apply value from HTS_PROG_CC_WERROR (if set)
+AS_IF([test "x$hts_late_cflags" != x],[CFLAGS="$CFLAGS $hts_late_cflags"])
+
+AC_SUBST([s3])
+AC_SUBST([CRYPTO_LIBS])
+
+AC_SUBST([pc_requires])
+AC_SUBST([private_LIBS])
+AC_SUBST([static_LDFLAGS])
+AC_SUBST([static_LIBS])
+
+AC_CONFIG_FILES([config.mk htslib.pc.tmp:htslib.pc.in])
+AC_CONFIG_LINKS([htscodecs.mk:$selected_htscodecs_mk])
+
+if test "$srcdir" != .; then
+  # Set up for a separate build directory. As HTSlib uses a non-recursive
+  # makefile, we need to create additional build subdirectories explicitly.
+  AC_CONFIG_LINKS([Makefile:Makefile htslib.mk:htslib.mk])
+  AC_CONFIG_FILES([htslib_vars.mk:builddir_vars.mk.in])
+  AC_CONFIG_COMMANDS([mkdir],
+    [AS_MKDIR_P([cram])
+     AS_MKDIR_P([htscodecs/htscodecs])
+     AS_MKDIR_P([htscodecs/tests])
+     AS_MKDIR_P([test/fuzz])
+     AS_MKDIR_P([test/longrefs])
+     AS_MKDIR_P([test/tabix])])
+fi
+
+# @HTSDIRslash_if_relsrcdir@ will be empty when $srcdir is absolute
+case "$srcdir" in
+  /*) HTSDIRslash_if_relsrcdir= ;;
+  *)  HTSDIRslash_if_relsrcdir='$(HTSDIR)/' ;;
+esac
+AC_SUBST([HTSDIRslash_if_relsrcdir])
+
+AC_OUTPUT
--- a/ext/htslib/configure~
+++ b/ext/htslib/configure~
--- a/ext/htslib/cram/README
+++ b/ext/htslib/cram/README
@ -0,0 +1,214 @@
+CRAM encoding internals
+=======================
+
+A quick summary of functions involved.
+
+The encoder works by accumulating a bunch of BAM records (via the
+cram_put_bam_seq function), and at a certain point (eg counter of
+records, or switching reference) the array of BAM records is turned
+into a container, which in turn creates slices, holding CRAM
+data-series in blocks.  The function that turns an array of BAM
+objects into the container is below.
+
+cram_encode_container func:
+    Validate references MD5 against header, unless no_ref mode
+    If embed_ref <= 1, fetch ref
+        Switch to embed_ref=2 if failed
+
+    Foreach slice:
+        If embed_ref == 2
+	    call cram_generate_reference
+	        if failed switch to no_ref mode
+	Foreach sequence
+	    call process_one_read to append BAM onto each data series (DS)
+	        call cram_stats_add for each DS to gather metrics
+		call cram_encode_aux
+
+    # We now have cram DS, per slice
+    call cram_encoder_init, per DS (based on cram_stats_add data)
+
+    Foreach slice:
+        call cram_encode_slice to turn DS to blocks
+	    call cram_compress_slice
+
+    call cram_encode_compression_header
+
+Threading
+---------
+
+CRAM can be multi-threaded, but this brings complications.
+
+The above function is the main CPU user, so it is this bit which can
+be executed in parallel from multiple threads.  To understand this we
+need to now look at how the primary loop works when writing a CRAM:
+
+Encoding main thread:
+    repeatedly calls cram_put_bam_seq
+        calls cram_new_container on first time through to initialise
+	calls cram_next_container when current is full or we need to flush
+	    calls cram_flush_container_mt to flush last container
+        pushes BAM object onto current container
+
+If non-threaded, cram_flush_container_mt does:
+    call cram_flush_container
+        call cram_encode_container to go from BAM to CRAM data-series
+ 	call cram_flush_container2 (writes it out)
+
+If threaded, cram_flush_container_mt does:
+    Main: Dispatch cram_flush_thread job
+        Thread: call cram_encode_container to go from BAM to CRAM data-series
+    Main: Call cram_flush_result to drain queue of encoded containers
+        Main: Call cram_flush_container2 (writes it out);
+
+
+
+Decisions on when to create new containers, detection of sorted vs unsorted,
+switching to multi-seq mode, etc occur at the main thread in
+cram_put_bam_seq.
+
+We can change our mind on container parameters at any point up until
+the cram_encode_container call.  At that point these parameters get
+baked into a container compression header and all data-series
+generated need to be in sync with the parameters.
+
+It is possible that some parameter changes can get detected while
+encoding the container, as it is there where we fetch references.  Eg
+the need to enable embedded reference or switch to non-ref mode.
+
+While encoding a container, we can change the parameters for *this*
+container, and we can also set the default parameter for subsequent
+new containers via the global cram fd to avoid spamming attempts to
+load a reference which doesn't exist, but we cannot change other
+containers that are being processed in parallel.  They'll fend for
+themselves.
+
+References
+----------
+
+To avoid spamming the reference servers, there is a shared cache of
+references being currently used by all the worker threads (leading to
+confusing terminology of reference-counting of references).  So each
+container fetches its section of reference, but the memory for that is
+handled via its own layer.
+
+The shared references and ref meta-data is held in cram_fd -> refs (a
+refs_t pointer):
+
+    // References structure.
+    struct refs_t {
+        string_alloc_t *pool;  // String pool for holding filenames and SN vals
+    
+        khash_t(refs) *h_meta; // ref_entry*, index by name
+        ref_entry **ref_id;    // ref_entry*, index by ID
+        int nref;              // number of ref_entry
+    
+        char *fn;              // current file opened
+        BGZF *fp;              // and the BGZF* to go with it.
+    
+        int count;             // how many cram_fd sharing this refs struct
+    
+        pthread_mutex_t lock;  // Mutex for multi-threaded updating
+        ref_entry *last;       // Last queried sequence
+        int last_id;           // Used in cram_ref_decr_locked to delay free
+    };
+
+Within this, ref_entry is the per-reference information:
+
+    typedef struct ref_entry {
+        char *name;
+        char *fn;
+        int64_t length;
+        int64_t offset;
+        int bases_per_line;
+        int line_length;
+        int64_t count;     // for shared references so we know to dealloc seq
+        char *seq;
+        mFILE *mf;
+        int is_md5;        // Reference comes from a raw seq found by MD5
+        int validated_md5;
+    } ref_entry;
+
+Sharing of references to track use between threads is via
+cram_ref_incr* and cram_ref_decr* (with locked and unlocked
+variants).  We free a reference when the usage count hits zero.  To
+avoid spamming discard and reload in single-thread creation of a
+pos-sorted CRAM, we keep track of the last reference in cram_fd and
+delay discard by one loop iteration.
+
+There are complexities here around whether the references come from a
+single ref.fa file, are from a local MD5sum cache with one file per
+reference (mmapped), or whether they're fetched from some remote
+REF_PATH query such as the EBI.  (This latter case typically downloads
+to a local md5 based ref-cache first and mmaps from there.)
+
+The refs struct starts off by being populated from the SAM header.  We
+have M5 tag and name known, maybe a filename, but length is 0 and seq
+is NULL.  This is done by cram_load_reference:
+
+cram_load_reference (cram_fd, filename):
+    if filename non-NULL
+        call refs_load_fai
+	    Populates ref_entry with filename, name, length, line-len, etc
+    	sanitise_SQ_lines
+    If no refs loaded
+        call refs_from_header
+	    populates ref_entry with name.
+	    Sets length=0 as marker for not-yet-loaded
+
+The main interface used from the code is cram_get_ref().  It takes a
+reference ID, start and end coordinate and returns a pointer to the
+relevant sub-sequence.
+
+cram_get_ref:
+    r = fd->refs->ref_id[id];    // current ref
+    call cram_populate_ref if stored length is 0 (ie ref.fa set)
+        search REF_PATH / REF_CACHE
+	call bgzf_open if local_path
+	call open_path_mfile otherwise
+	copy to local REF_CACHE if required (eg remote fetch)
+
+    If start = 1 and end = ref-length
+       If ref seq unknown
+           call cram_ref_load to load entire ref and use that
+
+    If ref seq now known, return it
+
+    // Otherwise known via .fai or we've errored by now.
+    call load_ref_portion to return a sub-seq from index fasta
+
+The encoder asks for the entire reference rather than a small portion
+of it as we're usually encoding a large amount.  The decoder may be
+dealing with small range queries, so it only asks for the relevant
+sub-section of reference as specified in the cram slice headers.
+
+
+TODO
+====
+
+- Multi-ref mode is enabled when we have too many small containers in
+  a row.
+
+  Instead of firing off new containers when we switch reference, we
+  could always make a new container after N records, separating off
+  M <= N to make the container such that all M are the same reference,
+  and shuffling any remaining N-M down as the start of the next.
+
+  This means we can detect how many new containers we would create,
+  and enable multi-ref mode straight away rather than keeping a recent
+  history of how many small containers we've emitted.
+
+- The cache of references currently being used is a better place to
+  track the global embed-ref and non-ref logic.  Better than cram_fd.
+  Cram_fd is a one-way change, as once we enable non-ref we'll stick
+  with it.
+
+  However if it was per-ref in the ref-cache then we'd probe and try
+  each reference once, and then all new containers for that ref would
+  honour the per-ref parameters.  So a single missing reference in the
+  middle of a large file wouldn't change behaviour for all subsequent
+  references.
+
+  Optionally we could still do meta-analysis on how many references
+  are failing, and switch the global cram_fd params to avoid repeated
+  testing of reference availability if it's becoming obvious that none
+  of them are known.
--- a/ext/htslib/cram/cram.h
+++ b/ext/htslib/cram/cram.h
@ -0,0 +1,61 @@
+/*
+Copyright (c) 2012-2013, 2015, 2018 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*! \file
+ * CRAM interface.
+ *
+ * Consider using the higher level hts_*() API for programs that wish to
+ * be file format agnostic (see htslib/hts.h).
+ *
+ * This API should be used for CRAM specific code. The specifics of the
+ * public API are implemented in cram_io.h, cram_encode.h and cram_decode.h
+ * although these should not be included directly (use this file instead).
+ */
+
+#ifndef CRAM_ALL_H
+#define CRAM_ALL_H
+
+#include "cram_samtools.h"
+#include "../header.h"
+#include "cram_structs.h"
+#include "cram_io.h"
+#include "cram_encode.h"
+#include "cram_decode.h"
+#include "cram_stats.h"
+#include "cram_codecs.h"
+#include "cram_index.h"
+
+// Validate against the external cram.h,
+//
+// This contains duplicated portions from cram_io.h and cram_structs.h,
+// so we want to ensure that the prototypes match.
+#include "../htslib/cram.h"
+
+#endif
--- a/ext/htslib/cram/cram_codecs.c
+++ b/ext/htslib/cram/cram_codecs.c
--- a/ext/htslib/cram/cram_codecs.h
+++ b/ext/htslib/cram/cram_codecs.h
@ -0,0 +1,264 @@
+/*
+Copyright (c) 2012-2015, 2018, 2020, 2023 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CRAM_CODECS_H
+#define CRAM_CODECS_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct cram_codec;
+
+/*
+ * Slow but simple huffman decoder to start with.
+ * Read a bit at a time, keeping track of {length, value}
+ * eg. 1 1 0 1 => {1,1},  {2,3}, {3,6}, {4,13}
+ *
+ * Keep track of this through the huffman code table.
+ * For fast scanning we have an index of where the first code of length X
+ * appears.
+ */
+typedef struct {
+    int64_t symbol;
+    int32_t p; // next code start value, minus index to codes[]
+    int32_t code;
+    int32_t len;
+} cram_huffman_code;
+
+typedef struct {
+    int ncodes;
+    cram_huffman_code *codes;
+    int option;
+} cram_huffman_decoder;
+
+#define MAX_HUFF 128
+typedef struct {
+    cram_huffman_code *codes;
+    int nvals;
+    int val2code[MAX_HUFF+1]; // value to code lookup for small values
+    int option;
+} cram_huffman_encoder;
+
+typedef struct {
+    int32_t offset;
+    int32_t nbits;
+} cram_beta_decoder;
+
+// A PACK transform, packing multiple values into a single byte.
+// The same struct is used for both directions: cram_xpack_encoder below is
+// a typedef alias of cram_xpack_decoder.
+typedef struct {
+    int32_t nbits;      // presumably bits used per packed value - TODO confirm
+    enum cram_encoding sub_encoding;
+    void *sub_codec_dat;
+    struct cram_codec *sub_codec;
+    int nval;  // number of items in maps
+    uint32_t rmap[256]; // 0,1,2,3 -> P,A,C,K
+    int map[256];       // P,A,C,K -> 0,1,2,3 // NB: max input is uint8_t? Or use hash?
+} cram_xpack_decoder;
+typedef cram_xpack_decoder cram_xpack_encoder;
+
+// State for the "xrle" transform codec; cram_xrle_encoder below is a
+// typedef alias of the same struct.
+// NOTE(review): the comment previously here ("Transforms symbols X,Y,Z to
+// bytes 0,1,2.") reads like a description of the PACK mapping; the fields
+// below (separate length and literal sub-codecs, rep_score) look like
+// run-length coding instead - confirm against cram_codecs.c.
+typedef struct {
+    enum cram_encoding len_encoding;
+    enum cram_encoding lit_encoding;
+    void *len_dat;
+    void *lit_dat;
+    struct cram_codec *len_codec;
+    struct cram_codec *lit_codec;
+    int cur_len;
+    int cur_lit;
+    int rep_score[256];
+    char *to_flush;
+    size_t to_flush_size;
+} cram_xrle_decoder;
+typedef cram_xrle_decoder cram_xrle_encoder;
+
+// DELTA + zigzag + varint encoding
+typedef struct {
+    // FIXME: define endian here too.  Require little endian?
+    int64_t last;
+    uint8_t word_size; // 1, 2, 4, 8
+    //uint8_t sign;      // true if input data is already signed
+    enum cram_encoding sub_encoding;
+    void *sub_codec_dat;
+    struct cram_codec *sub_codec;
+} cram_xdelta_decoder;
+typedef cram_xdelta_decoder cram_xdelta_encoder;
+
+typedef struct {
+    int32_t offset;
+} cram_gamma_decoder;
+
+typedef struct {
+    int32_t offset;
+    int32_t k;
+} cram_subexp_decoder;
+
+typedef struct {
+    int32_t content_id;
+    enum cram_external_type type;
+} cram_external_decoder;
+
+typedef struct {
+    int32_t content_id;
+    int64_t offset;
+    enum cram_external_type type;
+} cram_varint_decoder;
+
+typedef struct {
+    struct cram_codec *len_codec;
+    struct cram_codec *val_codec;
+} cram_byte_array_len_decoder;
+
+typedef struct {
+    unsigned char stop;
+    int32_t content_id;
+} cram_byte_array_stop_decoder;
+
+typedef struct {
+    enum cram_encoding len_encoding;
+    enum cram_encoding val_encoding;
+    void *len_dat;
+    void *val_dat;
+    struct cram_codec *len_codec;
+    struct cram_codec *val_codec;
+} cram_byte_array_len_encoder;
+
+typedef struct {
+    int64_t val;
+} cram_const_codec;
+
+/*
+ * A generic codec structure.
+ *
+ * Holds the encoding identifier, a set of function pointers for the
+ * operations on that encoding, and a union (u) of the per-encoding state
+ * structs declared earlier in this header.
+ *
+ * This header includes only <stdint.h>; the other types used here
+ * (cram_block, cram_slice, varint_vec, kstring_t, enum cram_encoding) must
+ * already be declared by whatever includes it.
+ */
+struct cram_codec {
+    enum cram_encoding codec;
+    cram_block *out;
+    varint_vec *vv;
+    int codec_id;
+
+    // Per-encoding operations.  Presumably filled in by cram_decoder_init()
+    // / cram_encoder_init() - TODO confirm which are set for each codec.
+    void (*free)(struct cram_codec *codec);
+    int (*decode)(cram_slice *slice, struct cram_codec *codec,
+                  cram_block *in, char *out, int *out_size);
+    int (*encode)(cram_slice *slice, struct cram_codec *codec,
+                  char *in, int in_size);
+    int (*store)(struct cram_codec *codec, cram_block *b, char *prefix,
+                 int version);
+    int (*size)(cram_slice *slice, struct cram_codec *codec);
+    int (*flush)(struct cram_codec *codec);
+    cram_block *(*get_block)(cram_slice *slice, struct cram_codec *codec);
+    int (*describe)(struct cram_codec *codec, kstring_t *ks);
+
+    // Per-encoding state.  Unprefixed members are the decoder-side views;
+    // e_-prefixed members are the encoder-side views.  Only e_huffman and
+    // e_byte_array_len have dedicated encoder struct types; the other e_
+    // members reuse the same struct type as their decoder counterpart.
+    union {
+        cram_huffman_decoder         huffman;
+        cram_external_decoder        external;
+        cram_beta_decoder            beta;
+        cram_gamma_decoder           gamma;
+        cram_subexp_decoder          subexp;
+        cram_byte_array_len_decoder  byte_array_len;
+        cram_byte_array_stop_decoder byte_array_stop;
+        cram_xpack_decoder           xpack;
+        cram_xrle_decoder            xrle;
+        cram_xdelta_decoder          xdelta;
+        cram_const_codec             xconst;
+        cram_varint_decoder          varint;
+
+        cram_huffman_encoder         e_huffman;
+        cram_external_decoder        e_external;
+        cram_byte_array_stop_decoder e_byte_array_stop;
+        cram_byte_array_len_encoder  e_byte_array_len;
+        cram_beta_decoder            e_beta;
+        cram_xpack_decoder           e_xpack;
+        cram_xrle_decoder            e_xrle;
+        cram_xdelta_decoder          e_xdelta;
+        cram_const_codec             e_xconst;
+        cram_varint_decoder          e_varint;
+    } u;
+};
+
+const char *cram_encoding2str(enum cram_encoding t);
+
+cram_codec *cram_decoder_init(cram_block_compression_hdr *hdr,
+                              enum cram_encoding codec, char *data, int size,
+                              enum cram_external_type option,
+                              int version, varint_vec *vv);
+cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st,
+                              enum cram_external_type option, void *dat,
+                              int version, varint_vec *vv);
+
+//int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size);
+//void cram_decoder_free(void *codes);
+
+//#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++))
+
+/*
+ * Shift the next bit of block b into the low end of v.
+ *
+ * v is shifted left by one and the bit at position b->bit of
+ * b->data[b->byte] is OR-ed into bit 0.  b->bit is then decremented; when
+ * it drops below zero, b->byte is advanced by one and b->bit wraps round
+ * to 7 (via the "& 7").
+ *
+ * No bounds checking is done here; see cram_not_enough_bits().
+ *
+ * Both arguments are expanded more than once, so they must be free of
+ * side effects.  They are parenthesised so that expressions other than a
+ * plain identifier (eg "&blk" or "*vp") expand correctly.
+ */
+#define GET_BIT_MSB(b,v) (void)((v)<<=1, (v)|=((b)->data[(b)->byte] >> (b)->bit)&1, (b)->byte += (--(b)->bit<0), (b)->bit&=7)
+
+/*
+ * Tests whether a bit-based decoder can take nbits more bits from blk.
+ *
+ * Returns 0 if enough bits remain;
+ *         1 if they do not, or if nbits is negative.
+ */
+
+static inline int cram_not_enough_bits(cram_block *blk, int nbits) {
+    // A negative request can never be satisfied.
+    if (nbits < 0) {
+        return 1;
+    }
+
+    // Byte position is already at or beyond the uncompressed size.
+    if (blk->byte >= blk->uncomp_size && nbits > 0) {
+        return 1;
+    }
+
+    // The bit count below multiplies the remaining bytes by 8, so it is
+    // only evaluated while that remainder is at most INT32_MAX / 8 + 1.
+    // Anything larger is treated as having enough bits.
+    if (blk->uncomp_size - blk->byte > INT32_MAX / 8 + 1) {
+        return 0;
+    }
+
+    // Remaining bits: whole bytes times 8, adjusted by the position
+    // (blk->bit) within the current byte.
+    return (blk->uncomp_size - blk->byte) * 8 + blk->bit - 7 < nbits;
+}
+
+/*
+ * Returns the content_id used by this codec, also in id2 if byte_array_len.
+ * Returns -1 for the CORE block and -2 for unneeded.
+ * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.
+ */
+int cram_codec_to_id(cram_codec *c, int *id2);
+
+/*
+ * cram_codec structures are specialised for decoding or encoding.
+ * Unfortunately this makes turning a decoder into an encoder (such as
+ * when transcoding files) problematic.
+ *
+ * This function converts a cram decoder codec into an encoder version
+ * in-place (ie it modifies the codec itself).
+ *
+ * Returns 0 on success;
+ *        -1 on failure.
+ */
+int cram_codec_decoder2encoder(cram_fd *fd, cram_codec *c);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CRAM_CODECS_H */
--- a/ext/htslib/cram/cram_decode.c
+++ b/ext/htslib/cram/cram_decode.c
--- a/ext/htslib/cram/cram_decode.h
+++ b/ext/htslib/cram/cram_decode.h
@ -0,0 +1,142 @@
+/*
+Copyright (c) 2012-2013, 2018, 2024 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*! \file
+ * Include cram.h instead.
+ *
+ * This is an internal part of the CRAM system and is automatically included
+ * when you #include cram.h.
+ *
+ * Implements the decoding portion of CRAM I/O. Also see
+ * cram_codecs.[ch] for the actual encoding functions themselves.
+ */
+
+#ifndef CRAM_DECODE_H
+#define CRAM_DECODE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ----------------------------------------------------------------------
+ * CRAM sequence iterators.
+ */
+
+/*! Read the next cram record and return it as a cram_record.
+ *
+ * Note that to decode cram_record the caller will need to look up some data
+ * in the current slice, pointed to by fd->ctr->slice. This is valid until
+ * the next call to cram_get_seq (which may invalidate it).
+ *
+ * @return
+ * Returns record pointer on success (do not free);
+ *        NULL on failure
+ */
+cram_record *cram_get_seq(cram_fd *fd);
+
+/*! Read the next cram record and convert it to a bam_seq_t struct.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on EOF or failure (check fd->err)
+ */
+int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam);
+
+
+/* ----------------------------------------------------------------------
+ * Internal functions
+ */
+
+/*! INTERNAL:
+ * Decodes a CRAM block compression header.
+ *
+ * @return
+ * Returns header ptr on success;
+ *         NULL on failure
+ */
+cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd,
+                                                           cram_block *b);
+
+/*! INTERNAL:
+ * Decodes a CRAM (un)mapped slice header block.
+ *
+ * @return
+ * Returns slice header ptr on success;
+ *         NULL on failure
+ */
+cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b);
+
+
+/*! INTERNAL:
+ * Loads and decodes the next slice worth of data.
+ *
+ * @return
+ * Returns cram slice pointer on success;
+ *         NULL on failure
+ */
+cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp);
+
+/*! INTERNAL:
+ * Decode an entire slice from container blocks. Fills out s->crecs[] array.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
+                      sam_hdr_t *hdr);
+
+
+/*! INTERNAL:
+ * Converts a cram in-memory record into a bam in-memory record. We
+ * pass a pointer to a bam_seq_t pointer along with a pointer to
+ * the allocated size. These can initially be pointers to NULL and zero.
+ *
+ * This function will reallocate the bam buffer as required and update
+ * (*bam)->alloc accordingly, allowing it to be used within a loop
+ * efficiently without needing to allocate new bam objects over and
+ * over again.
+ *
+ * Returns the used size of the bam record on success
+ *         -1 on failure.
+ */
+int cram_to_bam(sam_hdr_t *sh, cram_fd *fd, cram_slice *s,
+                cram_record *cr, int rec, bam_seq_t **bam);
+
+/*
+ * Drains and frees the decode read-queue for a multi-threaded reader.
+ */
+void cram_drain_rqueue(cram_fd *fd);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/ext/htslib/cram/cram_encode.c
+++ b/ext/htslib/cram/cram_encode.c
--- a/ext/htslib/cram/cram_encode.h
+++ b/ext/htslib/cram/cram_encode.h
@ -0,0 +1,116 @@
+/*
+Copyright (c) 2012-2013, 2018 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*! \file
+ * Include cram.h instead.
+ *
+ * This is an internal part of the CRAM system and is automatically included
+ * when you #include cram.h.
+ *
+ * Implements the encoding portion of CRAM I/O. Also see
+ * cram_codecs.[ch] for the actual encoding functions themselves.
+ */
+
+#ifndef CRAM_ENCODE_H
+#define CRAM_ENCODE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ----------------------------------------------------------------------
+ * CRAM sequence iterators.
+ */
+
+/*! Write iterator: put BAM format sequences into a CRAM file.
+ *
+ * We buffer up a container's worth of data at a time.
+ *
+ * FIXME: break this into smaller pieces.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b);
+
+
+/* ----------------------------------------------------------------------
+ * Internal functions
+ */
+
+/*! INTERNAL:
+ * Encodes a compression header block into a generic cram_block structure.
+ *
+ * @return
+ * Returns cram_block ptr on success;
+ *         NULL on failure
+ */
+cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c,
+                                           cram_block_compression_hdr *h,
+                                           int embed_ref);
+
+/*! INTERNAL:
+ * Encodes a slice compression header.
+ *
+ * @return
+ * Returns cram_block on success;
+ *         NULL on failure
+ */
+cram_block *cram_encode_slice_header(cram_fd *fd, cram_slice *s);
+
+/*! INTERNAL:
+ * Encodes all slices in a container into blocks.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ *
+ * FIXME: separate into encode_container and write_container. Ideally
+ * we should be able to do read_container / write_container or
+ * decode_container / encode_container.
+ */
+int cram_encode_container(cram_fd *fd, cram_container *c);
+
+/*! INTERNAL:
+ *
+ * During cram_next_container or before the final flush at end of
+ * file, we update the current slice headers and increment the slice
+ * number to the next slice.
+ *
+ * See cram_next_container() and cram_close().
+ */
+void cram_update_curr_slice(cram_container *c, int version);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/ext/htslib/cram/cram_external.c
+++ b/ext/htslib/cram/cram_external.c
--- a/ext/htslib/cram/cram_index.c
+++ b/ext/htslib/cram/cram_index.c
--- a/ext/htslib/cram/cram_index.h
+++ b/ext/htslib/cram/cram_index.h
@ -0,0 +1,115 @@
+/*
+Copyright (c) 2013, 2018 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CRAM_INDEX_H
+#define CRAM_INDEX_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Loads a CRAM .crai index into memory.
+ * Returns 0 for success
+ *        -1 for failure
+ */
+int cram_index_load(cram_fd *fd, const char *fn, const char *fn_idx);
+
+void cram_index_free(cram_fd *fd);
+
+/*
+ * Searches the index for the first slice overlapping a reference ID
+ * and position.
+ *
+ * Returns the cram_index pointer on success
+ *         NULL on failure
+ */
+cram_index *cram_index_query(cram_fd *fd, int refid, hts_pos_t pos, cram_index *frm);
+cram_index *cram_index_last(cram_fd *fd, int refid, cram_index *from);
+cram_index *cram_index_query_last(cram_fd *fd, int refid, hts_pos_t end);
+
+/*
+ * Skips to a container overlapping the start coordinate listed in
+ * cram_range.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_seek_to_refpos(cram_fd *fd, cram_range *r);
+
+void cram_index_free(cram_fd *fd);
+
+/*
+ * Skips to a container overlapping the start coordinate listed in
+ * cram_range.
+ *
+ * In theory we call cram_index_query multiple times, once per slice
+ * overlapping the range. However slices may be absent from the index
+ * which makes this problematic. Instead we find the left-most slice
+ * and then read from then on, skipping decoding of slices and/or
+ * whole containers when they don't overlap the specified cram_range.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_seek_to_refpos(cram_fd *fd, cram_range *r);
+
+/*
+ * Builds an index file.
+ *
+ * fd is a newly opened cram file that we wish to index.
+ * fn_base is the filename of the associated CRAM file.
+ * fn_idx is the filename of the index file to be written;
+ * if NULL, we add ".crai" to fn_base to get the index filename.
+ *
+ * Returns 0 on success,
+ *         negative on failure (-1 for read failure, -4 for write failure)
+ */
+int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx);
+
+/*
+ * Adds a single slice to the index.
+ *
+ * Returns 0 on success,
+ *        -1 on failure
+ */
+int cram_index_slice(cram_fd *fd,
+                     cram_container *c,
+                     cram_slice *s,
+                     BGZF *fp,
+                     off_t cpos,
+                     off_t spos, // relative to cpos
+                     off_t sz);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/ext/htslib/cram/cram_io.c
+++ b/ext/htslib/cram/cram_io.c
--- a/ext/htslib/cram/cram_io.h
+++ b/ext/htslib/cram/cram_io.h
@ -0,0 +1,648 @@
+/*
+Copyright (c) 2012-2020 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*! \file
+ * Include cram.h instead.
+ *
+ * This is an internal part of the CRAM system and is automatically included
+ * when you #include cram.h.
+ *
+ * Implements the low level CRAM I/O primitives.
+ * This includes basic data types such as byte, int, ITF-8,
+ * maps, bitwise I/O, etc.
+ */
+
+#ifndef CRAM_IO_H
+#define CRAM_IO_H
+
+#include <stdint.h>
+
+#include "misc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**@{ ----------------------------------------------------------------------
+ * ITF8 encoding and decoding.
+ *
+ * Also see the itf8_get and itf8_put macros.
+ */
+
+/*! INTERNAL: Converts two characters into an integer for use in switch{} */
+#define CRAM_KEY(a,b) ((((unsigned char) a)<<8)|(((unsigned char) b)))
+
+/*! Reads an integer in ITF-8 encoding from 'fd' and stores it in
+ * *val.
+ *
+ * @return
+ * Returns the number of bytes read on success;
+ *        -1 on failure
+ */
+int itf8_decode(cram_fd *fd, int32_t *val);
+
+extern const int itf8_bytes[16];
+extern const int ltf8_bytes[256];
+
+/*! Pushes a value in ITF8 format onto the end of a block.
+ *
+ * This shouldn't be used for high-volume data as it is not the fastest
+ * method.
+ *
+ * @return
+ * Returns the number of bytes written
+ */
+int itf8_put_blk(cram_block *blk, int32_t val);
+int ltf8_put_blk(cram_block *blk, int64_t val);
+
+/*! Pulls a literal 32-bit value from a block.
+ *
+ * @returns the number of bytes decoded;
+ *         -1 on failure.
+ */
+int int32_get_blk(cram_block *b, int32_t *val);
+
+/*! Pushes a literal 32-bit value onto the end of a block.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure.
+ */
+int int32_put_blk(cram_block *blk, int32_t val);
+
+
+/**@}*/
+/**@{ ----------------------------------------------------------------------
+ * CRAM blocks - the dynamically growable data block. We have code to
+ * create, update, (un)compress and read/write.
+ *
+ * These are derived from the deflate_interlaced.c blocks, but with the
+ * CRAM extension of content types and IDs.
+ */
+
+/*! Allocates a new cram_block structure with a specified content_type and
+ * id.
+ *
+ * @return
+ * Returns block pointer on success;
+ *         NULL on failure
+ */
+cram_block *cram_new_block(enum cram_content_type content_type,
+                           int content_id);
+
+/*! Reads a block from a cram file.
+ *
+ * @return
+ * Returns cram_block pointer on success;
+ *         NULL on failure
+ */
+cram_block *cram_read_block(cram_fd *fd);
+
+/*! Writes a CRAM block.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_write_block(cram_fd *fd, cram_block *b);
+
+/*! Frees a CRAM block, deallocating internal data too.
+ */
+void cram_free_block(cram_block *b);
+
+/*! Uncompress a memory block using Zlib.
+ *
+ * @return
+ * Returns a pointer to the uncompressed data on success;
+ *         NULL on failure
+ */
+char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size);
+
+/*! Uncompresses a CRAM block, if compressed.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_uncompress_block(cram_block *b);
+
+/*! Compresses a block.
+ *
+ * Compresses a block using one of two different zlib strategies. If we only
+ * want one choice set strat2 to be -1. (NOTE: the prototypes below take no strat2.)
+ *
+ * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED
+ * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is
+ * significantly faster.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
+                        int method, int level);
+int cram_compress_block2(cram_fd *fd, cram_slice *s,
+                         cram_block *b, cram_metrics *metrics,
+                         int method, int level);
+
+cram_metrics *cram_new_metrics(void);
+char *cram_block_method2str(enum cram_block_method_int m);
+char *cram_content_type2str(enum cram_content_type t);
+
+/*
+ * Find an external block by its content_id.  With a block_by_id table, ids
+ * below 256 index it directly; others try slot 256 + (uint32_t)id % 251.
+ */
+static inline cram_block *cram_get_block_by_id(cram_slice *slice, int id) {
+  //fprintf(stderr, "%d\t%p\n", id, slice->block_by_id);
+    uint32_t v = id;
+    if (slice->block_by_id && v < 256) {
+        return slice->block_by_id[v];
+    } else {
+        v = 256 + v % 251;
+        if (slice->block_by_id &&
+            slice->block_by_id[v] &&
+            slice->block_by_id[v]->content_id == id)
+            return slice->block_by_id[v];
+
+        // Slot empty, held by another id, or no table: scan every block in the slice
+        int i;
+        for (i = 0; i < slice->hdr->num_blocks; i++) {
+            cram_block *b = slice->block[i];
+            if (b && b->content_type == EXTERNAL && b->content_id == id)
+                return b;
+        }
+    }
+    return NULL;
+}
+
+/* --- Accessor macros for manipulating blocks on a byte by byte basis --- */
+
+/* Block size and data pointer. */
+#define BLOCK_SIZE(b) ((b)->byte)
+#define BLOCK_DATA(b) ((b)->data)
+
+/* Returns the address one past the end of the block */
+#define BLOCK_END(b) (&(b)->data[(b)->byte])
+
+/* Reallocate the block's data to exactly 'len' bytes. Returns 0, or -1 if realloc fails (block left unchanged) */
+static inline int block_resize_exact(cram_block *b, size_t len) {
+    unsigned char *tmp = realloc(b->data, len);
+    if (!tmp)
+        return -1;
+    b->alloc = len;
+    b->data = tmp;
+    return 0;
+}
+
+/* Request block allocation of at least 'len' bytes. Returns 0 on success, -1 on failure */
+static inline int block_resize(cram_block *b, size_t len) {
+    if (b->alloc > len)
+        return 0;
+    /* Grow to 1.25 * (alloc + 800), or straight to len if that is larger */
+    size_t alloc = b->alloc+800;
+    alloc = MAX(alloc + (alloc>>2), len);
+    return block_resize_exact(b, alloc);
+}
+
+
+/* Ensure the block can hold at least another 'len' bytes beyond BLOCK_SIZE(b). Returns 0 or -1 */
+static inline int block_grow(cram_block *b, size_t len) {
+    return block_resize(b, BLOCK_SIZE(b) + len);
+}
+
+/* Append 'len' bytes from 's' to the block. Returns 0 on success, -1 if growing the block fails. */
+static inline int block_append(cram_block *b, const void *s, size_t len) {
+    if (block_grow(b, len) < 0)
+        return -1;
+    /* Nothing is copied when len is 0, so 's' is never read in that case */
+    if (len) {
+        memcpy(BLOCK_END(b), s, len);
+        BLOCK_SIZE(b) += len;
+    }
+
+    return 0;
+}
+
+/* Append a single character 'c'. Returns 0 on success, -1 if growing the block fails */
+static inline int block_append_char(cram_block *b, char c) {
+    if (block_grow(b, 1) < 0)
+        return -1;
+
+    b->data[b->byte++] = c;
+    return 0;
+}
+
+/* Append a single unsigned integer as decimal digits (no terminator). Returns 0 on success, -1 on failure */
+static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i);
+static inline int block_append_uint(cram_block *b, unsigned int i) {
+    if (block_grow(b, 11) < 0)   /* a 32-bit value needs at most 10 digits */
+        return -1;
+    /* append_uint32 returns the end of what it wrote, so the difference is the length added */
+    unsigned char *cp = &b->data[b->byte];
+    b->byte += append_uint32(cp, i) - cp;
+    return 0;
+}
+
+// Versions of above with built in goto block_err calls.
+#define BLOCK_RESIZE_EXACT(b,l) if (block_resize_exact((b),(l))<0) goto block_err
+#define BLOCK_RESIZE(b,l)       if (block_resize((b),(l))      <0) goto block_err
+#define BLOCK_GROW(b,l)         if (block_grow((b),(l))        <0) goto block_err
+#define BLOCK_APPEND(b,s,l)     if (block_append((b),(s),(l))  <0) goto block_err
+#define BLOCK_APPEND_CHAR(b,c)  if (block_append_char((b),(c)) <0) goto block_err
+#define BLOCK_APPEND_UINT(b,i)  if (block_append_uint((b),(i)) <0) goto block_err
+
+static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i) {
+    uint32_t j;
+    /* Writes i in decimal at cp (no terminator); returns the pointer just past the last digit */
+    if (i == 0) {
+        *cp++ = '0';
+        return cp;
+    }
+    /* Enter the ladder near the leading digit so small values skip the larger divisions */
+    if (i < 100)        goto b1;
+    if (i < 10000)      goto b3;
+    if (i < 1000000)    goto b5;
+    if (i < 100000000)  goto b7;
+    /* Each rung emits the first non-zero digit, then jumps to the x-label for the remaining digits */
+    if ((j = i / 1000000000)) {*cp++ = j + '0'; i -= j*1000000000; goto x8;}
+    if ((j = i / 100000000))  {*cp++ = j + '0'; i -= j*100000000;  goto x7;}
+ b7:if ((j = i / 10000000))   {*cp++ = j + '0'; i -= j*10000000;   goto x6;}
+    if ((j = i / 1000000))    {*cp++ = j + '0', i -= j*1000000;    goto x5;}
+ b5:if ((j = i / 100000))     {*cp++ = j + '0', i -= j*100000;     goto x4;}
+    if ((j = i / 10000))      {*cp++ = j + '0', i -= j*10000;      goto x3;}
+ b3:if ((j = i / 1000))       {*cp++ = j + '0', i -= j*1000;       goto x2;}
+    if ((j = i / 100))        {*cp++ = j + '0', i -= j*100;        goto x1;}
+ b1:if ((j = i / 10))         {*cp++ = j + '0', i -= j*10;         goto x0;}
+    if (i)                     *cp++ = i + '0';
+    return cp;
+    /* Fall-through chain: every digit from here down is written, zeros included */
+ x8: *cp++ = i / 100000000 + '0', i %= 100000000;
+ x7: *cp++ = i / 10000000  + '0', i %= 10000000;
+ x6: *cp++ = i / 1000000   + '0', i %= 1000000;
+ x5: *cp++ = i / 100000    + '0', i %= 100000;
+ x4: *cp++ = i / 10000     + '0', i %= 10000;
+ x3: *cp++ = i / 1000      + '0', i %= 1000;
+ x2: *cp++ = i / 100       + '0', i %= 100;
+ x1: *cp++ = i / 10        + '0', i %= 10;
+ x0: *cp++ = i             + '0';
+
+    return cp;
+}
+
+static inline unsigned char *append_sub32(unsigned char *cp, uint32_t i) {
+    *cp++ = i / 100000000 + '0', i %= 100000000;
+    *cp++ = i / 10000000  + '0', i %= 10000000;
+    *cp++ = i / 1000000   + '0', i %= 1000000;
+    *cp++ = i / 100000    + '0', i %= 100000;
+    *cp++ = i / 10000     + '0', i %= 10000;
+    *cp++ = i / 1000      + '0', i %= 1000;
+    *cp++ = i / 100       + '0', i %= 100;
+    *cp++ = i / 10        + '0', i %= 10;
+    *cp++ = i             + '0';
+    /* Exactly nine digits were written, leading zeros included (expects i < 1000000000) */
+    return cp;
+}
+
+static inline unsigned char *append_uint64(unsigned char *cp, uint64_t i) {
+    uint64_t j;
+    /* Values that fit in 32 bits go straight to append_uint32 */
+    if (i <= 0xffffffff)
+        return append_uint32(cp, i);
+    /* Otherwise: leading digits via append_uint32, then zero-padded nine-digit groups */
+    if ((j = i/1000000000) > 1000000000) {
+        cp = append_uint32(cp, j/1000000000);
+        j %= 1000000000;
+        cp = append_sub32(cp, j);
+    } else {
+        cp = append_uint32(cp, i / 1000000000);
+    }
+    cp = append_sub32(cp, i % 1000000000);
+
+    return cp;
+}
+
+#define BLOCK_UPLEN(b) \
+    (b)->comp_size = (b)->uncomp_size = BLOCK_SIZE((b))
+
+/**@}*/
+/**@{ ----------------------------------------------------------------------
+ * Reference sequence handling
+ */
+
+/*! Loads a reference set from fn and stores in the cram_fd.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_load_reference(cram_fd *fd, char *fn);
+
+/*! Generates a lookup table in refs based on the SQ headers in sam_hdr_t.
+ *
+ * Indexes references by the order they appear in a BAM file. This may not
+ * necessarily be the same order they appear in the fasta reference file.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int refs2id(refs_t *r, sam_hdr_t *hdr);
+
+void refs_free(refs_t *r);
+
+/*! Returns a portion of a reference sequence from start to end inclusive.
+ *
+ * The returned pointer is owned by the cram_file fd and should not be freed
+ * by the caller. It is valid only until the next cram_get_ref is called
+ * with the same fd parameter (so is thread-safe if given multiple files).
+ *
+ * To return the entire reference sequence, specify start as 1 and end
+ * as 0.
+ *
+ * @return
+ * Returns reference on success;
+ *         NULL on failure
+ */
+char *cram_get_ref(cram_fd *fd, int id, hts_pos_t start, hts_pos_t end);
+void cram_ref_incr(refs_t *r, int id);
+void cram_ref_decr(refs_t *r, int id);
+/**@}*/
+/**@{ ----------------------------------------------------------------------
+ * Containers
+ */
+
+/*! Creates a new container, specifying the maximum number of slices
+ * and records permitted.
+ *
+ * @return
+ * Returns cram_container ptr on success;
+ *         NULL on failure
+ */
+cram_container *cram_new_container(int nrec, int nslice);
+void cram_free_container(cram_container *c);
+
+/*! Reads a container header.
+ *
+ * @return
+ * Returns cram_container on success;
+ *         NULL on failure or no container left (fd->err == 0).
+ */
+cram_container *cram_read_container(cram_fd *fd);
+
+/*! Writes a container structure.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_write_container(cram_fd *fd, cram_container *h);
+
+/*! Flushes a container to disk.
+ *
+ * Flushes a completely or partially full container to disk, writing
+ * container structure, header and blocks. This also calls the encoder
+ * functions.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_flush_container(cram_fd *fd, cram_container *c);
+int cram_flush_container_mt(cram_fd *fd, cram_container *c);
+
+
+/**@}*/
+/**@{ ----------------------------------------------------------------------
+ * Compression headers; the first part of the container
+ */
+
+/*! Creates a new blank container compression header
+ *
+ * @return
+ * Returns header ptr on success;
+ *         NULL on failure
+ */
+cram_block_compression_hdr *cram_new_compression_header(void);
+
+/*! Frees a cram_block_compression_hdr */
+void cram_free_compression_header(cram_block_compression_hdr *hdr);
+
+
+/**@}*/
+/**@{ ----------------------------------------------------------------------
+ * Slices and slice headers
+ */
+
+/*! Frees a slice header */
+void cram_free_slice_header(cram_block_slice_hdr *hdr);
+
+/*! Frees a slice */
+void cram_free_slice(cram_slice *s);
+
+/*! Creates a new empty slice in memory, for subsequent writing to
+ * disk.
+ *
+ * @return
+ * Returns cram_slice ptr on success;
+ *         NULL on failure
+ */
+cram_slice *cram_new_slice(enum cram_content_type type, int nrecs);
+
+/*! Loads an entire slice.
+ *
+ * FIXME: In 1.0 the native unit of slices within CRAM is broken
+ * as slices contain references to objects in other slices.
+ * To work around this while keeping the slice oriented outer loop
+ * we read all slices and stitch them together into a fake large
+ * slice instead.
+ *
+ * @return
+ * Returns cram_slice ptr on success;
+ *         NULL on failure
+ */
+cram_slice *cram_read_slice(cram_fd *fd);
+
+
+
+/**@}*/
+/**@{ ----------------------------------------------------------------------
+ * CRAM file definition (header)
+ */
+
+/*! Reads a CRAM file definition structure.
+ *
+ * @return
+ * Returns file_def ptr on success;
+ *         NULL on failure
+ */
+cram_file_def *cram_read_file_def(cram_fd *fd);
+
+/*! Writes a cram_file_def structure to cram_fd.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_write_file_def(cram_fd *fd, cram_file_def *def);
+
+/*! Frees a cram_file_def structure. */
+void cram_free_file_def(cram_file_def *def);
+
+
+/**@}*/
+/**@{ ----------------------------------------------------------------------
+ * SAM header I/O
+ */
+
+/*! Reads the SAM header from the first CRAM data block.
+ *
+ * Also performs minimal parsing to extract read-group
+ * and sample information.
+ *
+ * @return
+ * Returns SAM hdr ptr on success;
+ *         NULL on failure
+ */
+sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd);
+
+/*! Writes a CRAM SAM header.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr);
+
+
+/**@}*/
+/**@{ ----------------------------------------------------------------------
+ * The top-level cram opening, closing and option handling
+ */
+
+/*! Opens a CRAM file for read (mode "rb") or write ("wb").
+ *
+ * The filename may be "-" to indicate stdin or stdout.
+ *
+ * @return
+ * Returns file handle on success;
+ *         NULL on failure.
+ */
+cram_fd *cram_open(const char *filename, const char *mode);
+
+/*! Opens an existing stream for reading or writing.
+ *
+ * @return
+ * Returns file handle on success;
+ *         NULL on failure.
+ */
+cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode);
+
+/*! Closes a CRAM file.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_close(cram_fd *fd);
+
+/*
+ * Seek within a CRAM file.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_seek(cram_fd *fd, off_t offset, int whence);
+
+/*
+ * Flushes a CRAM file.
+ * Useful for when writing to stdout without wishing to close the stream.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_flush(cram_fd *fd);
+
+/*! Checks for end of file on a cram_fd stream.
+ *
+ * @return
+ * Returns 0 if not at end of file
+ *         1 if we hit an expected EOF (end of range or EOF block)
+ *         2 for other EOF (end of stream without EOF block)
+ */
+int cram_eof(cram_fd *fd);
+
+/*! Sets options on the cram_fd.
+ *
+ * See CRAM_OPT_* definitions in cram_structs.h.
+ * Use this immediately after opening.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...);
+
+/*! Sets options on the cram_fd; as cram_set_option but takes a va_list.
+ *
+ * See CRAM_OPT_* definitions in cram_structs.h.
+ * Use this immediately after opening.
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args);
+
+/*!
+ * Attaches a header to a cram_fd.
+ *
+ * This should be used when creating a new cram_fd for writing where
+ * we have an sam_hdr_t already constructed (eg from a file we've read
+ * in).
+ *
+ * @return
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+int cram_set_header2(cram_fd *fd, const sam_hdr_t *hdr);
+
+/*!
+ * Returns the hFILE connected to a cram_fd (its fp member); fd must not be NULL.
+ */
+static inline struct hFILE *cram_hfile(cram_fd *fd) {
+    return fd->fp;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CRAM_IO_H */
--- a/ext/htslib/cram/cram_samtools.h
+++ b/ext/htslib/cram/cram_samtools.h
@ -0,0 +1,75 @@
+/*
+Copyright (c) 2010-2013, 2018, 2020 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CRAM_SAMTOOLS_H
+#define CRAM_SAMTOOLS_H
+
+/*
+ * Samtools compatible API.
+ *
+ * Thin macro aliases mapping older accessor names onto bam1_t members
+ * and the bam_get_*() / bam_destroy1() / hts_reg2bin() interfaces.
+ * The macros are only expanded at their point of use, so they may be
+ * defined ahead of the sam.h include below.
+ */
+#define bam_blk_size(b)  ((b)->l_data)
+/* Writes the same l_data member that bam_blk_size() reads. */
+#define bam_set_blk_size(b,v) ((b)->l_data = (v))
+
+#define bam_ref(b)       (b)->core.tid
+#define bam_pos(b)       (b)->core.pos
+#define bam_mate_pos(b)  (b)->core.mpos
+#define bam_mate_ref(b)  (b)->core.mtid
+#define bam_ins_size(b)  (b)->core.isize
+#define bam_seq_len(b)   (b)->core.l_qseq
+#define bam_cigar_len(b) (b)->core.n_cigar
+#define bam_flag(b)      (b)->core.flag
+#define bam_bin(b)       (b)->core.bin
+#define bam_map_qual(b)  (b)->core.qual
+#define bam_name_len(b)  ((b)->core.l_qname - (b)->core.l_extranul)
+#define bam_name(b)      bam_get_qname((b))
+#define bam_qual(b)      bam_get_qual((b))
+#define bam_seq(b)       bam_get_seq((b))
+#define bam_cigar(b)     bam_get_cigar((b))
+#define bam_aux(b)       bam_get_aux((b))
+
+#define bam_free(b)      bam_destroy1((b))
+
+#define bam_reg2bin(beg,end) hts_reg2bin((beg),(end),14,5)
+
+#include "../htslib/sam.h"
+
+enum cigar_op { // each enumerator takes the value of the BAM_C* constant it is set to
+    BAM_CMATCH_=BAM_CMATCH,
+    BAM_CINS_=BAM_CINS,
+    BAM_CDEL_=BAM_CDEL,
+    BAM_CREF_SKIP_=BAM_CREF_SKIP,
+    BAM_CSOFT_CLIP_=BAM_CSOFT_CLIP,
+    BAM_CHARD_CLIP_=BAM_CHARD_CLIP,
+    BAM_CPAD_=BAM_CPAD,
+    BAM_CBASE_MATCH=BAM_CEQUAL,    // same value as BAM_CEQUAL
+    BAM_CBASE_MISMATCH=BAM_CDIFF   // same value as BAM_CDIFF
+};
+
+typedef bam1_t bam_seq_t; // bam_seq_t is another name for bam1_t
+
+#endif /* CRAM_SAMTOOLS_H */
--- a/ext/htslib/cram/cram_stats.c
+++ b/ext/htslib/cram/cram_stats.c
@ -0,0 +1,227 @@
+/*
+Copyright (c) 2012-2014, 2016, 2018, 2020 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <stdio.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <math.h>
+#include <inttypes.h>
+
+#include "cram.h"
+#include "os.h"
+
+/*
+ * Allocates a zero-filled cram_stats structure.
+ *
+ * Returns the new structure on success;
+ *         NULL if the allocation fails.
+ */
+cram_stats *cram_stats_create(void) {
+    cram_stats *st = calloc(1, sizeof(*st));
+    return st;
+}
+
+/*
+ * Records one occurrence of val.
+ *
+ * Values from 0 to MAX_STAT_VAL-1 are counted in the freqs[] array.
+ * Every other value is counted in the hash table st->h, which is
+ * created the first time it is needed.
+ *
+ * Returns 0 on success;
+ *        -1 on failure (hash table creation or insertion failed)
+ */
+int cram_stats_add(cram_stats *st, int64_t val) {
+    khint_t slot;
+    int ret;
+
+    st->nsamp++;
+
+    if (val >= 0 && val < MAX_STAT_VAL) {
+        st->freqs[val]++;
+        return 0;
+    }
+
+    if (!st->h) {
+        st->h = kh_init(m_i2i);
+        if (!st->h)
+            return -1;
+    }
+
+    slot = kh_put(m_i2i, st->h, val, &ret);
+    if (ret == -1)
+        return -1;
+
+    if (ret == 0)
+        kh_val(st->h, slot)++;    // ret 0: bump the existing count
+    else
+        kh_val(st->h, slot) = 1;  // any other non-error ret: start at 1
+
+    return 0;
+}
+
+/*
+ * Removes one occurrence of val, the reverse of cram_stats_add().
+ *
+ * If val lies outside the freqs[] range and is not present in the hash
+ * table (or there is no hash table), a warning is logged and nsamp is
+ * left as it was on entry.
+ */
+void cram_stats_del(cram_stats *st, int64_t val) {
+    st->nsamp--;
+
+    if (val >= 0 && val < MAX_STAT_VAL) {
+        st->freqs[val]--;
+        assert(st->freqs[val] >= 0);
+        return;
+    }
+
+    if (st->h) {
+        khint_t slot = kh_get(m_i2i, st->h, val);
+
+        if (slot != kh_end(st->h)) {
+            kh_val(st->h, slot) -= 1;
+            // Drop the entry once its count reaches zero.
+            if (kh_val(st->h, slot) == 0)
+                kh_del(m_i2i, st->h, slot);
+            return;
+        }
+    }
+
+    // Nothing was removed: report it and undo the decrement above.
+    hts_log_warning("Failed to remove val %"PRId64" from cram_stats", val);
+    st->nsamp++;
+}
+
+#if DEBUG_CRAM_STATS
+/*
+ * Debug helper, only compiled when DEBUG_CRAM_STATS is set.
+ *
+ * Writes every non-zero frequency to stderr as "value<TAB>count" lines,
+ * first from the freqs[] array and then from the hash table, if any.
+ */
+void cram_stats_dump(cram_stats *st) {
+    int i;
+    fprintf(stderr, "cram_stats:\n");
+    for (i = 0; i < MAX_STAT_VAL; i++) {
+        if (!st->freqs[i])
+            continue;
+        fprintf(stderr, "\t%d\t%d\n", i, st->freqs[i]);
+    }
+    if (st->h) {
+        khint_t k;
+        for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
+            if (!kh_exist(st->h, k))
+                continue;
+
+            // m_i2i keys are 64-bit, so they must not be printed via "%d".
+            fprintf(stderr, "\t%"PRId64"\t%d\n",
+                    (int64_t) kh_key(st->h, k), kh_val(st->h, k));
+        }
+    }
+}
+#endif
+
+/*
+ * Summarises the collected statistics and picks an encoding for them.
+ *
+ * Fills in st->nvals (the number of distinct values seen), st->min_val
+ * and st->max_val.  When no values were seen min_val is INT_MAX, and
+ * max_val is never reported below 0.
+ *
+ * The choice of codec is a simple policy.  For CRAM major version 4 and
+ * above: E_CONST_INT for exactly one distinct value, E_VARINT_SIGNED if
+ * there are no values or any negative value, else E_VARINT_UNSIGNED.
+ * For earlier versions: E_HUFFMAN for at most one distinct value, else
+ * E_EXTERNAL.
+ *
+ * FIXME: we could reuse some of the code here for the actual encoding
+ * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman.
+ *
+ * Returns the best codec to use.
+ */
+enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st) {
+    int nvals = 0, i;
+    // 64-bit, matching the hash keys and st->min_val / st->max_val.
+    int64_t max_val = 0, min_val = INT_MAX;
+    int ntot HTS_UNUSED = 0;
+
+#if DEBUG_CRAM_STATS
+    cram_stats_dump(st);
+#endif
+
+    /* Count number of unique symbols */
+    for (i = 0; i < MAX_STAT_VAL; i++) {
+        if (!st->freqs[i])
+            continue;
+        ntot += st->freqs[i];
+        if (max_val < i) max_val = i;
+        if (min_val > i) min_val = i;
+        nvals++;
+    }
+    if (st->h) {
+        khint_t k;
+
+        for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
+            if (!kh_exist(st->h, k))
+                continue;
+
+            // Keep the whole 64-bit key; storing it in an int truncates it.
+            int64_t key = (int64_t) kh_key(st->h, k);
+
+            ntot += kh_val(st->h, k);
+            if (max_val < key) max_val = key;
+            // nvals == 0 means this is the first value of all, which may
+            // legitimately be larger than the INT_MAX starting point.
+            if (nvals == 0 || min_val > key) min_val = key;
+            nvals++;
+        }
+    }
+
+    st->nvals = nvals;
+    st->min_val = min_val;
+    st->max_val = max_val;
+    assert(ntot == st->nsamp);
+
+    /*
+     * Simple policy that everything is external unless it can be
+     * encoded using zero bits as a unary item huffman table.
+     */
+    if (CRAM_MAJOR_VERS(fd->version) >= 4) {
+        // Note, we're assuming integer data here as we don't have the
+        // type passed in.  Cram_encoder_init does know the type and
+        // will convert to E_CONST_BYTE or E_EXTERNAL as appropriate.
+        if (nvals == 1)
+            return E_CONST_INT;
+        else if (nvals == 0 || min_val < 0)
+            return E_VARINT_SIGNED;
+        else
+            return E_VARINT_UNSIGNED;
+    } else {
+        return nvals <= 1 ? E_HUFFMAN : E_EXTERNAL;
+    }
+}
+
+/*
+ * Frees a cram_stats structure together with its hash table, if any.
+ * Passing NULL is a no-op, in the same way as free().
+ */
+void cram_stats_free(cram_stats *st) {
+    if (!st)
+        return;
+
+    if (st->h)
+        kh_destroy(m_i2i, st->h);
+    free(st);
+}
--- a/ext/htslib/cram/cram_stats.h
+++ b/ext/htslib/cram/cram_stats.h
@ -0,0 +1,59 @@
+/*
+Copyright (c) 2012-2013, 2018 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CRAM_STATS_H
+#define CRAM_STATS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+cram_stats *cram_stats_create(void);                // zero-filled via calloc; NULL on failure
+int cram_stats_add(cram_stats *st, int64_t val);    // 0 on success, -1 on failure
+void cram_stats_del(cram_stats *st, int64_t val);   // removes one occurrence of val
+void cram_stats_dump(cram_stats *st);               // only defined when DEBUG_CRAM_STATS is set
+void cram_stats_free(cram_stats *st);               // frees st and its hash table
+
+/*
+ * Computes entropy from integer frequencies for various encoding methods and
+ * picks the best encoding.
+ *
+ * FIXME: we could reuse some of the code here for the actual encoding
+ * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman.
+ *
+ * Returns the best codec to use.
+ */
+enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/ext/htslib/cram/cram_structs.h
+++ b/ext/htslib/cram/cram_structs.h
@ -0,0 +1,978 @@
+/*
+Copyright (c) 2012-2016, 2018-2020, 2023 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef HTSLIB_CRAM_STRUCTS_H
+#define HTSLIB_CRAM_STRUCTS_H
+
+/*
+ * Defines in-memory structs for the basic file-format objects in the
+ * CRAM format.
+ *
+ * The basic file format is:
+ *     File-def SAM-hdr Container Container ...
+ *
+ * Container:
+ *     Service-block data-block data-block ...
+ *
+ * Multiple blocks in a container are grouped together as slices,
+ * also sometimes referred to as landmarks in the spec.
+ */
+
+
+#include <pthread.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "../htslib/thread_pool.h"
+#include "../htslib/cram.h"
+#include "string_alloc.h"
+#include "mFILE.h"
+#include "../htslib/khash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Generic hash-map: 64-bit integer key -> int value
+KHASH_MAP_INIT_INT64(m_i2i, int)
+
+// Generic hash-set: integer key -> (existence)
+KHASH_SET_INIT_INT(s_i2i)
+
+// For brevity
+typedef unsigned char uc;
+
+/*
+ * A union for the preservation map. Required for khash.
+ * It is the value type of the string-keyed "map" hash declared below.
+ */
+typedef union {
+    int i;
+    char *p;
+} pmap_t;
+
+// Generates static functions here which isn't ideal, but we have no way
+// currently to declare the kh_map_t structure here without also declaring a
+// duplicate in the .c files due to the nature of the KHASH macros.
+KHASH_MAP_INIT_STR(map, pmap_t)
+
+struct hFILE;
+
+#define SEQS_PER_SLICE 10000
+#define BASES_PER_SLICE (SEQS_PER_SLICE*500) // i.e. 5,000,000
+#define SLICE_PER_CNT  1
+
+#define CRAM_SUBST_MATRIX "CGTNGTANCATNGCANACGT" // 20 characters
+
+#define MAX_STAT_VAL 1024
+//#define MAX_STAT_VAL 16
+typedef struct cram_stats {
+    int freqs[MAX_STAT_VAL]; // counts for the values 0 .. MAX_STAT_VAL-1
+    khash_t(m_i2i) *h;       // counts for every other value
+    int nsamp; // total number of values added
+    int nvals; // total number of unique values added
+    int64_t min_val, max_val; // 64-bit, the same width as the m_i2i keys
+} cram_stats;
+
+/* NB: matches java impl, not the spec */
+enum cram_encoding {
+    E_NULL               = 0,
+    E_EXTERNAL           = 1,  // Only for BYTE type in CRAM 4
+    E_GOLOMB             = 2,  // Not in CRAM 4
+    E_HUFFMAN            = 3,  // Not in CRAM 4
+    E_BYTE_ARRAY_LEN     = 4,
+    E_BYTE_ARRAY_STOP    = 5,
+    E_BETA               = 6,  // Not in CRAM 4
+    E_SUBEXP             = 7,  // Not in CRAM 4
+    E_GOLOMB_RICE        = 8,  // Not in CRAM 4
+    E_GAMMA              = 9,  // Not in CRAM 4
+
+    // CRAM 4 specific codecs
+    E_VARINT_UNSIGNED    = 41, // Specialisation of EXTERNAL
+    E_VARINT_SIGNED      = 42, // Specialisation of EXTERNAL
+    E_CONST_BYTE         = 43, // Alternative to HUFFMAN with 1 symbol
+    E_CONST_INT          = 44, // Alternative to HUFFMAN with 1 symbol
+
+    // More experimental ideas, not documented in spec yet
+    E_XHUFFMAN           = 50, // To external block
+    E_XPACK              = 51, // Transform to sub-codec
+    E_XRLE               = 52, // Transform to sub-codec
+    E_XDELTA             = 53, // Transform to sub-codec
+
+    // Not a real codec.  One more than the highest codec value above
+    // (so 54); the values are sparse, so this is not a count of codecs.
+    E_NUM_CODECS,
+};
+
+enum cram_external_type { // values start at 1; 0 is not used
+    E_INT                = 1,
+    E_LONG               = 2,
+    E_BYTE               = 3,
+    E_BYTE_ARRAY         = 4,
+    E_BYTE_ARRAY_BLOCK   = 5,
+    E_SINT               = 6, // signed INT
+    E_SLONG              = 7, // signed LONG
+};
+
+/* External IDs used by this implementation (only assumed during writing) */
+enum cram_DS_ID {
+    DS_CORE   = 0,
+    DS_aux    = 1, // aux_blk
+    DS_aux_OQ = 2,
+    DS_aux_BQ = 3,
+    DS_aux_BD = 4,
+    DS_aux_BI = 5,
+    DS_aux_FZ = 6, // also ZM:B
+    DS_aux_oq = 7, // other qualities
+    DS_aux_os = 8, // other sequences
+    DS_aux_oz = 9, // other strings
+    DS_ref,        // 10; everything from here on is numbered implicitly
+    DS_RN, // name_blk
+    DS_QS, // qual_blk
+    DS_IN, // base_blk
+    DS_SC, // soft_blk
+
+    DS_BF, // start loop
+    DS_CF,
+    DS_AP,
+    DS_RG,
+    DS_MQ,
+    DS_NS,
+    DS_MF,
+    DS_TS,
+    DS_NP,
+    DS_NF,
+    DS_RL,
+    DS_FN,
+    DS_FC,
+    DS_FP,
+    DS_DL,
+    DS_BA,
+    DS_BS,
+    DS_TL,
+    DS_RI,
+    DS_RS,
+    DS_PD,
+    DS_HC,
+    DS_BB,
+    DS_QQ,
+
+    DS_TN, // end loop
+
+    DS_RN_len,
+    DS_SC_len,
+    DS_BB_len,
+    DS_QQ_len,
+
+    DS_TC, // CRAM v1.0 tags
+    DS_TM, // test
+    DS_TV, // test
+
+    DS_END, // one past the last ID; sizes the per-ID codecs[] and stats[] arrays
+};
+
+/* "File Definition Structure" */
+struct cram_file_def {
+    char    magic[4];
+    uint8_t major_version;
+    uint8_t minor_version;
+    char    file_id[20] HTS_NONSTRING; // Filename or SHA1 checksum
+};
+
+#define CRAM_MAJOR_VERS(v) ((v) >> 8)   // everything above the low 8 bits
+#define CRAM_MINOR_VERS(v) ((v) & 0xff) // the low 8 bits
+
+struct cram_slice;
+
+// Internal version of htslib/cram.h enum.
+// Note these have to match the layout of methmap and methcost in
+// cram_io.c:cram_compress_block2
+enum cram_block_method_int {
+    // Public methods as defined in the CRAM spec.
+    BM_ERROR = -1,
+
+    // CRAM 2.x and 3.0
+    RAW      = 0,
+    GZIP     = 1,
+    BZIP2    = 2,
+    LZMA     = 3,
+    RANS     = 4, RANS0 = RANS,
+
+    // CRAM 3.1 onwards
+    RANSPR   = 5, RANS_PR0  = RANSPR,
+    ARITH    = 6, ARITH_PR0 = ARITH,
+    FQZ      = 7,
+    TOK3     = 8,
+    // BSC = 9, ZSTD = 10
+
+    // Methods not externalised, but used in metrics.
+    // Externally they become one of the above methods.
+    GZIP_RLE = 11,
+    GZIP_1,      // Z_DEFAULT_STRATEGY level 1, NB: not externalised in CRAM
+
+    FQZ_b, FQZ_c, FQZ_d, // Various preset FQZ methods
+
+  //RANS0,       // Order 0
+    RANS1,
+
+  //RANS_PR0,    // Order 0
+    RANS_PR1,    // Order 1
+    RANS_PR64,   // O0 + RLE
+    RANS_PR9,    // O1 + X4
+    RANS_PR128,  // O0 + Pack
+    RANS_PR129,  // O1 + Pack
+    RANS_PR192,  // O0 + RLE + pack
+    RANS_PR193,  // O1 + RLE + pack
+
+  //TOK3,   // tok+rans
+    TOKA,   // tok+arith
+
+  //ARITH_PR0,   // Order 0
+    ARITH_PR1,   // Order 1
+    ARITH_PR64,  // O0 + RLE
+    ARITH_PR9,   // O1 + X4
+    ARITH_PR128, // O0 + Pack
+    ARITH_PR129, // O1 + Pack
+    ARITH_PR192, // O0 + RLE + pack
+    ARITH_PR193, // O1 + RLE + pack
+
+    // NB: must end on no more than 31 unless we change to a
+    // 64-bit method type.  Counting on from GZIP_RLE = 11, ARITH_PR193
+    // is exactly 31, so no further method fits without that change.
+};
+
+/* Now in htslib/cram.h
+enum cram_content_type {
+    CT_ERROR           = -1,
+    FILE_HEADER        = 0,
+    COMPRESSION_HEADER = 1,
+    MAPPED_SLICE       = 2,
+    UNMAPPED_SLICE     = 3, // CRAM V1.0 only
+    EXTERNAL           = 4,
+    CORE               = 5,
+};
+*/
+
+/* Maximum simultaneous codecs allowed, 1 per bit */
+#define CRAM_MAX_METHOD 32
+
+/* Compression metrics.
+ *
+ * The sz[], cnt[] and extra[] arrays each hold CRAM_MAX_METHOD entries.
+ * NOTE(review): presumably indexed by cram_block_method_int value - confirm
+ * against cram_io.c.
+ */
+struct cram_metrics {
+    // number of trials and time to next trial
+    int trial;
+    int next_trial;
+    int consistency;
+
+    // aggregate sizes during trials
+    int sz[CRAM_MAX_METHOD];
+    int input_avg_sz, input_avg_delta;
+
+    // resultant method from trials
+    int method, revised_method;
+    int strat;
+
+    // Revisions of method, to allow culling of continually failing ones.
+    int cnt[CRAM_MAX_METHOD];
+
+    double extra[CRAM_MAX_METHOD];
+
+    // Not amenable to rANS bit-packing techniques; cardinality > 16
+    int unpackable;
+};
+
+// Hash aux key (XX:i) to cram_metrics
+KHASH_MAP_INIT_INT(m_metrics, cram_metrics*)
+
+
+/* Block.
+ *
+ * method and orig_method use the internal cram_block_method_int enum
+ * declared in this header.
+ */
+struct cram_block {
+    enum cram_block_method_int  method, orig_method;
+    enum cram_content_type  content_type;
+    int32_t  content_id;
+    int32_t  comp_size;
+    int32_t  uncomp_size;
+    uint32_t crc32;
+    int32_t  idx; /* offset into data */
+    unsigned char    *data;
+
+    // For bit I/O
+    size_t alloc;
+    size_t byte;
+    int bit;
+
+    // To aid compression
+    cram_metrics *m; // used to track aux block compression only
+
+    int crc32_checked;
+    uint32_t crc_part;
+};
+
+struct cram_codec; /* defined in cram_codecs.h */
+struct cram_map;
+
+#define CRAM_MAP_HASH 32 // bucket count; a power of two so CRAM_MAP can mask
+#define CRAM_MAP(a,b) (((a)*3+(b))&(CRAM_MAP_HASH-1)) // bucket index, 0 .. CRAM_MAP_HASH-1
+
+/* Compression header block.
+ *
+ * rec_encoding_map and tag_encoding_map are arrays of CRAM_MAP_HASH
+ * bucket pointers to cram_map entries; codecs[] has one slot per
+ * cram_DS_ID value.
+ */
+struct cram_block_compression_hdr {
+    int32_t ref_seq_id;
+    int64_t ref_seq_start;
+    int64_t ref_seq_span;
+    int32_t num_records;
+    int32_t num_landmarks;
+    int32_t *landmark;
+
+    /* Flags from preservation map */
+    int read_names_included;
+    int AP_delta;
+    // indexed by ref-base and subst. code
+    char substitution_matrix[5][4];
+    int no_ref;
+    int qs_seq_orient; // 1 => same as seq. 0 => original orientation
+
+    // TD Dictionary as a concatenated block
+    cram_block *TD_blk;          // Tag Dictionary
+    int nTL;                     // number of TL entries in TD
+    unsigned char **TL;          // array of size nTL, pointer into TD_blk.
+    khash_t(m_s2i) *TD_hash;     // Keyed on TD strings, map to TL[] indices
+    string_alloc_t *TD_keys;     // Pooled keys for TD hash.
+
+    khash_t(map) *preservation_map;
+    struct cram_map *rec_encoding_map[CRAM_MAP_HASH];
+    struct cram_map *tag_encoding_map[CRAM_MAP_HASH];
+
+    struct cram_codec *codecs[DS_END];
+
+    char *uncomp; // A single block of uncompressed data
+    size_t uncomp_size, uncomp_alloc;
+
+    // Total codec count, used for index to block_by_id for transforms
+    int ncodecs;
+};
+
+typedef struct cram_map { // element type of the rec/tag encoding map buckets
+    int key;    /* 0xe0 + 3 bytes */
+    enum cram_encoding encoding;
+    int offset; /* Offset into a single block of memory */
+    int size;   /* Size */
+    struct cram_codec *codec;
+    struct cram_map *next; // for noddy internal hash
+} cram_map;
+
+typedef struct cram_tag_map { // value type of the m_tagmap hash below
+    struct cram_codec *codec;
+    cram_block *blk;
+    cram_block *blk2;
+    cram_metrics *m;
+} cram_tag_map;
+
+// Hash aux key (XX:i) to cram_tag_map pointer; the type of
+// cram_container.tags_used.
+KHASH_MAP_INIT_INT(m_tagmap, cram_tag_map*)
+
+/* Mapped or unmapped slice header block.
+ *
+ * The fields marked "if content_type == MAPPED_SLICE" are the
+ * reference-related ones.
+ */
+struct cram_block_slice_hdr {
+    enum cram_content_type content_type;
+    int32_t ref_seq_id;     /* if content_type == MAPPED_SLICE */
+    int64_t ref_seq_start;  /* if content_type == MAPPED_SLICE */
+    int64_t ref_seq_span;   /* if content_type == MAPPED_SLICE */
+    int32_t num_records;
+    int64_t record_counter;
+    int32_t num_blocks;
+    int32_t num_content_ids;
+    int32_t *block_content_ids;
+    int32_t ref_base_id;    /* if content_type == MAPPED_SLICE */
+    unsigned char md5[16];  // 16 raw bytes
+};
+
+struct ref_entry;
+
+/*
+ * Container.
+ *
+ * Conceptually a container is split into slices, and slices into blocks.
+ * However on disk it's just a list of blocks and we need to query the
+ * block types to identify the start/end points of the slices.
+ *
+ * OR... are landmarks the start/end points of slices?
+ */
+struct cram_container {
+    int32_t  length;
+    int32_t  ref_seq_id;
+    int64_t  ref_seq_start;
+    int64_t  ref_seq_span;
+    int64_t  record_counter;
+    int64_t  num_bases;
+    int32_t  num_records;
+    int32_t  num_blocks;
+    int32_t  num_landmarks;
+    int32_t *landmark;
+
+    /* Size of container header above */
+    size_t   offset;
+
+    /* Compression header is always the first block? */
+    cram_block_compression_hdr *comp_hdr;
+    cram_block *comp_hdr_block;
+
+    /* For construction purposes */
+    int max_slice, curr_slice;   // maximum number of slices
+    int curr_slice_mt;           // Curr_slice when reading ahead (via threads)
+    int max_rec, curr_rec;       // current and max recs per slice
+    int max_c_rec, curr_c_rec;   // current and max recs per container
+    int slice_rec;               // rec no. for start of this slice
+    int curr_ref;                // current ref ID. -2 for no previous
+    int64_t last_pos;                // last record position
+    struct cram_slice **slices, *slice;
+    int pos_sorted;              // boolean, 1=>position sorted data
+    int64_t max_apos;                // maximum position, used if pos_sorted==0
+    int last_slice;              // number of reads in last slice (0 for 1st)
+    int multi_seq;               // true if packing multi seqs per cont/slice
+    int unsorted;                // true if AP_delta is 0.
+    int qs_seq_orient;           // 1 => same as seq. 0 => original orientation
+
+    /* Copied from fd before encoding, to allow multi-threading */
+    int ref_id;
+    hts_pos_t ref_start, first_base, last_base, ref_end;
+    char *ref;
+    int embed_ref;               // 1 if embedding ref, 2 if embedding cons
+    int no_ref;                  // true if referenceless
+    //struct ref_entry *ref;
+
+    /* For multi-threading */
+    bam_seq_t **bams;
+
+    /* Statistics for encoding; one cram_stats pointer per cram_DS_ID */
+    cram_stats *stats[DS_END];
+
+    khash_t(m_tagmap) *tags_used; // set of tag types in use, for tag encoding map
+    int *refs_used;       // array of frequency of ref seq IDs
+
+    uint32_t crc32;       // CRC32
+
+    uint64_t s_num_bases; // number of bases in this slice
+    uint64_t s_aux_bytes; // number of bytes of aux in BAM
+
+    uint32_t n_mapped;    // Number of mapped reads
+    int ref_free;         // whether 'ref' is owned by us and must be freed.
+};
+
+/*
+ * A single cram record
+ *
+ * Variable-length data is not stored inline: the name, aux, seq, qual,
+ * cigar and feature members are indices into buffers owned by the slice.
+ * Which tag-name member exists (TN_idx or tn) depends on whether
+ * TN_external is defined at compile time.
+ */
+typedef struct cram_record {
+    struct cram_slice *s; // Filled out by cram_decode only
+
+    int32_t ref_id;       // fixed for all recs in slice?
+    int32_t flags;        // BF
+    int32_t cram_flags;   // CF
+    int32_t len;          // RL
+    int64_t apos;         // AP
+    int32_t rg;           // RG
+    int32_t name;         // RN; idx to s->names_blk
+    int32_t name_len;
+    int32_t mate_line;    // index to another cram_record
+    int32_t mate_ref_id;
+    int64_t mate_pos;     // NP
+    int64_t tlen;         // TS
+    int64_t explicit_tlen;// TS, but PNEXT/RNEXT still need auto-computing
+
+    // Auxiliary data
+    int32_t ntags;        // TC
+    uint32_t aux;         // idx to s->aux_blk
+    uint32_t aux_size;    // total size of packed ntags in aux_blk
+#ifndef TN_external
+    int32_t TN_idx;       // TN; idx to s->TN;
+#else
+    int32_t tn;           // idx to s->tn_blk
+#endif
+    int     TL;
+
+    uint32_t seq;         // idx to s->seqs_blk
+    uint32_t qual;        // idx to s->qual_blk
+    uint32_t cigar;       // idx to s->cigar
+    int32_t ncigar;
+    int64_t aend;         // alignment end
+    int32_t mqual;        // MQ
+
+    uint32_t feature;     // idx to s->feature
+    uint32_t nfeature;    // number of features
+    int32_t mate_flags;   // MF
+} cram_record;
+
+// Accessor macros as an analogue of the bam ones.
+// Each takes a pointer to a cram_record.  All but cram_name_len,
+// cram_strand and cram_mstrand go through the record's slice pointer (s),
+// so that must be set before they are used.
+#define cram_qname(c)    (&(c)->s->name_blk->data[(c)->name])
+#define cram_seq(c)      (&(c)->s->seqs_blk->data[(c)->seq])
+#define cram_qual(c)     (&(c)->s->qual_blk->data[(c)->qual])
+#define cram_aux(c)      (&(c)->s->aux_blk->data[(c)->aux])
+#define cram_seqi(c,i)   (cram_seq((c))[(i)])
+#define cram_name_len(c) ((c)->name_len)
+#define cram_strand(c)   (((c)->flags & BAM_FREVERSE) != 0)
+#define cram_mstrand(c)  (((c)->flags & BAM_FMREVERSE) != 0)
+// Address of the record's first entry within the slice's cigar array.
+// Expands only its own parameter, so it works whatever the caller's
+// variable is called.
+#define cram_cigar(c)    (&((c)->s->cigar)[(c)->cigar])
+
+/*
+ * A feature is a base difference, used for the sequence reference encoding.
+ * (We generate these internally when writing CRAM.)
+ *
+ * Every member struct begins with the same two int fields, pos and code,
+ * in that order; the remaining fields differ per member.
+ */
+typedef union cram_feature {
+    struct {
+        int pos;
+        int code;
+        int base;    // substitution code
+    } X;
+    struct {
+        int pos;
+        int code;
+        int base;    // actual base & qual
+        int qual;
+    } B;
+    struct {
+        int pos;
+        int code;
+        int seq_idx; // index to s->seqs_blk
+        int len;
+    } b;
+    struct {
+        int pos;
+        int code;
+        int qual;
+    } Q;
+    struct {
+        int pos;
+        int code;
+        int len;
+        int seq_idx; // soft-clip multiple bases
+    } S;
+    struct {
+        int pos;
+        int code;
+        int len;
+        int seq_idx; // insertion multiple bases
+    } I;
+    struct {
+        int pos;
+        int code;
+        int base; // insertion single base
+    } i;
+    struct {
+        int pos;
+        int code;
+        int len;
+    } D;
+    struct {
+        int pos;
+        int code;
+        int len;
+    } N;
+    struct {
+        int pos;
+        int code;
+        int len;
+    } P;
+    struct {
+        int pos;
+        int code;
+        int len;
+    } H;
+} cram_feature;
+
+/*
+ * A slice is really just a set of blocks, but it
+ * is the logical unit for decoding a number of
+ * sequences.
+ */
+struct cram_slice {
+    cram_block_slice_hdr *hdr;
+    cram_block *hdr_block;
+    cram_block **block;
+    cram_block **block_by_id;
+
+    /* State used during encoding/decoding */
+    int64_t last_apos, max_apos;
+
+    /* Array of decoded cram records */
+    cram_record *crecs;
+
+    /* Dynamically growing buffers for the data pointed
+     * to by the crecs[] array.
+     */
+    uint32_t  *cigar;
+    uint32_t   cigar_alloc;
+    uint32_t   ncigar;
+
+    cram_feature *features;
+    uint32_t      nfeatures;
+    uint32_t      afeatures; // allocated size of features
+
+#ifndef TN_external
+    // TN field (Tag Name)
+    uint32_t      *TN;
+    int           nTN, aTN;  // used and allocated size for TN[]
+#else
+    cram_block *tn_blk;
+    int tn_id;
+#endif
+
+    // For variable sized elements which are always external blocks.
+    cram_block *name_blk;
+    cram_block *seqs_blk;
+    cram_block *qual_blk;
+    cram_block *base_blk;
+    cram_block *soft_blk;
+    cram_block *aux_blk;       // BAM aux block, created while decoding CRAM
+
+    string_alloc_t *pair_keys; // Pooled keys for pair hash.
+    khash_t(m_s2i) *pair[2];   // for identifying read-pairs in this slice.
+
+    char *ref;                 // slice of current reference
+    hts_pos_t ref_start;       // start position of current reference;
+    hts_pos_t ref_end;         // end position of current reference;
+    int ref_id;
+
+    // For going from BAM to CRAM; an array of auxiliary blocks per type
+    int naux_block;
+    cram_block **aux_block;
+
+    unsigned int data_series; // See cram_fields enum
+    int decode_md;
+
+    int max_rec, curr_rec;       // current and max recs per slice
+    int slice_num;               // To be copied into c->curr_slice in decode
+};
+
+/*-----------------------------------------------------------------------------
+ * Consider moving reference handling to cram_refs.[ch]
+ */
+// One reference sequence entry.
+// The name, length, offset, bases_per_line and line_length fields come
+// from fa.fai / samtools faidx files.
+typedef struct ref_entry {
+    char *name;
+    char *fn;
+    int64_t length;
+    int64_t offset;
+    int bases_per_line;
+    int line_length;
+    int64_t count;         // for shared references so we know to dealloc seq
+    char *seq;
+    mFILE *mf;
+    int is_md5;            // Reference comes from a raw seq found by MD5
+    int validated_md5;
+} ref_entry;
+
+KHASH_MAP_INIT_STR(refs, ref_entry*)
+
+// References structure.
+// Holds every known ref_entry (reachable both by name and by numeric ID),
+// the currently opened reference file, and the state needed to share one
+// refs_t between several cram_fd handles.
+struct refs_t {
+    string_alloc_t *pool;  // String pool for holding filenames and SN vals
+
+    khash_t(refs) *h_meta; // ref_entry*, index by name
+    ref_entry **ref_id;    // ref_entry*, index by ID
+    int nref;              // number of ref_entry
+
+    char *fn;              // current file opened
+    BGZF *fp;              // and the BGZF handle to go with it.
+
+    int count;             // how many cram_fd sharing this refs struct
+
+    pthread_mutex_t lock;  // Mutex for multi-threaded updating
+    ref_entry *last;       // Last queried sequence
+    int last_id;           // Used in cram_ref_decr_locked to delay free
+};
+
+/*-----------------------------------------------------------------------------
+ * CRAM index
+ *
+ * Detect format by number of entries per line.
+ * 5 => 1.0 (refid, start, nseq, C offset, slice)
+ * 6 => 1.1 (refid, start, span, C offset, S offset, S size)
+ *
+ * Indices are stored in a nested containment list, which is trivial to set
+ * up as the indices are on sorted data so we're appending to the nclist
+ * in sorted order. Basically if a slice entirely fits within a previous
+ * slice then we append to that slice's list. This is done recursively.
+ *
+ * Lists are sorted on two dimensions: ref id + slice coords.
+ *
+ * Each cram_index node therefore describes one slice and also owns the
+ * array e[] of the entries nested inside it.
+ */
+typedef struct cram_index {
+    // NOTE(review): nalloc is presumably the allocated capacity of e[],
+    // with nslice the number in use - confirm against the index builder.
+    int nslice, nalloc;   // total number of slices
+    struct cram_index *e; // array of size nslice
+
+    // The trailing "1.0" / "1.1" columns mark which index format versions
+    // supply each field.
+    int     refid;  // 1.0                 1.1
+    int     start;  // 1.0                 1.1
+    int     end;    //                     1.1
+    int     nseq;   // 1.0 - undocumented
+    int     slice;  // 1.0 landmark index, 1.1 landmark value
+    int     len;    //                     1.1 - size of slice in bytes
+    int64_t offset; // 1.0                 1.1
+
+    // Linked list of cram_index entries. Used to convert recursive
+    // NCList back to a linear list.
+    struct cram_index *e_next;
+} cram_index;
+
+// A region: a reference id plus 64-bit start and end coordinates.
+// Stored in cram_fd as the "range" option.
+// NOTE(review): whether start/end are 0- or 1-based and inclusive is not
+// visible here - confirm against the code that sets cram_fd.range.
+typedef struct {
+    int refid;
+    int64_t start;
+    int64_t end;
+} cram_range;
+
+/*-----------------------------------------------------------------------------
+ */
+/* CRAM File handle */
+
+// Node of a singly linked list; each node holds one array of bam_seq_t
+// pointers. cram_fd keeps the list head in "bl", alongside a
+// "bam_list_lock" mutex.
+typedef struct spare_bams {
+    bam_seq_t **bams;
+    struct spare_bams *next;
+} spare_bams;
+
+struct cram_fd;
+// Table of function pointers for reading, writing and sizing variable
+// length integers. cram_fd embeds one of these ("vv") so that the
+// integer encoding can be selected per file handle.
+// Naming: 32/64 is the value width; a trailing "s" marks the variants
+// that are presumably for signed values (TODO confirm against the
+// implementations installed into this table).
+typedef struct varint_vec {
+    // Returns number of bytes decoded from fd, 0 on error
+    int (*varint_decode32_crc)(struct cram_fd *fd, int32_t *val_p, uint32_t *crc);
+    int (*varint_decode32s_crc)(struct cram_fd *fd, int32_t *val_p, uint32_t *crc);
+    int (*varint_decode64_crc)(struct cram_fd *fd, int64_t *val_p, uint32_t *crc);
+
+    // Returns the value and increments *cp.  Sets err to 1 iff an error occurs.
+    // NOTE: Does not set err to 0 on success.
+    int64_t (*varint_get32) (char **cp, const char *endp, int *err);
+    int64_t (*varint_get32s)(char **cp, const char *endp, int *err);
+    int64_t (*varint_get64) (char **cp, const char *endp, int *err);
+    int64_t (*varint_get64s)(char **cp, const char *endp, int *err);
+
+    // Writes into the caller's buffer [cp, endp).
+    // Returns the number of bytes written, <= 0 on error.
+    int (*varint_put32) (char *cp, char *endp, int32_t val_p);
+    int (*varint_put32s)(char *cp, char *endp, int32_t val_p);
+    int (*varint_put64) (char *cp, char *endp, int64_t val_p);
+    int (*varint_put64s)(char *cp, char *endp, int64_t val_p);
+
+    // As above, but the destination is a cram_block.
+    // Returns the number of bytes written, <= 0 on error.
+    int (*varint_put32_blk) (cram_block *blk, int32_t val_p);
+    int (*varint_put32s_blk)(cram_block *blk, int32_t val_p);
+    int (*varint_put64_blk) (cram_block *blk, int64_t val_p);
+    int (*varint_put64s_blk)(cram_block *blk, int64_t val_p);
+
+    // Returns number of bytes needed to encode 'val'
+    int (*varint_size)(int64_t val);
+} varint_vec;
+
+/*
+ * State for one open CRAM file: the underlying hFILE, the file definition
+ * and SAM header, the container currently being processed, the cached
+ * reference, compression options, lookup tables, the index and the
+ * thread-pool bookkeeping.
+ */
+struct cram_fd {
+    struct hFILE  *fp;
+    int            mode;     // 'r' or 'w'
+    int            version;
+    cram_file_def *file_def;
+    sam_hdr_t     *header;
+
+    char          *prefix;
+    int64_t        record_counter;
+    int            err;
+
+    // Most recent compression header decoded
+    //cram_block_compression_hdr *comp_hdr;
+    //cram_block_slice_hdr       *slice_hdr;
+
+    // Current container being processed
+    cram_container *ctr;
+
+    // Current container used for decoder threads
+    cram_container *ctr_mt;
+
+    // positions for encoding or decoding
+    int first_base, last_base; // copied to container
+
+    // cached reference portion
+    refs_t   *refs;                // ref meta-data structure
+    char     *ref, *ref_free;      // current portion held in memory
+    int       ref_id;              // copied to container
+    hts_pos_t ref_start;           // copied to container
+    hts_pos_t ref_end;             // copied to container
+    char     *ref_fn;              // reference fasta filename
+
+    // compression level and metrics
+    int level;
+    cram_metrics *m[DS_END];
+    khash_t(m_metrics) *tags_used; // cram_metrics[], per tag types in use.
+
+    // options
+    int decode_md; // Whether to export MD and NM tags
+    int seqs_per_slice;
+    int bases_per_slice;
+    int slices_per_container;
+    int embed_ref; // copied to container
+    int no_ref;    // copied to container
+    int no_ref_counter; // decide if permanent switch
+    int ignore_md5;
+    int use_bz2;
+    int use_rans;
+    int use_lzma;
+    int use_fqz;
+    int use_tok;
+    int use_arith;
+    int shared_ref;
+    unsigned int required_fields;
+    int store_md;
+    int store_nm;
+    cram_range range;
+
+    // lookup tables, stored here so we can be trivially multi-threaded
+    unsigned int bam_flag_swap[0x1000]; // cram -> bam flags
+    unsigned int cram_flag_swap[0x1000];// bam -> cram flags
+    unsigned char L1[256];              // ACGT{*} ->0123{4}
+    unsigned char L2[256];              // ACGTN{*}->01234{5}
+    char cram_sub_matrix[32][32];       // base substitution codes
+
+    int         index_sz;
+    cram_index *index;                  // array, of size index_sz
+    off_t first_container;
+    off_t curr_position;
+    int eof;
+    int last_slice;                     // number of recs encoded in last slice
+    int last_RI_count;                  // number of references encoded in last container
+    int multi_seq;                      // -1 is auto, 0 is one ref per container, 1 is multi...
+    int multi_seq_user;                 // Original user setting (CRAM_OPT_MULTI_SEQ_PER_SLICE)
+    int unsorted;
+    int last_mapped;                    // number of mapped reads in last container
+    int empty_container;                // Marker for EOF block
+
+    // thread pool
+    // NOTE(review): own_pool presumably records whether this handle created
+    // (and must destroy) "pool" - confirm against the open/close code.
+    int own_pool;
+    hts_tpool *pool;
+    hts_tpool_process *rqueue;
+    pthread_mutex_t metrics_lock;
+    pthread_mutex_t ref_lock;
+    pthread_mutex_t range_lock;
+    spare_bams *bl;
+    pthread_mutex_t bam_list_lock;
+    void *job_pending;
+    int ooc;                            // out of containers.
+
+    int lossy_read_names;               // boolean
+    int tlen_approx;                    // max TLEN calculation offset.
+    int tlen_zero;                      // If true, permit tlen 0 (=> tlen calculated)
+
+    BGZF *idxfp;                        // File pointer for on-the-fly index creation
+
+    // variable integer decoding callbacks.
+    // This changed in CRAM4.0 to a data-size agnostic encoding.
+    varint_vec vv;
+
+    // Force AP delta even on non positional sorted data.
+    // This can be beneficial for pairs where pairs are nearby each other.
+    // We suffer with delta to unrelated things (previous pair), but gain
+    // in delta between them.  (Ideal would be a per read setting.)
+    int ap_delta;
+};
+
+// Translation of required fields to cram data series
+//
+// Every enumerator below except CRAM_ALL is a distinct single bit
+// (bit 0 up to bit 30), so values can be OR-ed together into a mask.
+// CRAM_ALL (0x7fffffff) is the union of all 31 of those bits.
+enum cram_fields {
+    CRAM_BF = 0x00000001,
+    CRAM_AP = 0x00000002,
+    CRAM_FP = 0x00000004,
+    CRAM_RL = 0x00000008,
+    CRAM_DL = 0x00000010,
+    CRAM_NF = 0x00000020,
+    CRAM_BA = 0x00000040,
+    CRAM_QS = 0x00000080,
+    CRAM_FC = 0x00000100,
+    CRAM_FN = 0x00000200,
+    CRAM_BS = 0x00000400,
+    CRAM_IN = 0x00000800,
+    CRAM_RG = 0x00001000,
+    CRAM_MQ = 0x00002000,
+    CRAM_TL = 0x00004000,
+    CRAM_RN = 0x00008000,
+    CRAM_NS = 0x00010000,
+    CRAM_NP = 0x00020000,
+    CRAM_TS = 0x00040000,
+    CRAM_MF = 0x00080000,
+    CRAM_CF = 0x00100000,
+    CRAM_RI = 0x00200000,
+    CRAM_RS = 0x00400000,
+    CRAM_PD = 0x00800000,
+    CRAM_HC = 0x01000000,
+    CRAM_SC = 0x02000000,
+    CRAM_BB = 0x04000000,
+    CRAM_BB_len = 0x08000000,
+    CRAM_QQ = 0x10000000,
+    CRAM_QQ_len = 0x20000000,
+    CRAM_aux= 0x40000000,
+    CRAM_ALL= 0x7fffffff,
+};
+
+// A CIGAR opcode, but not necessarily the implications of it. Eg FC/FP may
+// encode a base difference, but we don't need to know what it is for CIGAR.
+// If we have a soft-clip or insertion, we do need SC/IN though to know how
+// long that array is.
+#define CRAM_CIGAR (CRAM_FN | CRAM_FP | CRAM_FC | CRAM_DL | CRAM_IN | \
+                    CRAM_SC | CRAM_HC | CRAM_PD | CRAM_RS | CRAM_RL | CRAM_BF)
+
+#define CRAM_SEQ (CRAM_CIGAR | CRAM_BA | CRAM_BS | \
+                  CRAM_RL    | CRAM_AP | CRAM_BB)
+
+#define CRAM_QUAL (CRAM_CIGAR | CRAM_RL | CRAM_AP | CRAM_QS | CRAM_QQ)
+
+/* BF bitfields */
+/* Corrected in 1.1. Use bam_flag_swap[bf] and BAM_* macros for 1.0 & 1.1 */
+#define CRAM_FPAIRED      256
+#define CRAM_FPROPER_PAIR 128
+#define CRAM_FUNMAP        64
+#define CRAM_FREVERSE      32
+#define CRAM_FREAD1        16
+#define CRAM_FREAD2         8
+#define CRAM_FSECONDARY     4
+#define CRAM_FQCFAIL        2
+#define CRAM_FDUP           1
+
+#define DS_aux_S "\001"
+#define DS_aux_OQ_S "\002"
+#define DS_aux_BQ_S "\003"
+#define DS_aux_BD_S "\004"
+#define DS_aux_BI_S "\005"
+#define DS_aux_FZ_S "\006"
+#define DS_aux_oq_S "\007"
+#define DS_aux_os_S "\010"
+#define DS_aux_oz_S "\011"
+
+#define CRAM_M_REVERSE  1
+#define CRAM_M_UNMAP    2
+
+
+/* CF bitfields */
+#define CRAM_FLAG_PRESERVE_QUAL_SCORES (1<<0)
+#define CRAM_FLAG_DETACHED             (1<<1)
+#define CRAM_FLAG_MATE_DOWNSTREAM      (1<<2)
+#define CRAM_FLAG_NO_SEQ               (1<<3)
+#define CRAM_FLAG_EXPLICIT_TLEN        (1<<4)
+#define CRAM_FLAG_MASK                 ((1<<5)-1)
+
+/* Internal only */
+#define CRAM_FLAG_STATS_ADDED          (1<<30)
+#define CRAM_FLAG_DISCARD_NAME         (1U<<31)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTSLIB_CRAM_STRUCTS_H */
--- a/ext/htslib/cram/mFILE.c
+++ b/ext/htslib/cram/mFILE.c
@ -0,0 +1,668 @@
+/*
+Copyright (c) 2005-2006, 2008-2009, 2013, 2015, 2017-2019 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "../htslib/hts_log.h"
+#include "os.h"
+#include "mFILE.h"
+
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#endif
+
+/*
+ * This file contains memory-based versions of the most commonly used
+ * (by io_lib) stdio functions.
+ *
+ * Actual file IO takes place either on opening or closing an mFILE.
+ *
+ * Coupled to this are a bunch of rather scary macros which can be obtained
+ * by including stdio_hack.h. It is recommended though that you use mFILE.h
+ * instead and replace fopen with mfopen (etc). This is more or less
+ * mandatory if you wish to use both FILE and mFILE structs in a single file.
+ */
+
+static mFILE *m_channel[3];  /* stdin, stdout and stderr fakes */
+
+/*
+ * Reads the entirety of fp into memory. If 'fn' is non-NULL and can be
+ * stat()ed, the reported file size is used to allocate the buffer up front
+ * and reading stops once that many bytes have arrived. Otherwise we use
+ * successive reads, growing the buffer in 8192 byte steps, until EOF.
+ *
+ * 'binary' is only consulted on Windows, where it selects the binary/text
+ * translation mode of the stream.
+ *
+ * Returns a malloced buffer on success of length *size
+ *         NULL on failure (out of memory or read error); *size is left
+ *         untouched in that case.
+ */
+static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
+    struct stat sb;
+    char *data = NULL;
+    size_t allocated = 0, used = 0;
+    size_t expected = 0;   /* file size reported by stat(), if known */
+    size_t bufsize = 8192; /* size_t: an int truncates sizes above INT_MAX */
+
+#ifdef _WIN32
+    if (binary)
+        _setmode(_fileno(fp), _O_BINARY);
+    else
+        _setmode(_fileno(fp), _O_TEXT);
+#endif
+
+    if (fn && -1 != stat(fn, &sb)) {
+        expected = allocated = sb.st_size;
+        /* malloc(0) may return NULL; that must not look like a failure */
+        data = malloc(allocated ? allocated : 1);
+        if (!data)
+            return NULL;
+        bufsize = allocated;
+    } else {
+        fn = NULL;
+    }
+
+    do {
+        size_t len;
+        if (used + bufsize > allocated) {
+            allocated += bufsize;
+            char *datan = realloc(data, allocated);
+            if (datan) {
+                data = datan;
+            } else {
+                free(data);
+                return NULL;
+            }
+        }
+        len = fread(data + used, 1, allocated - used, fp);
+        if (len > 0) {
+            used += len;
+        } else if (ferror(fp) && !feof(fp)) {
+            /* A read error never sets the EOF flag, so without this check
+             * the loop would spin forever making no progress. */
+            free(data);
+            return NULL;
+        }
+    } while (!feof(fp) && (fn == NULL || used < expected));
+
+    *size = used;
+
+    return data;
+}
+
+
+#ifdef HAVE_MMAP
+/*
+ * Maps the file named 'fn' (already open as 'fp') read-only into memory
+ * and points mf->data at the mapping. The mapping length is the size
+ * reported by stat(fn), which is also stored in mf->size. mf->alloced is
+ * set to 0 on success.
+ *
+ * Returns 0 on success
+ *        -1 on failure (stat or mmap failed); mf->size and mf->data may
+ *           already have been overwritten in the mmap failure case.
+ */
+int mfmmap(mFILE *mf, FILE *fp, const char *fn) {
+    struct stat info;
+    void *mapping;
+
+    if (0 != stat(fn, &info))
+        return -1;
+
+    mf->size = info.st_size;
+    mapping = mmap(NULL, mf->size, PROT_READ, MAP_SHARED, fileno(fp), 0);
+    mf->data = mapping;
+
+    /* (void *)-1 is the value mmap reports on failure */
+    if (mapping == NULL || mapping == (void *)-1)
+        return -1;
+
+    mf->alloced = 0;
+    return 0;
+}
+#endif
+
+
+/*
+ * Returns m_channel[0], the mFILE standing in for stdin, creating it on
+ * first use. No data is read here: the first read through mfread, mfgetc
+ * or mfgets triggers init_mstdin(), which slurps in all of stdin until
+ * EOF is met.
+ *
+ * Returns NULL if the mFILE could not be allocated.
+ */
+mFILE *mstdin(void) {
+    mFILE *in = m_channel[0];
+
+    if (in == NULL) {
+        in = m_channel[0] = mfcreate(NULL, 0);
+        if (in != NULL)
+            in->fp = stdin;
+    }
+
+    return in;
+}
+
+/*
+ * One-shot loader for the stdin fake: on the first call reads all of
+ * stdin into m_channel[0] and marks it read-only. Later calls do nothing.
+ */
+static void init_mstdin(void) {
+    static int done_stdin = 0;
+
+    if (!done_stdin) {
+        mFILE *in = m_channel[0];
+
+        in->data = mfload(stdin, NULL, &in->size, 1);
+        in->mode = MF_READ;
+        done_stdin = 1;
+    }
+}
+
+/*
+ * Returns m_channel[1], the fake for stdout, creating it on first use.
+ * It starts as an empty write-mode buffer attached to stdout; the data is
+ * physically written out only when mfflush or mfclose are called.
+ *
+ * Returns NULL if the mFILE could not be allocated.
+ */
+mFILE *mstdout(void) {
+    mFILE *out = m_channel[1];
+
+    if (out != NULL)
+        return out;
+
+    out = m_channel[1] = mfcreate(NULL, 0);
+    if (out != NULL) {
+        out->fp = stdout;
+        out->mode = MF_WRITE;
+    }
+
+    return out;
+}
+
+/*
+ * Stderr as an mFILE.
+ * Returns m_channel[2], creating it on first use as an empty write-mode
+ * buffer attached to stderr. mfflush() treats this channel like stdout:
+ * it writes out the pending bytes and then discards them.
+ *
+ * Returns NULL if the mFILE could not be allocated.
+ */
+mFILE *mstderr(void) {
+    mFILE *err = m_channel[2];
+
+    if (err != NULL)
+        return err;
+
+    err = m_channel[2] = mfcreate(NULL, 0);
+    if (err != NULL) {
+        err->fp = stderr;
+        err->mode = MF_WRITE;
+    }
+
+    return err;
+}
+
+
+/*
+ * Wraps an existing memory buffer in a newly allocated mFILE.
+ *
+ * 'data' becomes the mFILE's buffer as-is (it is not copied) and 'size' is
+ * recorded as both the used and the allocated length. The new mFILE has no
+ * FILE* attached, is positioned at offset 0 and is readable and writable.
+ *
+ * Returns the new mFILE, or NULL if it could not be allocated.
+ */
+mFILE *mfcreate(char *data, int size) {
+    mFILE *mf = malloc(sizeof *mf);
+
+    if (!mf)
+        return NULL;
+
+    /* buffer */
+    mf->data      = data;
+    mf->size      = size;
+    mf->alloced   = size;
+
+    /* stream state */
+    mf->fp        = NULL;
+    mf->mode      = MF_READ | MF_WRITE;
+    mf->offset    = 0;
+    mf->flush_pos = 0;
+    mf->eof       = 0;
+
+    return mf;
+}
+
+/*
+ * Replaces the buffer held by an existing mFILE with 'data' / 'size'.
+ * The previous buffer is freed, and the file is rewound: offset, flush
+ * position and the EOF flag are all reset. The attached FILE* and the
+ * mode are left alone.
+ */
+void mfrecreate(mFILE *mf, char *data, int size) {
+    /* free(NULL) is a no-op, so no guard is needed */
+    free(mf->data);
+
+    mf->data    = data;
+    mf->alloced = size;
+    mf->size    = size;
+
+    mf->flush_pos = 0;
+    mf->offset    = 0;
+    mf->eof       = 0;
+}
+
+
+/*
+ * Creates a new mFILE to contain the contents of the FILE pointer.
+ * The work is done by mfreopen(); afterwards the mFILE's link to 'fp' is
+ * cut, so the result is purely for in-memory operations and closing it
+ * will not close 'fp'.
+ * Consider using mfreopen() if you need different behaviour.
+ *
+ * Returns mFILE * on success
+ *         NULL on failure.
+ */
+mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) {
+    mFILE *loaded = mfreopen(path, mode_str, fp);
+
+    /* Disassociate from the input stream */
+    if (loaded != NULL)
+        loaded->fp = NULL;
+
+    return loaded;
+}
+
+/*
+ * Converts a FILE * to an mFILE *.
+ * Use this for wrapper functions to turn external prototypes requiring
+ * FILE * as an argument into internal code using mFILE *.
+ *
+ * 'path' is used for stat() when loading or mmapping the contents;
+ * 'mode_str' is scanned for the mode letters described below. On success
+ * the returned mFILE keeps 'fp' in mf->fp.
+ *
+ * Returns the new mFILE on success
+ *         NULL on failure (allocation or load failure, or a mode string
+ *         containing none of r, w or a). 'fp' is not closed on failure.
+ */
+mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) {
+    mFILE *mf;
+    int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0;
+
+    /* Parse mode:
+     * r = read file contents (if truncated => don't read)
+     * w = write on close
+     * a = position at end of buffer
+     * x = position at same location as the original fp, don't seek on flush
+     * + = for update (read and write)
+     * m = mmap (read only)
+     */
+    if (strchr(mode_str, 'r'))
+        r = 1, mode |= MF_READ;
+    if (strchr(mode_str, 'w'))
+        w = 1, mode |= MF_WRITE | MF_TRUNC;
+    if (strchr(mode_str, 'a'))
+        w = a = 1, mode |= MF_WRITE | MF_APPEND;
+    if (strchr(mode_str, 'b'))
+        b = 1, mode |= MF_BINARY;
+    if (strchr(mode_str, 'x'))
+        x = 1;
+    /* '+' must be handled after 'a': "a+" also needs the contents read */
+    if (strchr(mode_str, '+')) {
+        w = 1, mode |= MF_READ | MF_WRITE;
+        if (a)
+            r = 1;
+    }
+#ifdef HAVE_MMAP
+    /* mmap is only honoured when no write access was requested */
+    if (strchr(mode_str, 'm'))
+        if (!w) mode |= MF_MMAP;
+#endif
+
+    if (r) {
+        mf = mfcreate(NULL, 0);
+        if (NULL == mf) return NULL;
+        if (!(mode & MF_TRUNC)) {
+#ifdef HAVE_MMAP
+            if (mode & MF_MMAP) {
+                /* On mmap failure fall back to loading into memory */
+                if (mfmmap(mf, fp, path) == -1) {
+                    mf->data = NULL;
+                    mode &= ~MF_MMAP;
+                }
+            }
+#endif
+            if (!mf->data) {
+                mf->data = mfload(fp, path, &mf->size, b);
+                if (!mf->data) {
+                    free(mf);
+                    return NULL;
+                }
+                mf->alloced = mf->size;
+                /* Loading consumed fp; rewind it unless appending.
+                 * NOTE(review): the fseek result is not checked. */
+                if (!a)
+                    fseek(fp, 0, SEEK_SET);
+            }
+        }
+    } else if (w) {
+        /* Write - initialise the data structures */
+        mf = mfcreate(NULL, 0);
+        if (NULL == mf) return NULL;
+    } else {
+        hts_log_error("Must specify either r, w or a for mode");
+        return NULL;
+    }
+    mf->fp = fp;
+    mf->mode = mode;
+
+    if (x) {
+        mf->mode |= MF_MODEX;
+    }
+
+    if (a) {
+        /* Everything loaded so far is already on disk: only bytes
+         * appended after this point need flushing. */
+        mf->flush_pos = mf->size;
+        fseek(fp, 0, SEEK_END);
+    }
+
+    return mf;
+}
+
+/*
+ * Opens a file with fopen() and wraps it in an mFILE via mfreopen(). If we
+ * have read access (r or a+) then the entire file is loaded into memory.
+ * If we have write access then the FILE* is kept in the mFILE; nothing is
+ * actually written until mfflush or mfclose.
+ *
+ * Returns the new mFILE on success
+ *         NULL on failure; the stream opened here is closed again, and
+ *         errno is left as set by the failing step.
+ */
+mFILE *mfopen(const char *path, const char *mode) {
+    FILE *fp;
+    mFILE *mf;
+
+    if (NULL == (fp = fopen(path, mode)))
+        return NULL;
+
+    mf = mfreopen(path, mode, fp);
+    if (NULL == mf) {
+        /* mfreopen does not take ownership on failure, so close the
+         * stream here rather than leaking it. Keep the original errno. */
+        int saved_errno = errno;
+        fclose(fp);
+        errno = saved_errno;
+    }
+
+    return mf;
+}
+
+/*
+ * Closes an mFILE: pending data is flushed through mfflush() (which also
+ * covers stdout/stderr), an mmaped buffer is unmapped, the attached
+ * FILE* (if any) is closed, and finally the mFILE itself is destroyed.
+ *
+ * Returns 0 on success
+ *        -1 if mf is NULL.
+ * The results of mfflush() and fclose() are not reported to the caller.
+ */
+int mfclose(mFILE *mf) {
+    if (mf == NULL)
+        return -1;
+
+    mfflush(mf);
+
+#ifdef HAVE_MMAP
+    if (mf->data && (mf->mode & MF_MMAP)) {
+        /* Mmaped: unmap and clear so mfdestroy() won't free() it */
+        munmap(mf->data, mf->size);
+        mf->data = NULL;
+    }
+#endif
+
+    if (mf->fp != NULL)
+        fclose(mf->fp);
+
+    mfdestroy(mf);
+    return 0;
+}
+
+/*
+ * Flushes the mFILE and closes the file pointer contained within it,
+ * without destroying the in-memory data.
+ *
+ * Attempting to do this on an mmaped buffer is an error; the flush still
+ * happens before that error is reported.
+ *
+ * Returns 0 on success
+ *        -1 if mf is NULL or mmaped.
+ */
+int mfdetach(mFILE *mf) {
+    if (mf == NULL)
+        return -1;
+
+    mfflush(mf);
+
+    if ((mf->mode & MF_MMAP) != 0)
+        return -1;
+
+    if (mf->fp != NULL) {
+        FILE *stream = mf->fp;
+        mf->fp = NULL;
+        fclose(stream);
+    }
+
+    return 0;
+}
+
+/*
+ * Destroys an mFILE structure but does not flush or close it.
+ * Frees the data buffer and the structure itself.
+ *
+ * Returns 0 on success
+ *        -1 if mf is NULL.
+ */
+int mfdestroy(mFILE *mf) {
+    if (mf == NULL)
+        return -1;
+
+    /* free(NULL) is harmless, so an empty mFILE needs no special case */
+    free(mf->data);
+    free(mf);
+
+    return 0;
+}
+
+/*
+ * Steals the data out of an mFILE.  On success the mFILE is detached from
+ * its FILE* and destroyed, and the caller owns (and must free) the
+ * returned buffer.  If size_out is not NULL, mf->size is stored in it;
+ * this happens before the detach is attempted.
+ * This is more-or-less the opposite of mfcreate().
+ *
+ * Note, we cannot steal the allocated buffer from an mmaped mFILE: in
+ * that case NULL is returned and the mFILE is left alive.
+ *
+ * Returns the buffer on success
+ *         NULL if mf is NULL or could not be detached.
+ */
+
+void *mfsteal(mFILE *mf, size_t *size_out) {
+    void *stolen;
+
+    if (mf == NULL)
+        return NULL;
+
+    /* Capture buffer and length first; the flush inside mfdetach() may
+     * reset the size of the stdout/stderr channels. */
+    stolen = mf->data;
+    if (size_out != NULL)
+        *size_out = mf->size;
+
+    if (0 == mfdetach(mf)) {
+        mf->data = NULL;    /* keep mfdestroy() from freeing the buffer */
+        mfdestroy(mf);
+        return stolen;
+    }
+
+    return NULL;
+}
+
+/*
+ * Seek function. Nothing more than updating an in-memory index. NB we can
+ * seek on stdin or stdout even provided we haven't been flushing.
+ *
+ * 'whence' is SEEK_SET, SEEK_CUR or SEEK_END as for fseek(). Seeking
+ * beyond the end of the data is permitted. A successful seek clears the
+ * EOF flag.
+ *
+ * Returns 0 on success
+ *        -1 on failure with errno set to EINVAL (unknown 'whence', or the
+ *           resulting position would be negative). The position and EOF
+ *           flag are unchanged on failure.
+ */
+int mfseek(mFILE *mf, long offset, int whence) {
+    size_t base;
+
+    switch (whence) {
+    case SEEK_SET:
+        base = 0;
+        break;
+    case SEEK_CUR:
+        base = mf->offset;
+        break;
+    case SEEK_END:
+        base = mf->size;
+        break;
+    default:
+        errno = EINVAL;
+        return -1;
+    }
+
+    if (offset < 0) {
+        /* Magnitude taken in unsigned arithmetic so LONG_MIN is safe. */
+        unsigned long back = 0UL - (unsigned long)offset;
+
+        /* mf->offset is unsigned: a position before the start would wrap
+         * to a huge value and send later writes out of bounds. */
+        if (back > base) {
+            errno = EINVAL;
+            return -1;
+        }
+        mf->offset = base - back;
+    } else {
+        mf->offset = base + (size_t)offset;
+    }
+
+    mf->eof = 0;
+    return 0;
+}
+
+/*
+ * Reports the current in-memory position of the mFILE.
+ */
+long mftell(mFILE *mf) {
+    long pos = mf->offset;
+
+    return pos;
+}
+
+/*
+ * Moves the position back to the start of the buffer and clears the
+ * EOF flag.
+ */
+void mrewind(mFILE *mf) {
+    mf->eof = 0;
+    mf->offset = 0;
+}
+
+/*
+ * mftruncate is not directly a translation of ftruncate as the latter
+ * takes a file descriptor instead of a FILE *. It performs the analogous
+ * role though: it sets the logical size of the in-memory data. The
+ * buffer itself is not reallocated.
+ *
+ * If offset is -1 then the file is truncated to be the current file
+ * offset. If the current position lies beyond the new size it is pulled
+ * back to the new end.
+ */
+void mftruncate(mFILE *mf, long offset) {
+    if (offset == -1)
+        mf->size = mf->offset;
+    else
+        mf->size = offset;
+
+    if (mf->size < mf->offset)
+        mf->offset = mf->size;
+}
+
+/*
+ * Returns the EOF flag of the mFILE: non-zero once a read has run out of
+ * data, zero otherwise.
+ */
+int mfeof(mFILE *mf) {
+    int at_end = mf->eof;
+
+    return at_end;
+}
+
+/*
+ * mFILE read function. Basically this turns fread syntax into a memcpy
+ * from the in-memory buffer.
+ *
+ * Copies up to size * nmemb bytes from the current position into 'ptr' and
+ * advances the position by the number of bytes copied. If fewer bytes
+ * than requested were available the EOF flag is set. Reading from the
+ * stdin fake loads stdin first.
+ *
+ * Returns the number of complete items read, which is 0 when already at
+ * the end of the data or when 'size' is 0.
+ */
+size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
+    size_t want, avail, take;
+
+    if (m_channel[0] == mf) init_mstdin();
+
+    if (mf->offset >= mf->size)
+        return 0;
+    if (size == 0)
+        return 0;
+
+    want  = size * nmemb;
+    avail = mf->size - mf->offset;
+    take  = want <= avail ? want : avail;
+
+    memcpy(ptr, mf->data + mf->offset, take);
+    mf->offset += take;
+
+    /* A short read means we hit the end of the data */
+    if (take != want)
+        mf->eof = 1;
+
+    return take / size;
+}
+
+/*
+ * mFILE write function. Basically this turns fwrite syntax into a memcpy
+ * into the in-memory buffer, growing the buffer as required.
+ *
+ * Writes size * nmemb bytes from 'ptr' at the current position (or at the
+ * end of the data in append mode) and advances the position. If the
+ * position lies beyond the current end of data, the gap in between is
+ * filled with zero bytes, as a real file would be.
+ *
+ * Returns nmemb on success
+ *         0 if the mFILE is not writable, the byte count overflows, or
+ *         memory could not be allocated.
+ */
+size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
+    size_t nbytes;
+
+    if (!(mf->mode & MF_WRITE))
+        return 0;
+
+    /* Reject requests whose byte count cannot be represented */
+    if (size != 0 && nmemb > (size_t)-1 / size)
+        return 0;
+    nbytes = size * nmemb;
+
+    /* Append mode => forced all writes to end of file */
+    if (mf->mode & MF_APPEND)
+        mf->offset = mf->size;
+
+    if (nbytes > (size_t)-1 - mf->offset)
+        return 0;
+
+    /* Make sure we have enough room */
+    while (nbytes + mf->offset > mf->alloced) {
+        size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;
+        void * new_data = realloc(mf->data, new_alloced);
+        if (NULL == new_data) return 0;
+        mf->alloced = new_alloced;
+        mf->data    = new_data;
+    }
+
+    /* Writing past the end: the bytes between the old end and the write
+     * position become part of the file, so they must not expose whatever
+     * realloc (or a previous truncate) left in the buffer. */
+    if (mf->offset > mf->size) {
+        memset(mf->data + mf->size, 0, mf->offset - mf->size);
+        if (mf->size < mf->flush_pos)
+            mf->flush_pos = mf->size;
+    }
+
+    /* Record where we need to reflush from */
+    if (mf->offset < mf->flush_pos)
+        mf->flush_pos = mf->offset;
+
+    /* Copy the data over (skipped for empty writes, where the buffer may
+     * still be NULL) */
+    if (nbytes > 0)
+        memcpy(&mf->data[mf->offset], ptr, nbytes);
+    mf->offset += nbytes;
+    if (mf->size < mf->offset)
+        mf->size = mf->offset;
+
+    return nmemb;
+}
+
+/*
+ * Reads one byte from the mFILE (loading stdin first for the stdin fake).
+ *
+ * Returns the byte as an unsigned char value on success
+ *         -1 at the end of the data, also setting the EOF flag.
+ */
+int mfgetc(mFILE *mf) {
+    if (m_channel[0] == mf) init_mstdin();
+
+    if (mf->offset >= mf->size) {
+        mf->eof = 1;
+        return -1;
+    }
+
+    return (unsigned char)mf->data[mf->offset++];
+}
+
+/*
+ * Pushes the byte 'c' back onto the mFILE by stepping the position back
+ * by one and storing 'c' there.
+ *
+ * For an mmaped (read-only) buffer nothing can be stored, so the push-back
+ * only succeeds when 'c' equals the byte already at that position.
+ * Pushing back EOF is refused and leaves the mFILE untouched.
+ *
+ * Returns c on success
+ *         -1 on failure. When the failure is due to being at the start of
+ *         the buffer the EOF flag is also set.
+ */
+int mungetc(int c, mFILE *mf) {
+    /* Storing EOF would overwrite a data byte with 0xFF */
+    if (c == EOF)
+        return -1;
+
+    if (mf->offset == 0) {
+        mf->eof = 1;
+        return -1;
+    }
+
+    if (mf->mode & MF_MMAP) {
+        /* The mapping is PROT_READ: writing to it would fault */
+        if ((unsigned char)mf->data[mf->offset - 1] != (unsigned char)c)
+            return -1;
+        mf->offset--;
+        return c;
+    }
+
+    mf->data[--mf->offset] = c;
+    return c;
+}
+
+/*
+ * Reads a line from the mFILE into 's', storing at most size-1 bytes and
+ * always NUL-terminating. Reading stops after a newline (which is kept)
+ * or when the data runs out, in which case the EOF flag is set. Reading
+ * from the stdin fake loads stdin first.
+ *
+ * Returns s if at least one byte was read
+ *         NULL otherwise.
+ */
+char *mfgets(char *s, int size, mFILE *mf) {
+    int n = 0;
+
+    if (m_channel[0] == mf) init_mstdin();
+
+    s[0] = 0;
+    while (n < size-1) {
+        char ch;
+
+        if (mf->offset >= mf->size) {
+            mf->eof = 1;
+            break;
+        }
+
+        ch = mf->data[mf->offset++];
+        s[n++] = ch;
+        if (ch == '\n')
+            break;
+    }
+
+    s[n] = 0;
+    return n != 0 ? s : NULL;
+}
+
+/*
+ * Flushes an mFILE. If this is a real open of a file in write mode then
+ * mFILE->fp will be set. We then write out any new data in mFILE since the
+ * last flush. We cannot tell what may have been modified as we don't keep
+ * track of that, so we typically rewrite out the entire file contents between
+ * the last flush_pos and the end of file.
+ *
+ * For stderr/stdout we also reset the offsets so we cannot modify things
+ * we've already output.
+ *
+ * Returns 0 on success (including when there is no FILE* attached)
+ *        -1 if writing, flushing or truncating the underlying file fails.
+ */
+int mfflush(mFILE *mf) {
+    if (!mf->fp)
+        return 0;
+
+    /* FIXME: only do this when opened in write mode */
+    if (mf == m_channel[1] || mf == m_channel[2]) {
+        if (mf->flush_pos < mf->size) {
+            size_t bytes = mf->size - mf->flush_pos;
+            if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
+                return -1;
+            if (0 != fflush(mf->fp))
+                return -1;
+        }
+
+        /* Stdout & stderr are non-seekable streams so throw away the data */
+        mf->offset = mf->size = mf->flush_pos = 0;
+    }
+
+    /* only flush when opened in write mode */
+    /* NOTE: the stdout/stderr fakes are created with MF_WRITE, so they
+     * also reach this block; their size and flush_pos are both 0 by now,
+     * so nothing more is written, but the ftell/ftruncate step below
+     * still runs for them. */
+    if (mf->mode & MF_WRITE) {
+        if (mf->flush_pos < mf->size) {
+            size_t bytes = mf->size - mf->flush_pos;
+            /* Mode 'x' means keep writing at fp's own position */
+            if (!(mf->mode & MF_MODEX)) {
+                fseek(mf->fp, mf->flush_pos, SEEK_SET);
+            }
+            if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
+                return -1;
+            if (0 != fflush(mf->fp))
+                return -1;
+        }
+        /* Cut the file at the current stream position; skipped when
+         * ftell fails (e.g. the stream is not seekable). */
+        if (ftell(mf->fp) != -1 &&
+            ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1)
+            return -1;
+        mf->flush_pos = mf->size;
+    }
+
+    return 0;
+}
+
+/*
+ * Converts an mFILE from binary to ascii mode by replacing all
+ * cr-nl with nl. The conversion is done in place and shrinks mf->size by
+ * one for every pair replaced.
+ *
+ * Primarily used on windows when we've uncompressed a binary file which
+ * happens to be a text file (eg Experiment File). Previously we would have
+ * seeked back to the start and used _setmode(fileno(fp), _O_TEXT).
+ *
+ * Side effect: resets offset and flush_pos back to the start.
+ */
+void mfascii(mFILE *mf) {
+    size_t p1, p2;
+
+    /* The scan starts at index 1 (it looks back one byte), so an empty
+     * buffer must be skipped: otherwise p2 would stay at 1 and the size
+     * would grow from 0 to 1. */
+    if (mf->size > 0) {
+        for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) {
+            if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') {
+                p2--; /* delete the \r */
+            }
+            mf->data[p2] = mf->data[p1];
+        }
+        mf->size = p2;
+    }
+
+    mf->offset = mf->flush_pos = 0;
+}
--- a/ext/htslib/cram/mFILE.h
+++ b/ext/htslib/cram/mFILE.h
@ -0,0 +1,93 @@
+/*
+Copyright (c) 2005-2006, 2008-2009, 2013, 2018 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CRAM_MFILE_H
+#define CRAM_MFILE_H
+
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * An in-memory file: a byte buffer plus stdio-like position state,
+ * optionally tied to a real FILE* that the buffer is flushed to.
+ */
+typedef struct {
+    FILE *fp;         /* underlying stream, or NULL when purely in memory */
+    char *data;       /* the buffer holding the file contents */
+    size_t alloced;   /* allocated size of data; set to 0 when mmaped */
+    int eof;          /* set once a read runs out of data */
+    int mode; /* open mode in MF_?? define bit pattern */
+    size_t size;      /* number of valid bytes in data */
+    size_t offset;    /* current read/write position */
+    size_t flush_pos; /* mfflush() writes out data from here up to size */
+} mFILE;
+
+// Work around a clash with winuser.h
+#ifdef MF_APPEND
+#  undef MF_APPEND
+#endif
+
+#define MF_READ    1
+#define MF_WRITE   2
+#define MF_APPEND  4
+#define MF_BINARY  8
+#define MF_TRUNC  16
+#define MF_MODEX  32
+#define MF_MMAP   64
+
+mFILE *mfreopen(const char *path, const char *mode, FILE *fp);
+mFILE *mfopen(const char *path, const char *mode);
+int mfdetach(mFILE *mf);
+int mfclose(mFILE *mf);
+int mfdestroy(mFILE *mf);
+int mfseek(mFILE *mf, long offset, int whence);
+long mftell(mFILE *mf);
+void mrewind(mFILE *mf);
+void mftruncate(mFILE *mf, long offset);
+int mfeof(mFILE *mf);
+size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf);
+size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf);
+int mfgetc(mFILE *mf);
+int mungetc(int c, mFILE *mf);
+mFILE *mfcreate(char *data, int size);
+mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp);
+void mfrecreate(mFILE *mf, char *data, int size);
+void *mfsteal(mFILE *mf, size_t *size_out);
+char *mfgets(char *s, int size, mFILE *mf);
+int mfflush(mFILE *mf);
+mFILE *mstdin(void);
+mFILE *mstdout(void);
+mFILE *mstderr(void);
+void mfascii(mFILE *mf);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CRAM_MFILE_H */
--- a/ext/htslib/cram/misc.h
+++ b/ext/htslib/cram/misc.h
@ -0,0 +1,77 @@
+/*
+Copyright (c) 1994-1997, 2001-2002 MEDICAL RESEARCH COUNCIL
+All rights reserved
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1 Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2 Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF
+MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or
+promote products derived from this software without specific prior written
+permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+Copyright (c) 2003-2013, 2018-2019 Genome Research Ltd.
+
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CRAM_MISC_H
+#define CRAM_MISC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Smaller / larger of two values.
+ * These are function-like macros: whichever argument is selected is
+ * evaluated twice, so do not pass expressions with side effects
+ * (e.g. MIN(i++, j)).
+ */
+#define MIN(A,B) ( ( (A) < (B) ) ? (A) : (B) )
+#define MAX(A,B) ( ( (A) > (B) ) ? (A) : (B) )
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CRAM_MISC_H */
--- a/ext/htslib/cram/open_trace_file.c
+++ b/ext/htslib/cram/open_trace_file.c
@ -0,0 +1,438 @@
+/*
+Author: James Bonfield
+
+Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL
+All rights reserved
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF
+MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or
+promote products derived from this software without specific prior written
+permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+Copyright (c) 2008, 2009, 2013, 2014-2015, 2018-2020 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "os.h"
+#ifndef PATH_MAX
+#  define PATH_MAX 1024
+#endif
+
+#include "open_trace_file.h"
+#include "misc.h"
+#include "../htslib/hfile.h"
+#include "../htslib/hts_log.h"
+#include "../htslib/hts.h"
+
+/*
+ * Tests whether 'fn' names a regular file.
+ *
+ * Returns non-zero when stat() succeeds and reports a regular file;
+ *         0 otherwise (including when stat() itself fails).
+ */
+static int is_file(char *fn) {
+    struct stat st;
+
+    return stat(fn, &st) == 0 ? S_ISREG(st.st_mode) : 0;
+}
+
+/*
+ * Tokenises the search path splitting on colons (unix) or semicolons
+ * (windows).
+ * We also explicitly add a "./" to the end of the search path.
+ *
+ * "::" is an escape for a literal colon. When the separator is ':',
+ * elements starting with http:, https: or ftp: (optionally prefixed by '|'
+ * or "URL=") keep the colons that belong to the scheme and to host:port.
+ *
+ * Returns: A new search path with items separated by nul chars. Two nul
+ *          chars in a row represent the end of the tokenised path.
+ * Returns NULL for a failure.
+ *
+ * The returned data has been malloced. It is up to the caller to free this
+ * memory.
+ */
+char *tokenise_search_path(const char *searchpath) {
+    char *newsearch;
+    size_t i, j;
+    size_t len;
+    char path_sep = HTS_PATH_SEPARATOR_CHAR;
+
+    if (!searchpath)
+        searchpath="";
+
+    /*
+     * Every byte stored by the loop below consumes at least one distinct
+     * input byte, so j <= len when it finishes; the trailing "\0./\0\0"
+     * then needs at most 5 more bytes.
+     */
+    newsearch = (char *)malloc((len = strlen(searchpath))+5);
+    if (!newsearch)
+        return NULL;
+
+    for (i = 0, j = 0; i < len; i++) {
+        /* "::" => ":". Used for escaping colons in http://foo */
+        if (i + 1 < len && searchpath[i] == ':' && searchpath[i+1] == ':') {
+            newsearch[j++] = ':';
+            i++;
+            continue;
+        }
+
+        /* Handle http:// and ftp:// too without :: */
+        if (path_sep == ':') {
+            if ((i == 0 || (i > 0 && searchpath[i-1] == ':')) &&
+                (!strncmp(&searchpath[i], "http:",     5) ||
+                 !strncmp(&searchpath[i], "https:",    6) ||
+                 !strncmp(&searchpath[i], "ftp:",      4) ||
+                 !strncmp(&searchpath[i], "|http:",    6) ||
+                 !strncmp(&searchpath[i], "|https:",   7) ||
+                 !strncmp(&searchpath[i], "|ftp:",     5) ||
+                 !strncmp(&searchpath[i], "URL=http:", 9) ||
+                 !strncmp(&searchpath[i], "URL=https:",10)||
+                 !strncmp(&searchpath[i], "URL=ftp:",  8))) {
+                /* Copy the scheme up to and including its ':' */
+                do {
+                    newsearch[j++] = searchpath[i];
+                } while (i<len && searchpath[i++] != ':');
+                /* From here on i may equal len; searchpath[len] is the
+                   terminating nul, so reading it is safe. */
+                if (searchpath[i] == ':')
+                    i++;
+                if (searchpath[i]=='/')
+                    newsearch[j++] = searchpath[i++];
+                if (searchpath[i]=='/')
+                    newsearch[j++] = searchpath[i++];
+                // Look for host:port
+                if (i < len) {
+                    do {
+                        newsearch[j++] = searchpath[i++];
+                    } while (i<len && searchpath[i] != ':' && searchpath[i] != '/');
+                }
+                /* Copy the ':' or '/' that ended the host name, if any */
+                if (i < len)
+                    newsearch[j++] = searchpath[i++];
+                if (i < len && searchpath[i] == ':')
+                    i++;
+
+                /* The URL ran to the end of the input: stop here rather
+                   than reading and copying bytes beyond the string. */
+                if (i >= len)
+                    break;
+            }
+        }
+
+        if (searchpath[i] == path_sep) {
+            /* Skip blank path components */
+            if (j && newsearch[j-1] != 0)
+                newsearch[j++] = 0;
+        } else {
+            newsearch[j++] = searchpath[i];
+        }
+    }
+
+    if (j)
+        newsearch[j++] = 0;
+    newsearch[j++] = '.';
+    newsearch[j++] = '/';
+    newsearch[j++] = 0;
+    newsearch[j++] = 0;
+
+    return newsearch;
+}
+
+static char *expand_path(const char *file, char *dirname, int max_s_digits);
+
+/*
+ * Loads 'file' from the location described by 'url' into memory.
+ *
+ * 'url' may contain %s / %<digit>s rules, which expand_path() fills in from
+ * 'file'; the result is opened with hopen() and read to the end.
+ *
+ * Returns a mFILE holding the entire contents, rewound to the start;
+ *         NULL on failure. A warning is logged unless the failure was
+ *         hopen() reporting ENOENT.
+ */
+mFILE *find_file_url(const char *file, char *url) {
+    char *path = NULL, buf[8192];
+    mFILE *mf = NULL;
+    ssize_t len;
+    hFILE *hf = NULL;
+    int close_ret;
+
+    /* Expand %s for the trace name.  Only one digit is allowed between
+       The % and s to avoid ambiguity with percent-encoded URLs */
+
+    path = expand_path(file, url, 1);
+    if (!path)
+        return NULL;
+
+    if (!(hf = hopen(path, "r"))) {
+        if (errno != ENOENT)
+            hts_log_warning("Failed to open reference \"%s\": %s", path, strerror(errno));
+        goto fail;
+    }
+
+    if (NULL == (mf = mfcreate(NULL, 0)))
+        goto fail;
+    while ((len = hread(hf, buf, sizeof(buf))) > 0) {
+        if (mfwrite(buf, len, 1, mf) <= 0)
+            goto fail;
+    }
+
+    /* hf is not used again after hclose(), whatever it returns, so forget
+       it here to stop the fail path closing it a second time. */
+    close_ret = hclose(hf);
+    hf = NULL;
+    if (close_ret < 0 || len < 0) {
+        hts_log_warning("Failed to read reference \"%s\": %s", path, strerror(errno));
+        goto fail;
+    }
+
+    free(path);
+    mrewind(mf);
+    return mf;
+
+ fail:
+    /* Still open only if we bailed out between hopen() and hclose();
+       previously an mfcreate() failure leaked the handle. */
+    if (hf)
+        hclose_abruptly(hf);
+    if (mf)
+        mfdestroy(mf);
+    free(path);
+    return NULL;
+}
+
+/*
+ * Takes a dirname possibly including % rules and appends the filename
+ * to it.
+ *
+ * "%s" in dirname is replaced by whatever remains of 'file'; "%Ns" (N a
+ * decimal number of at most max_s_digits characters) by its next N
+ * characters. Anything left of 'file' afterwards is appended as "/rest".
+ * A '%' that does not form such a rule (including a negative count) is
+ * copied through literally. One trailing '/' on dirname is ignored.
+ *
+ * If 'file' is absolute, or dirname is "." or "./", the result is simply a
+ * copy of 'file'.
+ *
+ * Returns expanded pathname or NULL for malloc failure. The caller must
+ * free() the result.
+ */
+static char *expand_path(const char *file, char *dirname, int max_s_digits) {
+    size_t len = strlen(dirname);
+    size_t lenf = strlen(file);
+    const char *dir, *dir_end;
+    char *cp, *path, *path_end;
+
+    /*
+     * Worst expansion is DIR/FILE: every byte of dirname and of file is
+     * copied at most once, plus one '/' and the terminating nul.
+     */
+    path = malloc(len+lenf+2);
+    if (!path) {
+        hts_log_error("Out of memory");
+        return NULL;
+    }
+
+    /* Ignore one trailing slash; dirname may be the empty string */
+    if (len > 0 && dirname[len-1] == '/')
+        len--;
+
+    /* Special case for "./" or absolute filenames */
+    if (*file == '/' || (len==1 && *dirname == '.')) {
+        memcpy(path, file, lenf + 1);
+        return path;
+    }
+
+    /* [dir, dir_end) is the part of dirname not yet copied. *dir_end is
+       either the terminating nul or the ignored trailing '/'. */
+    dir = dirname;
+    dir_end = dirname + len;
+    path_end = path;
+
+    /* Handle %[0-9]*s expansions, if required */
+    while (dir < dir_end && (cp = memchr(dir, '%', dir_end - dir))) {
+        char *endp;
+        long l = strtol(cp+1, &endp, 10);
+        size_t avail, n;
+
+        if (*endp != 's' || endp - cp - 1 > max_s_digits || l < 0) {
+            /* Not a rule: copy through the character after the number,
+               but never beyond the end of dirname. */
+            const char *stop = endp < dir_end ? endp + 1 : dir_end;
+            memcpy(path_end, dir, stop - dir);
+            path_end += stop - dir;
+            dir = stop;
+            continue;
+        }
+
+        /* Literal text preceding the rule */
+        memcpy(path_end, dir, cp - dir);
+        path_end += cp - dir;
+
+        /* A count of 0 (plain "%s") takes the rest of file; otherwise take
+           at most l characters. memcpy is used as strncpy would zero-pad
+           to l bytes and overrun path. */
+        avail = strlen(file);
+        n = (l != 0 && (size_t) l < avail) ? (size_t) l : avail;
+        memcpy(path_end, file, n);
+        path_end += n;
+        file     += n;
+
+        /* *endp == 's' cannot be at dir_end, so endp+1 <= dir_end */
+        dir = endp+1;
+    }
+
+    memcpy(path_end, dir, dir_end - dir);
+    path_end += dir_end - dir;
+    *path_end = 0;
+    if (*file) {
+        *path_end++ = '/';
+        strcpy(path_end, file);
+    }
+
+    return path;
+}
+
+/*
+ * Looks for 'file' in the directory 'dirname' (which may contain % rules,
+ * see expand_path) and opens it with mfopen() if the expanded path is a
+ * regular file.
+ *
+ * Returns mFILE pointer if found
+ *         NULL if not (or if the path could not be built or opened)
+ */
+static mFILE *find_file_dir(const char *file, char *dirname) {
+    mFILE *result;
+    char *full = expand_path(file, dirname, INT_MAX);
+
+    if (full == NULL)
+        return NULL;
+
+    result = is_file(full) ? mfopen(full, "rbm") : NULL;
+    free(full);
+
+    return result;
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * Public functions below.
+ */
+
+/*
+ * Opens a trace file named 'file'.
+ *
+ * The locations are tried in this order:
+ *   1. every element of 'path' (a colon separated list on unix; see
+ *      tokenise_search_path). If 'path' is NULL the RAWDATA environment
+ *      variable is used instead. Elements starting with "URL=", "http:",
+ *      "https:" or "ftp:" are fetched as URLs; anything else is treated
+ *      as a directory.
+ *   2. the current directory ("./" is always appended to the search path).
+ *   3. the directory part of 'relative_to', if that is non-NULL. This may
+ *      (for example) be the name of an experiment file referencing the
+ *      trace file, so the trace file is picked up in the same directory
+ *      as the experiment file.
+ *
+ * Returns a mFILE pointer when found.
+ *           NULL otherwise.
+ */
+mFILE *open_path_mfile(const char *file, char *path, char *relative_to) {
+    char *newsearch;
+    char *ele;
+    mFILE *fp;
+
+    /* Use path first */
+    if (!path)
+        path = getenv("RAWDATA");
+    if (NULL == (newsearch = tokenise_search_path(path)))
+        return NULL;
+
+    /*
+     * Step through the search path testing out each component.
+     * We now look through each path element treating some prefixes as
+     * special, otherwise we treat the element as a directory.
+     */
+    for (ele = newsearch; *ele; ele += strlen(ele)+1) {
+        char *ele2;
+
+        /*
+         * '|' prefixing a path component indicates that we do not
+         * wish to perform the compression extension searching in that
+         * location.
+         *
+         * NB: this has been removed from the htslib implementation.
+         */
+        if (*ele == '|') {
+            ele2 = ele+1;
+        } else {
+            ele2 = ele;
+        }
+
+        if (0 == strncmp(ele2, "URL=", 4)) {
+            if ((fp = find_file_url(file, ele2+4))) {
+                free(newsearch);
+                return fp;
+            }
+        } else if (!strncmp(ele2, "http:", 5) ||
+                   !strncmp(ele2, "https:", 6) ||
+                   !strncmp(ele2, "ftp:", 4)) {
+            if ((fp = find_file_url(file, ele2))) {
+                free(newsearch);
+                return fp;
+            }
+        } else if ((fp = find_file_dir(file, ele2))) {
+            free(newsearch);
+            return fp;
+        }
+    }
+
+    free(newsearch);
+
+    /* Look in the same location as the incoming 'relative_to' filename */
+    if (relative_to) {
+        char *cp;
+        size_t rlen = strlen(relative_to);
+        /* Heap copy sized to the input: a fixed PATH_MAX stack buffer
+           would overflow on longer names. */
+        char *relative_path = malloc(rlen + 1);
+
+        if (!relative_path)
+            return NULL;
+        memcpy(relative_path, relative_to, rlen + 1);
+
+        /* Strip the file name, leaving its directory. */
+        if ((cp = strrchr(relative_path, '/'))) {
+            /* Keep the slash of a file directly under "/", so the
+               directory is "/" rather than an empty string. */
+            if (cp == relative_path)
+                cp++;
+            *cp = 0;
+        }
+
+        fp = find_file_dir(file, relative_path);
+        free(relative_path);
+        if (fp)
+            return fp;
+    }
+
+    return NULL;
+}
+
+
+/*
+ * As per open_path_mfile, but searching only for local filenames.
+ * This is useful as we may avoid doing a full mfopen and loading
+ * the entire file into memory.
+ *
+ * 'path' is a search path as accepted by tokenise_search_path; if NULL the
+ * RAWDATA environment variable is used. URL elements are skipped.
+ *
+ * Returns the expanded pathname if found (malloced; the caller frees it).
+ *         NULL if not, or on memory allocation failure.
+ */
+char *find_path(const char *file, const char *path) {
+    char *newsearch;
+    char *ele;
+    char *outpath = NULL;
+
+    /* Use path first */
+    if (!path)
+        path = getenv("RAWDATA");
+    if (NULL == (newsearch = tokenise_search_path(path)))
+        return NULL;
+
+    for (ele = newsearch; *ele; ele += strlen(ele)+1) {
+        char *ele2 = (*ele == '|') ? ele+1 : ele;
+
+        /* Remote locations cannot be returned as a local pathname */
+        if (!strncmp(ele2, "URL=", 4) ||
+            !strncmp(ele2, "http:", 5) ||
+            !strncmp(ele2, "https:", 6) ||
+            !strncmp(ele2, "ftp:", 4))
+            continue;
+
+        outpath = expand_path(file, ele2, INT_MAX);
+        if (!outpath)
+            break; /* out of memory: do not hand NULL to stat() */
+
+        if (is_file(outpath)) {
+            free(newsearch);
+            return outpath;
+        }
+        free(outpath);
+    }
+
+    free(newsearch);
+
+    return NULL;
+}
--- a/ext/htslib/cram/open_trace_file.h
+++ b/ext/htslib/cram/open_trace_file.h
@ -0,0 +1,125 @@
+/*
+Author: James Bonfield
+
+Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL
+All rights reserved
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   . Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   . Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   . Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF
+MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or
+promote products derived from this software without specific prior written
+permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+Copyright (c) 2008, 2009, 2013, 2018 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OPEN_TRACE_FILE_H
+#define OPEN_TRACE_FILE_H
+
+#include "mFILE.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Tokenises the search path splitting on colons (unix) or semicolons
+ * (windows).
+ * We also explicitly add a "./" to the end of the search path.
+ *
+ * A NULL searchpath is treated as an empty one.
+ *
+ * Returns: A new search path with items separated by nul chars. Two nul
+ *          chars in a row represent the end of the tokenised path.
+ * Returns NULL for a failure.
+ *
+ * The returned data has been malloced. It is up to the caller to free this
+ * memory.
+ */
+char *tokenise_search_path(const char *searchpath);
+
+/*
+ * Opens a trace file named 'file'.
+ *
+ * The locations are tried in this order:
+ *   1. every element of 'path' (a colon separated list on unix). If 'path'
+ *      is NULL the RAWDATA environment variable is used instead.
+ *   2. the current directory ("./" is always appended to the search path).
+ *   3. the directory part of 'relative_to', if that is non-NULL. This may
+ *      (for example) be the name of an experiment file referencing the
+ *      trace file, so the trace file is picked up in the same directory
+ *      as the experiment file.
+ *
+ * Returns a mFILE pointer when found.
+ *           NULL otherwise.
+ */
+mFILE *open_path_mfile(const char *file, char *path, char *relative_to);
+
+/*
+ * 'url' may contain %s rules, which are expanded using 'file' before the
+ * resulting location is opened.
+ *
+ * Returns a mFILE containing the entire contents of the url;
+ *         NULL on failure.
+ */
+mFILE *find_file_url(const char *file, char *url);
+
+
+/*
+ * As per open_path_mfile, but searching only for local filenames.
+ * This is useful as we may avoid doing a full mfopen and loading
+ * the entire file into memory.
+ *
+ * URL elements of 'path' are skipped. If 'path' is NULL the RAWDATA
+ * environment variable is used instead.
+ *
+ * Returns the expanded pathname if found. It has been malloced; it is up
+ *         to the caller to free it.
+ *         NULL if not
+ */
+char *find_path(const char *file, const char *path);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPEN_TRACE_FILE_H */
--- a/ext/htslib/cram/os.h
+++ b/ext/htslib/cram/os.h
@ -0,0 +1,205 @@
+/*
+Copyright (c) 1993, 1995-2002 MEDICAL RESEARCH COUNCIL
+All rights reserved
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1 Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2 Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF
+MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or
+promote products derived from this software without specific prior written
+permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+Copyright (c) 2004, 2006, 2009-2011, 2013, 2017-2018 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * File: os.h
+ *
+ * Author:
+ *         MRC Laboratory of Molecular Biology
+ *         Hills Road
+ *         Cambridge CB2 2QH
+ *         United Kingdom
+ *
+ * Description: operating system specific type definitions
+ *
+ */
+
+#ifndef CRAM_OS_H
+#define CRAM_OS_H
+
+#include <limits.h>
+#include <stdint.h>
+
+#include "../htslib/hts_endian.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*-----------------------------------------------------------------------------
+ * Byte swapping macros
+ */
+
+/*
+ * Our new swap runs at the same speed on Ultrix, but substantially faster
+ * (300% for swap_int4, ~50% for swap_int2) on an Alpha (due to the lack of
+ * decent 'char' support).
+ *
+ * They also have the ability to swap in situ (src == dst). Newer code now
+ * relies on this so don't change back!
+ *
+ * Each macro is an expression yielding x with its 8, 4 or 2 low-order bytes
+ * reversed. The argument is parenthesised in the expansion, so compound
+ * expressions such as iswap_int2(a | b) work, but it is evaluated several
+ * times: do not pass an expression with side effects.
+ */
+#define iswap_int8(x)                             \
+    ((((x) & 0x00000000000000ffLL) << 56) +       \
+     (((x) & 0x000000000000ff00LL) << 40) +       \
+     (((x) & 0x0000000000ff0000LL) << 24) +       \
+     (((x) & 0x00000000ff000000LL) <<  8) +       \
+     (((x) & 0x000000ff00000000LL) >>  8) +       \
+     (((x) & 0x0000ff0000000000LL) >> 24) +       \
+     (((x) & 0x00ff000000000000LL) >> 40) +       \
+     (((x) & 0xff00000000000000LL) >> 56))
+
+#define iswap_int4(x)                             \
+    ((((x) & 0x000000ff) << 24) +                 \
+     (((x) & 0x0000ff00) <<  8) +                 \
+     (((x) & 0x00ff0000) >>  8) +                 \
+     (((x) & 0xff000000) >> 24))
+
+#define iswap_int2(x)                             \
+    ((((x) & 0x00ff) << 8) +                      \
+     (((x) & 0xff00) >> 8))
+
+/*
+ * Linux systems may use byteswap.h to get assembly versions of byte-swap
+ * on intel systems. This can be as trivial as the bswap opcode, which works
+ * out at over 2-times faster than iswap_int4 above.
+ *
+ * NB: this block is currently compiled out by the "#if 0" below, so the
+ * portable iswap_int* macros above are used on every platform.
+ */
+#if 0
+#if defined(__linux__)
+#    include <byteswap.h>
+#    undef iswap_int8
+#    undef iswap_int4
+#    undef iswap_int2
+#    define iswap_int8 bswap_64
+#    define iswap_int4 bswap_32
+#    define iswap_int2 bswap_16
+#endif
+#endif
+
+
+/*
+ * Macros to specify that data read in is of a particular endianness.
+ * The macros here swap to the appropriate order for the particular machine
+ * running the macro and return the new answer. These may also be used when
+ * writing to a file to specify that we wish to write in (eg) big endian
+ * format.
+ *
+ * This leads to efficient code as most of the time these macros are
+ * trivial.
+ *
+ * Three cases:
+ *  - HTS_BIG_ENDIAN defined:    byte-swap via iswap_int4 / iswap_int2.
+ *  - HTS_LITTLE_ENDIAN defined: the value is returned unchanged.
+ *  - neither defined:           inline functions that pass the bytes of x to
+ *    le_to_u32 / le_to_u16 from hts_endian.h (presumably byte-wise
+ *    little-endian decoders -- NOTE(review): confirm in hts_endian.h).
+ */
+#if defined(HTS_BIG_ENDIAN)
+#define le_int4(x) iswap_int4((x))
+#define le_int2(x) iswap_int2((x))
+#elif defined(HTS_LITTLE_ENDIAN)
+#define le_int4(x) (x)
+#define le_int2(x) (x)
+#else
+static inline uint32_t le_int4(uint32_t x) {
+    return le_to_u32((uint8_t *) &x);
+}
+static inline uint16_t le_int2(uint16_t x) {
+    return le_to_u16((uint8_t *) &x);
+}
+#endif
+
+/*-----------------------------------------------------------------------------
+ * Operating system specifics.
+ * These ought to be done by autoconf, but are legacy code.
+ */
+/*
+ * SunOS 4.x
+ * Even though we use the ANSI gcc, we make use of the standard SunOS 4.x
+ * libraries and include files, which are non-ansi.
+ * The SEEK_* constants are therefore defined here (presumably because those
+ * headers do not provide them -- NOTE(review): unverified).
+ */
+#if defined(__sun__) && !defined(__svr4__)
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+#endif
+
+/*
+ * Microsoft Visual C++
+ * Windows
+ *
+ * Map the POSIX names popen, pclose and ftruncate onto the underscore-
+ * prefixed / _chsize equivalents.
+ */
+#if defined(_MSC_VER)
+#define popen _popen
+#define pclose _pclose
+#define ftruncate(fd,len) _chsize(fd,len)
+#endif
+
+
+/*
+ * Microsoft Windows running MinGW
+ *
+ * - mkdir(filename,mode) drops the 'mode' argument and calls the
+ *   one-argument mkdir.
+ * - sysconf(x) expands to the constant 512 whatever 'x' is.
+ * - ftruncate maps to _chsize unless a ftruncate macro already exists.
+ */
+#if defined(__MINGW32__)
+#include <io.h>
+#define mkdir(filename,mode) mkdir((filename))
+#define sysconf(x) 512
+#ifndef ftruncate
+#  define ftruncate(fd,len) _chsize(fd,len)
+#endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CRAM_OS_H */
--- a/ext/htslib/cram/pooled_alloc.c
+++ b/ext/htslib/cram/pooled_alloc.c
@ -0,0 +1,205 @@
+/*
+Copyright (c) 2009, 2013, 2015, 2018-2019 Genome Research Ltd.
+Author: Rob Davies <rmd@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include "pooled_alloc.h"
+#include "misc.h"
+
+//#define DISABLE_POOLED_ALLOC
+//#define TEST_MAIN
+
+/* Cap, in bytes, applied by pool_create() when sizing a pool (1 MiB).
+ * Parenthesised so the macro expands safely inside larger expressions. */
+#define PSIZE (1024*1024)
+
+// Rounds v up to the nearest power of two; a power of two maps to itself
+// and 0 maps to 0.
+// credit to http://graphics.stanford.edu/~seander/bithacks.html
+static int next_power_2(unsigned int v) {
+    unsigned int shift;
+
+    v -= 1;
+    // Copy the highest set bit into the positions below it.
+    for (shift = 1; shift <= 16; shift *= 2)
+        v |= v >> shift;
+
+    return v + 1;
+}
+
+/*
+ * Creates a pool allocator for fixed-size items.
+ *
+ * dsize is the item size in bytes.  It is rounded up to a multiple of
+ * sizeof(void *), and is never smaller than one pointer, so that a freed
+ * item can hold the free-list link.
+ *
+ * Pool allocations are approx minimum of 1024*dsize or PSIZE, but never
+ * smaller than one item so that every pool has room for at least one.
+ *
+ * Returns the new allocator, or NULL if memory could not be allocated.
+ * Release it with pool_destroy().
+ */
+pool_alloc_t *pool_create(size_t dsize) {
+    pool_alloc_t *p;
+
+    if (NULL == (p = (pool_alloc_t *)malloc(sizeof(*p))))
+        return NULL;
+
+    /* Minimum size is a pointer, for free list */
+    dsize = (dsize + sizeof(void *) - 1) & ~(sizeof(void *)-1);
+    if (dsize < sizeof(void *))
+        dsize = sizeof(void *);
+    p->dsize = dsize;
+
+    /*
+     * For dsize >= PSIZE/1024 the pool size is capped at PSIZE anyway, so
+     * next_power_2() is only called when dsize*1024 fits its unsigned int
+     * argument without being truncated.
+     */
+    if (dsize >= (size_t)(PSIZE) / 1024)
+        p->psize = PSIZE;
+    else
+        p->psize = next_power_2(dsize * 1024);
+
+    /* An item larger than PSIZE gets a pool of its own size; otherwise the
+     * pool would be created with room for zero items. */
+    if (p->psize < dsize)
+        p->psize = dsize;
+
+    p->npools = 0;
+    p->pools = NULL;
+    p->free  = NULL;
+
+    return p;
+}
+
+/*
+ * Frees every pool owned by p and then p itself.
+ * Passing NULL is a no-op.
+ */
+void pool_destroy(pool_alloc_t *p) {
+    size_t i;
+
+    if (NULL == p)
+        return;
+
+    for (i = 0; i < p->npools; i++) {
+        free(p->pools[i].pool);
+    }
+    free(p->pools);
+    free(p);
+}
+
+#ifndef DISABLE_POOLED_ALLOC
+
+/*
+ * Appends one more pool to p and returns it, or NULL on allocation failure.
+ * The pool has room for psize/dsize items and starts with used == 0.
+ */
+static pool_t *new_pool(pool_alloc_t *p) {
+    const size_t items = p->psize / p->dsize;
+    pool_t *grown, *slot;
+
+    /* Grow the pool table by one entry; the old table survives a failure. */
+    grown = realloc(p->pools, (p->npools + 1) * sizeof(*p->pools));
+    if (NULL == grown)
+        return NULL;
+    p->pools = grown;
+
+    /* npools is only bumped once the backing memory exists. */
+    slot = &grown[p->npools];
+    slot->pool = malloc(items * p->dsize);
+    if (NULL == slot->pool)
+        return NULL;
+    slot->used = 0;
+    p->npools += 1;
+
+    return slot;
+}
+
+/*
+ * Returns storage for one item of p->dsize bytes, or NULL if a new pool was
+ * needed and could not be allocated.
+ */
+void *pool_alloc(pool_alloc_t *p) {
+    pool_t *cur;
+
+    /* Reuse the most recently freed item, if there is one */
+    if (p->free != NULL) {
+        void *item = p->free;
+        p->free = *(void **)item;   /* the next link lives inside the item */
+        return item;
+    }
+
+    /* Otherwise carve the next item out of the newest pool */
+    if (p->npools > 0) {
+        cur = &p->pools[p->npools - 1];
+        if (cur->used + p->dsize < p->psize) {
+            void *item = (char *)cur->pool + cur->used;
+            cur->used += p->dsize;
+            return item;
+        }
+    }
+
+    /* No pool with room left: start another one and hand out its first item */
+    cur = new_pool(p);
+    if (cur == NULL)
+        return NULL;
+
+    cur->used = p->dsize;
+    return cur->pool;
+}
+
+/*
+ * Puts ptr at the head of p's free list.  The first pointer-sized bytes of
+ * the item are overwritten with the link to the previous head.
+ */
+void pool_free(pool_alloc_t *p, void *ptr) {
+    void **link = (void **)ptr;
+
+    *link = p->free;
+    p->free = ptr;
+}
+
+#else
+
+/* DISABLE_POOLED_ALLOC build: each item is a separate malloc() of dsize bytes. */
+void *pool_alloc(pool_alloc_t *p) {
+    return malloc(p->dsize);
+}
+
+/* DISABLE_POOLED_ALLOC build: the item goes straight to free(); p is unused. */
+void pool_free(pool_alloc_t *p, void *ptr) {
+    free(ptr);
+}
+
+#endif
+
+#ifdef TEST_MAIN
+/* Three-int payload allocated from the pool by the TEST_MAIN self-test. */
+typedef struct {
+    int x, y, z;
+} xyz;
+
+#define NP 10000
+/*
+ * Self-test: allocates NP items, frees those whose index is not a multiple
+ * of three, allocates NP more, then prints and frees the items recorded in
+ * the first pass.
+ * Returns 0 on success, 1 if memory could not be allocated.
+ */
+int main(void) {
+    int i;
+    int ret = 1;
+    xyz *item;
+    xyz **items = NULL;
+    pool_alloc_t *p = pool_create(sizeof(xyz));
+
+    if (NULL == p)
+        return 1;
+
+    items = (xyz **)malloc(NP * sizeof(*items));
+    if (NULL == items)
+        goto done;
+
+    for (i = 0; i < NP; i++) {
+        item = pool_alloc(p);
+        if (NULL == item)
+            goto done;
+        item->x = i;
+        item->y = i+1;
+        item->z = i+2;
+        items[i] = item;
+    }
+
+    for (i = 0; i < NP; i++) {
+        item = items[i];
+        if (i % 3)
+            pool_free(p, item);
+    }
+
+    for (i = 0; i < NP; i++) {
+        item = pool_alloc(p);
+        if (NULL == item)
+            goto done;
+        item->x = 1000000+i;
+        item->y = 1000000+i+1;
+        item->z = 1000000+i+2;
+    }
+
+    for (i = 0; i < NP; i++) {
+        item = items[i];
+        printf("%d\t%d\t%d\t%d\n", i, item->x, item->y, item->z);
+        pool_free(p, item);
+    }
+
+    ret = 0;
+
+ done:
+    /* Single exit path so the item table and the pool are always released */
+    free(items);
+    pool_destroy(p);
+    return ret;
+}
+#endif
--- a/ext/htslib/cram/pooled_alloc.h
+++ b/ext/htslib/cram/pooled_alloc.h
@ -0,0 +1,66 @@
+/*
+Copyright (c) 2009, 2013, 2018 Genome Research Ltd.
+Author: Rob Davies <rmd@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef POOLED_ALLOC_H
+#define POOLED_ALLOC_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Implements a pooled block allocator where all items are the same size,
+ * but we need many of them.
+ *
+ * pool_t is one contiguous chunk of memory from which items are carved.
+ */
+typedef struct {
+    void   *pool;   /* start of the chunk */
+    size_t  used;   /* bytes of the chunk handed out so far */
+} pool_t;
+
+/* Allocator state: a growing array of pools plus a list of freed items. */
+typedef struct {
+    size_t dsize;   /* size of one item in bytes, as adjusted by pool_create() */
+    size_t psize;   /* byte size used when sizing each pool */
+    size_t npools;  /* number of entries in pools[] */
+    pool_t *pools;  /* array of pools; new items come from the last one */
+    void *free;     /* head of the list of items given to pool_free(), or NULL */
+} pool_alloc_t;
+
+/* Creates an allocator for items of dsize bytes; returns NULL on failure. */
+pool_alloc_t *pool_create(size_t dsize);
+/* Frees the allocator and its pools. */
+void pool_destroy(pool_alloc_t *p);
+/* Returns memory for one item, or NULL on failure. */
+void *pool_alloc(pool_alloc_t *p);
+/* Gives an item obtained from pool_alloc(p) back to p for reuse. */
+void pool_free(pool_alloc_t *p, void *ptr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* POOLED_ALLOC_H */
--- a/ext/htslib/cram/string_alloc.c
+++ b/ext/htslib/cram/string_alloc.c
@ -0,0 +1,162 @@
+/*
+Copyright (c) 2010, 2013, 2018-2019 Genome Research Ltd.
+Author: Andrew Whitwham <aw7@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+/*
+   A pooled string allocator intended to cut down on the
+   memory overhead of many small string allocations.
+
+   Andrew Whitwham, September 2010.
+*/
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "string_alloc.h"
+
+/* string_pool_create() raises any smaller max_length to this many bytes */
+#define MIN_STR_SIZE 1024
+
+
+/* Creates an empty string pool.  max_length is the initial size of each
+   underlying buffer (raised to MIN_STR_SIZE if smaller); it can grow
+   later as needed.  Returns NULL if out of memory. */
+
+string_alloc_t *string_pool_create(size_t max_length) {
+    string_alloc_t *pool = (string_alloc_t *)malloc(sizeof(*pool));
+
+    if (NULL == pool)
+        return NULL;
+
+    pool->max_length  = (max_length < MIN_STR_SIZE) ? MIN_STR_SIZE : max_length;
+    pool->strings     = NULL;
+    pool->nstrings    = 0;
+    pool->max_strings = 0;
+
+    return pool;
+}
+
+
+/* Internal: appends a fresh buffer of max_length bytes to the pool,
+   growing the buffer table first if it is full.  Returns the new entry,
+   or NULL if memory could not be allocated. */
+
+static string_t *new_string_pool(string_alloc_t *a_str) {
+    string_t *entry;
+
+    if (a_str->max_strings == a_str->nstrings) {
+        /* table full: capacity becomes (old | old/4) + 1 entries */
+        size_t capacity = a_str->max_strings;
+        string_t *table;
+
+        capacity = (capacity | (capacity >> 2)) + 1;
+        table = realloc(a_str->strings, capacity * sizeof(*a_str->strings));
+        if (NULL == table)
+            return NULL;
+
+        a_str->max_strings = capacity;
+        a_str->strings = table;
+    }
+
+    entry = &a_str->strings[a_str->nstrings];
+    entry->str = malloc(a_str->max_length);
+    if (NULL == entry->str)
+        return NULL;
+
+    entry->used = 0;
+    a_str->nstrings += 1;
+
+    return entry;
+}
+
+
+/* Frees every buffer in the pool, the buffer table and the pool itself.
+   All strings handed out by the pool become invalid.  NULL is a no-op. */
+
+void string_pool_destroy(string_alloc_t *a_str) {
+    size_t i;
+
+    if (NULL == a_str) return;
+
+    for (i = 0; i < a_str->nstrings; i++) {
+        free(a_str->strings[i].str);
+    }
+
+    free(a_str->strings);
+    free(a_str);
+}
+
+
+/* Allocates space for a string of length bytes.
+
+   Returns a pointer into one of the pool's buffers; the memory is released
+   by string_pool_destroy().  Returns NULL if length is 0 or if a new buffer
+   was needed and could not be allocated. */
+
+char *string_alloc(string_alloc_t *a_str, size_t length) {
+    string_t *str;
+    char *ret;
+    size_t old_max;
+
+    if (length == 0) return NULL;
+
+    // add to last string pool if we have space
+    if (a_str->nstrings) {
+        str = &a_str->strings[a_str->nstrings - 1];
+
+        // Compare against the remaining room instead of adding to "used",
+        // so that a huge length cannot wrap around and appear to fit.
+        if (str->used < a_str->max_length
+            && length < a_str->max_length - str->used) {
+            ret = str->str + str->used;
+            str->used += length;
+            return ret;
+        }
+    }
+
+    // increase the max length if needs be
+    old_max = a_str->max_length;
+    if (length > old_max) a_str->max_length = length;
+
+    // need a new string pool
+    str = new_string_pool(a_str);
+
+    if (NULL == str) {
+        // The last buffer was sized with the old max_length.  Put it back,
+        // otherwise later calls would treat that buffer as being larger
+        // than it is and write past its end.
+        a_str->max_length = old_max;
+        return NULL;
+    }
+
+    str->used = length;
+    return str->str;
+}
+
+
+/* strdup() equivalent: copies the NUL-terminated instr into the pool */
+
+char *string_dup(string_alloc_t *a_str, const char *instr) {
+    size_t len = strlen(instr);
+
+    return string_ndup(a_str, instr, len);
+}
+
+/* Copies len bytes from instr into the pool and appends a NUL.
+   Returns the copy, or NULL if the allocation failed. */
+char *string_ndup(string_alloc_t *a_str, const char *instr, size_t len) {
+    char *copy = string_alloc(a_str, len + 1);
+
+    if (copy != NULL) {
+        memcpy(copy, instr, len);
+        copy[len] = 0;
+    }
+
+    return copy;
+}
--- a/ext/htslib/cram/string_alloc.h
+++ b/ext/htslib/cram/string_alloc.h
@ -0,0 +1,69 @@
+/*
+Copyright (c) 2010, 2013, 2018 Genome Research Ltd.
+Author: Andrew Whitwham <aw7@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef STRING_ALLOC_H
+#define STRING_ALLOC_H
+
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * A pooled string allocator intended to cut down on the
+ * memory overhead of many small string allocations.
+ *
+ * Andrew Whitwham, September 2010.
+ */
+
+/* One buffer from which strings are carved. */
+typedef struct {
+    char *str;      /* start of the buffer */
+    size_t used;    /* bytes of the buffer handed out so far */
+} string_t;
+
+/* String pool state: a growing table of buffers. */
+typedef struct {
+    size_t max_length;   /* size in bytes given to each newly created buffer */
+    size_t nstrings;     /* number of buffers currently in strings[] */
+    size_t max_strings;  /* number of entries strings[] has room for */
+    string_t *strings;   /* buffer table; new strings come from the last one */
+} string_alloc_t;
+
+/* Creates an empty pool; max_length is the initial buffer size. */
+string_alloc_t *string_pool_create(size_t max_length);
+/* Frees the pool and every buffer in it. */
+void string_pool_destroy(string_alloc_t *a_str);
+/* Returns length bytes from the pool, or NULL (also when length is 0). */
+char *string_alloc(string_alloc_t *a_str, size_t length);
+/* Copies a NUL-terminated string into the pool. */
+char *string_dup(string_alloc_t *a_str, const char *instr);
+/* Copies len bytes into the pool and NUL-terminates the copy. */
+char *string_ndup(string_alloc_t *a_str, const char *instr, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/ext/htslib/errmod.c
+++ b/ext/htslib/errmod.c
@ -0,0 +1,208 @@
+/*  errmod.c -- revised MAQ error model.
+
+    Copyright (C) 2010 Broad Institute.
+    Copyright (C) 2012, 2013, 2016-2017, 2019 Genome Research Ltd.
+
+    Author: Heng Li <lh3@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <math.h>
+#include "htslib/hts.h"
+#include "htslib/ksort.h"
+#include "htslib/hts_os.h" // for drand48
+
+KSORT_INIT_STATIC_GENERIC(uint16_t)
+
+/* Error model: the depcorr it was built with plus its lookup tables. */
+struct errmod_t {
+    double depcorr;
+    /* table of constants generated for given depcorr and eta:
+     *   fk   - 256 entries, indexed by n
+     *   beta - 256*256*64 entries, indexed by q<<16|n<<8|k
+     *   lhet - 256*256 entries, indexed by n<<8|k
+     */
+    double *fk, *beta, *lhet;
+};
+
+/* Per-base accumulators used by errmod_cal(), indexed by the 4-bit base code. */
+typedef struct {
+    double fsum[16], bsum[16];  /* running sums of fk and of fk*beta */
+    uint32_t c[16];             /* number of observations of each base */
+} call_aux_t;
+
+/* log(n!), computed as lgamma(n+1) because \Gamma(n) = (n-1)!
+ * The argument is parenthesised so any expression may be passed. */
+#define lfact(n) lgamma((n)+1)
+
+/* Generates a table of log-transformed binomial coefficients.
+ * Entry [n<<8|k] holds log(n!) - log(k!) - log((n-k)!) for 1 <= k <= n and
+ * n < n_size; every other entry is zero.  n_size*n_size doubles are
+ * allocated, which matches the n<<8|k indexing when n_size is 256 (the
+ * value passed by cal_coef).
+ * Returns NULL on allocation failure; the caller frees the table. */
+static double* logbinomial_table( const int n_size )
+{
+    /* prob distribution for binom var is p(k) = {n! \over k! (n-k)! } p^k (1-p)^{n-k} */
+    /* only the log of the n! / (k! (n-k)!) factor is stored here */
+    double *table = (double*)calloc(n_size * n_size, sizeof(double));
+    int trials;
+
+    if (table == NULL) return NULL;
+
+    for (trials = 1; trials < n_size; ++trials) {
+        const double log_nfact = lfact(trials);
+        int succ;
+
+        for (succ = 1; succ <= trials; ++succ)
+            table[trials<<8|succ] = log_nfact - lfact(succ) - lfact(trials-succ);
+    }
+    return table;
+}
+
+/*
+ * Allocates and fills the lookup tables of em for the given depcorr and eta.
+ *
+ *   fk[n]              = (1 - depcorr)^n * (1 - eta) + eta for n in 1..255,
+ *                        with fk[0] = 1
+ *   beta[q<<16|n<<8|k] for q in 1..63, n in 1..255, k in 0..n
+ *   lhet[n<<8|k]       = lC[n<<8|k] - n*ln(2), where lC is the table
+ *                        returned by logbinomial_table(256)
+ *
+ * Returns 0 on success and -1 if an allocation fails.  On failure, tables
+ * already stored in em are left there for the caller to free.
+ */
+static int cal_coef(errmod_t *em, double depcorr, double eta)
+{
+    int k, n, q;
+    double sum, sum1;
+    double *lC;
+
+    // initialize ->fk
+    em->fk = (double*)calloc(256, sizeof(double));
+    if (!em->fk) return -1;
+    em->fk[0] = 1.0;
+    for (n = 1; n < 256; ++n)
+        em->fk[n] = pow(1. - depcorr, n) * (1.0 - eta) + eta;
+
+    // initialize ->beta
+    em->beta = (double*)calloc(256 * 256 * 64, sizeof(double));
+    if (!em->beta) return -1;
+
+    lC = logbinomial_table( 256 );
+    if (!lC) return -1;
+
+    for (q = 1; q < 64; ++q) {
+        // e = 10^(-q/10); le and le1 are log(e) and log(1-e)
+        double e = pow(10.0, -q/10.0);
+        double le = log(e);
+        double le1 = log(1.0 - e);
+        for (n = 1; n <= 255; ++n) {
+            // row of the beta table for this (q, n)
+            double *beta = em->beta + (q<<16|n<<8);
+            // sum1 starts as the log of the k == n term
+            sum1 = lC[n<<8|n] + n*le;
+            beta[n] = HUGE_VAL;
+            // Walk k downwards.  sum is sum1 with the k term added, both in
+            // log space; sum then becomes sum1 for the next k.
+            for (k = n - 1; k >= 0; --k, sum1 = sum) {
+                sum = sum1 + log1p(exp(lC[n<<8|k] + k*le + (n-k)*le1 - sum1));
+                // -10*log10 of the ratio exp(sum1)/exp(sum)
+                beta[k] = -10. / M_LN10 * (sum1 - sum);
+            }
+        }
+    }
+
+    // initialize ->lhet
+    em->lhet = (double*)calloc(256 * 256, sizeof(double));
+    if (!em->lhet) {
+        free(lC);
+        return -1;
+    }
+    for (n = 0; n < 256; ++n)
+        for (k = 0; k < 256; ++k)
+            em->lhet[n<<8|k] = lC[n<<8|k] - M_LN2 * n;
+    free(lC);
+    return 0;
+}
+
+/**
+ * Create errmod_t object with obj.depcorr set to depcorr and initialise
+ * its lookup tables (eta is fixed at 0.03).
+ *
+ * Returns the new object, or NULL if any allocation fails.
+ */
+errmod_t *errmod_init(double depcorr)
+{
+    errmod_t *em;
+    em = (errmod_t*)calloc(1, sizeof(errmod_t));
+    if (!em) return NULL;
+    em->depcorr = depcorr;
+    if (cal_coef(em, depcorr, 0.03) < 0) {
+        /* cal_coef() may have stored some tables before failing.  em was
+         * zero-initialised, so the ones it did not reach are still NULL. */
+        free(em->lhet); free(em->fk); free(em->beta);
+        free(em);
+        return NULL;
+    }
+    return em;
+}
+
+/**
+ * Release an errmod_t object together with its three lookup tables.
+ * A NULL pointer is accepted and ignored.
+ */
+void errmod_destroy(errmod_t *em)
+{
+    if (!em)
+        return;
+
+    free(em->lhet);
+    free(em->fk);
+    free(em->beta);
+    free(em);
+}
+
+//
+// Fills q with a value for every pair of alleles, from the observed bases.
+//
+// em: error model to fit to data
+// m: number of alleles across all samples; the 16-entry aux arrays are
+//    indexed with values up to m-1
+// n: number of bases observed in sample
+// bases[i]: bases observed in pileup [6 bit quality|1 bit strand|4 bit base]
+//           (passed to ks_shuffle/ks_introsort, so presumably reordered in
+//           place -- confirm in ksort.h)
+// q[i*m+j]: (Output) phred-scaled likelihood of each genotype (i,j)
+//
+// Returns 0 in all cases.
+int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q)
+{
+    // Aux
+    // aux.c is total count of each base observed (ignoring strand)
+    call_aux_t aux;
+    // Loop variables
+    int i, j, k;
+    // The total count of each base observed per strand
+    int w[32];
+
+    memset(q, 0, m * m * sizeof(float)); // initialise q to 0
+    if (n == 0) return 0;
+    // This section randomly downsamples to 255 depth so as not to go beyond our precalculated matrix
+    if (n > 255) { // if we exceed 255 bases observed then shuffle them to sample and only keep the first 255
+        ks_shuffle(uint16_t, n, bases);
+        n = 255;
+    }
+    ks_introsort(uint16_t, n, bases);
+    /* zero out w and aux */
+    memset(w, 0, 32 * sizeof(int));
+    memset(&aux, 0, sizeof(call_aux_t));
+
+    for (j = n - 1; j >= 0; --j) { // accumulate fsum, bsum and the counts
+        uint16_t b = bases[j];
+        /* extract quality (bits 5 and up) and clamp it to 4..63 */
+        int qual = b>>5 < 4? 4 : b>>5;
+        if (qual > 63) qual = 63;
+        /* extract base ORed with strand */
+        int basestrand = b&0x1f;
+        /* extract base */
+        int base = b&0xf;
+        /* fk is looked up by how many times this base+strand was seen so far */
+        aux.fsum[base] += em->fk[w[basestrand]];
+        aux.bsum[base] += em->fk[w[basestrand]] * em->beta[qual<<16|n<<8|aux.c[base]];
+        ++aux.c[base];
+        ++w[basestrand];
+    }
+
+    // generate likelihood
+    for (j = 0; j < m; ++j) {
+        // tmp1/tmp2/tmp3 sum bsum/c/fsum over the alleles outside the
+        // genotype being scored; tmp3 is accumulated but not read afterwards
+        float tmp1, tmp3;
+        int tmp2;
+        // homozygous
+        for (k = 0, tmp1 = tmp3 = 0.0, tmp2 = 0; k < m; ++k) {
+            if (k == j) continue;
+            tmp1 += aux.bsum[k]; tmp2 += aux.c[k]; tmp3 += aux.fsum[k];
+        }
+        if (tmp2) {
+            q[j*m+j] = tmp1;
+        }
+        // heterozygous
+        for (k = j + 1; k < m; ++k) {
+            // number of observations that are either allele j or allele k
+            int cjk = aux.c[j] + aux.c[k];
+            for (i = 0, tmp2 = 0, tmp1 = tmp3 = 0.0; i < m; ++i) {
+                if (i == j || i == k) continue;
+                tmp1 += aux.bsum[i]; tmp2 += aux.c[i]; tmp3 += aux.fsum[i];
+            }
+            // both (j,k) and (k,j) receive the same value
+            if (tmp2) {
+                q[j*m+k] = q[k*m+j] = -4.343 * em->lhet[cjk<<8|aux.c[k]] + tmp1;
+            } else q[j*m+k] = q[k*m+j] = -4.343 * em->lhet[cjk<<8|aux.c[k]]; // all the bases are either j or k
+        }
+        /* replace negative values in row j by 0 */
+        for (k = 0; k < m; ++k) if (q[j*m+k] < 0.0) q[j*m+k] = 0.0;
+    }
+
+    return 0;
+}
--- a/ext/htslib/faidx.5
+++ b/ext/htslib/faidx.5
@ -0,0 +1,238 @@
+'\" t
+.TH faidx 5 "June 2018" "htslib" "Bioinformatics formats"
+.SH NAME
+faidx \- an index enabling random access to FASTA and FASTQ files
+.\"
+.\" Copyright (C) 2013, 2015, 2018 Genome Research Ltd.
+.\"
+.\" Author: John Marshall <jm18@sanger.ac.uk>
+.\"
+.\" Permission is hereby granted, free of charge, to any person obtaining a
+.\" copy of this software and associated documentation files (the "Software"),
+.\" to deal in the Software without restriction, including without limitation
+.\" the rights to use, copy, modify, merge, publish, distribute, sublicense,
+.\" and/or sell copies of the Software, and to permit persons to whom the
+.\" Software is furnished to do so, subject to the following conditions:
+.\"
+.\" The above copyright notice and this permission notice shall be included in
+.\" all copies or substantial portions of the Software.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+.\" DEALINGS IN THE SOFTWARE.
+.\"
+.SH SYNOPSIS
+.IR file.fa .fai,
+.IR file.fasta .fai,
+.IR file.fq .fai,
+.IR file.fastq .fai
+.SH DESCRIPTION
+Using an \fBfai index\fP file in conjunction with a FASTA/FASTQ file containing
+reference sequences enables efficient access to arbitrary regions within
+those reference sequences.
+The index file typically has the same filename as the corresponding FASTA/FASTQ
+file, with \fB.fai\fP appended.
+.P
+An \fBfai index\fP file is a text file consisting of lines each with
+five TAB-delimited columns for a FASTA file and six for FASTQ:
+.TS
+lbl.
+NAME	Name of this reference sequence
+LENGTH	Total length of this reference sequence, in bases
+OFFSET	Offset in the FASTA/FASTQ file of this sequence's first base
+LINEBASES	The number of bases on each line
+LINEWIDTH	The number of bytes in each line, including the newline
+QUALOFFSET	Offset of sequence's first quality within the FASTQ file
+.TE
+.P
+The \fBNAME\fP and \fBLENGTH\fP columns contain the same
+data as would appear in the \fBSN\fP and \fBLN\fP fields of a
+SAM \fB@SQ\fP header for the same reference sequence.
+.P
+The \fBOFFSET\fP column contains the offset within the FASTA/FASTQ file, in
+bytes starting from zero, of the first base of this reference sequence, i.e., of
+the character following the newline at the end of the header line (the 
+"\fB>\fP" line in FASTA, "\fB@\fP" in FASTQ). Typically the lines of a
+\fBfai index\fP file appear in the order in which the reference sequences
+appear in the FASTA/FASTQ file, so \fB.fai\fP files are typically sorted
+according to this column.
+.P
+The \fBLINEBASES\fP column contains the number of bases in each of the sequence
+lines that form the body of this reference sequence, apart from the final line
+which may be shorter.
+The \fBLINEWIDTH\fP column contains the number of \fIbytes\fP in each of
+the sequence lines (except perhaps the final line), thus differing from
+\fBLINEBASES\fP in that it also counts the bytes forming the line terminator.
+.P
+The \fBQUALOFFSET\fP works the same way as \fBOFFSET\fP but for the first
+quality score of this reference sequence.  This would be the first character
+following the newline at the end of the "\fB+\fP" line.  For FASTQ files only.
+.SS FASTA Files
+In order to be indexed with \fBsamtools faidx\fP, a FASTA file must be a text
+file of the form
+.LP
+.RS
+.RI > name
+.RI [ description ...]
+.br
+ATGCATGCATGCATGCATGCATGCATGCAT
+.br
+GCATGCATGCATGCATGCATGCATGCATGC
+.br
+ATGCAT
+.br
+.RI > name
+.RI [ description ...]
+.br
+ATGCATGCATGCAT
+.br
+GCATGCATGCATGC
+.br
+[...]
+.RE
+.LP
+In particular, each reference sequence must be "well-formatted", i.e., all
+of its sequence lines must be the same length, apart from the final sequence
+line which may be shorter.
+(While this sequence line length must be the same within each sequence,
+it may vary between different reference sequences in the same FASTA file.)
+.P
+This also means that although the FASTA file may have Unix- or Windows-style
+or other line termination, the newline characters present must be consistent,
+at least within each reference sequence.
+.P
+The \fBsamtools\fP implementation uses the first word of the "\fB>\fP" header
+line text (i.e., up to the first whitespace character, having skipped any
+initial whitespace after the ">") as the \fBNAME\fP column.
+.SS FASTQ Files
+FASTQ files for indexing work in the same way as the FASTA files.
+.LP
+.RS
+.RI @ name
+.RI [ description...]
+.br
+ATGCATGCATGCATGCATGCATGCATGCAT
+.br
+GCATGCATGCATGCATGCATGCATGCATGC
+.br
+ATGCAT
+.br
+.RI +
+.br
+FFFA@@FFFFFFFFFFHHB:::@BFFFFGG
+.br
+HIHIIIIIIIIIIIIIIIIIIIIIIIFFFF
+.br
+8011<<
+.br
+.RI @ name
+.RI [ description...]
+.br
+ATGCATGCATGCAT
+.br
+GCATGCATGCATGC
+.br
+.RI +
+.br
+IIA94445EEII==
+.br
+=>IIIIIIIIICCC
+.br
+[...]
+.RE
+.LP
+Quality lines must be wrapped at the same length as the corresponding
+sequence lines.
+.SH EXAMPLE
+For example, given this FASTA file
+.LP
+.RS
+>one
+.br
+ATGCATGCATGCATGCATGCATGCATGCAT
+.br
+GCATGCATGCATGCATGCATGCATGCATGC
+.br
+ATGCAT
+.br
+>two another chromosome
+.br
+ATGCATGCATGCAT
+.br
+GCATGCATGCATGC
+.br
+.RE
+.LP
+formatted with Unix-style (LF) line termination, the corresponding fai index
+would be
+.RS
+.TS
+lnnnn.
+one	66	5	30	31
+two	28	98	14	15
+.TE
+.RE
+.LP
+If the FASTA file were formatted with Windows-style (CR-LF) line termination,
+the fai index would be
+.RS
+.TS
+lnnnn.
+one	66	6	30	32
+two	28	103	14	16
+.TE
+.RE
+.LP
+An example FASTQ file
+.LP
+.RS
+@fastq1
+.br
+ATGCATGCATGCATGCATGCATGCATGCAT
+.br
+GCATGCATGCATGCATGCATGCATGCATGC
+.br
+ATGCAT
+.br
++
+.br
+FFFA@@FFFFFFFFFFHHB:::@BFFFFGG
+.br
+HIHIIIIIIIIIIIIIIIIIIIIIIIFFFF
+.br
+8011<<
+.br
+@fastq2
+.br
+ATGCATGCATGCAT
+.br
+GCATGCATGCATGC
+.br
++
+.br
+IIA94445EEII==
+.br
+=>IIIIIIIIICCC
+.br
+.RE
+.LP
+Formatted with Unix-style line termination would give this fai index
+.RS
+.TS
+lnnnnn.
+fastq1	66	8	30	31	79
+fastq2	28	156	14	15	188
+.TE
+.RE
+.SH SEE ALSO
+.IR samtools (1)
+.TP
+https://en.wikipedia.org/wiki/FASTA_format
+.TP
+https://en.wikipedia.org/wiki/FASTQ_format
+
+Further description of the FASTA and FASTQ formats
--- a/ext/htslib/faidx.c
+++ b/ext/htslib/faidx.c
--- a/ext/htslib/fuzz_settings.h
+++ b/ext/htslib/fuzz_settings.h
@ -0,0 +1,35 @@
+/*  fuzz_settings.h -- fuzz-tester specific definitions
+
+    Copyright (C) 2023 Genome Research Ltd.
+
+    Author: Rob Davies <rmd@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#ifndef HTSLIB_FUZZ_SETTINGS_H
+#define HTSLIB_FUZZ_SETTINGS_H
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+
+// FUZZ_ALLOC_LIMIT only gets a value here in fuzzing builds, and only when
+// the build has not already defined it.
+#ifndef FUZZ_ALLOC_LIMIT
+// By default libfuzzer reports out-of-memory on allocations > 2 Gbytes
+#define FUZZ_ALLOC_LIMIT 2000000000ULL
+#endif
+
+#endif
+#endif
--- a/ext/htslib/header.c
+++ b/ext/htslib/header.c
--- a/ext/htslib/header.h
+++ b/ext/htslib/header.h
@ -0,0 +1,319 @@
+/*
+Copyright (c) 2013-2019 Genome Research Ltd.
+Authors: James Bonfield <jkb@sanger.ac.uk>, Valeriu Ohan <vo2@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*! \file
+ * SAM header parsing.
+ *
+ * These functions can be shared between SAM, BAM and CRAM file
+ * formats as all three internally use the same string encoding for
+ * header fields.
+ */
+
+
+#ifndef HEADER_H_
+#define HEADER_H_
+
+#include <stdarg.h>
+
+#include "cram/string_alloc.h"
+#include "cram/pooled_alloc.h"
+
+#include "htslib/khash.h"
+#include "htslib/kstring.h"
+#include "htslib/sam.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*! Make a single integer out of a two-letter type code: type[0] goes in bits 8-15 and type[1] in bits 0-7; only those two bytes are read */
+static inline khint32_t TYPEKEY(const char *type) {
+    unsigned int u0 = (unsigned char) type[0]; // go through unsigned char so a byte >= 0x80 is not sign-extended
+    unsigned int u1 = (unsigned char) type[1];
+    return (u0 << 8) | u1;
+}
+
+/*
+ * Proposed new SAM header parsing
+
+1 @SQ ID:foo LN:100
+2 @SQ ID:bar LN:200
+3 @SQ ID:ram LN:300 UR:xyz
+4 @RG ID:r ...
+5 @RG ID:s ...
+
+Hash table for 2-char @keys without dup entries.
+If dup lines, we form a circular linked list. Ie hash keys = {RG, SQ}.
+
+HASH("SQ")--\
+            |
+    (3) <-> 1 <-> 2 <-> 3 <-> (1)
+
+HASH("RG")--\
+            |
+    (5) <-> 4 <-> 5 <-> (4)
+
+Items stored in the hash values also form their own linked lists:
+Ie SQ->ID(foo)->LN(100)
+   SQ->ID(bar)->LN(200)
+   SQ->ID(ram)->LN(300)->UR(xyz)
+   RG->ID(r)
+ */
+
+/*! A single key:value pair on a header line
+ *
+ * These form a linked list and hold strings. The strings are
+ * allocated from a string_alloc_t pool referenced in the master
+ * sam_hrecs_t structure. Do not attempt to free, malloc or manipulate
+ * these strings directly.
+ */
+typedef struct sam_hrec_tag_s {
+    struct sam_hrec_tag_s *next;
+    const char *str;
+    int   len;
+} sam_hrec_tag_t;
+
+/*! The parsed version of the SAM header string.
+ *
+ * Each header type (SQ, RG, HD, etc) points to its own sam_hdr_type
+ * struct via the main hash table h in the sam_hrecs_t struct.
+ *
+ * These in turn consist of circular bi-directional linked lists (ie
+ * rings) to hold the multiple instances of the same header type
+ * code. For example if we have 5 \@SQ lines the primary hash table
+ * will key on \@SQ pointing to the first sam_hdr_type and that in turn
+ * will be part of a ring of 5 elements.
+ *
+ * For each sam_hdr_type structure we also point to a sam_hdr_tag
+ * structure which holds the tokenised attributes; the tab separated
+ * key:value pairs per line.
+ */
+typedef struct sam_hrec_type_s {
+    struct sam_hrec_type_s *next; // circular list of this type
+    struct sam_hrec_type_s *prev; // circular list of this type
+    struct sam_hrec_type_s *global_next; // circular list of all lines
+    struct sam_hrec_type_s *global_prev; // circular list of all lines
+    sam_hrec_tag_t *tag;          // first tag
+    khint32_t type;               // Two-letter type code as an int
+} sam_hrec_type_t;
+
+/*! Parsed \@SQ lines; element type of the ref[] array in struct sam_hrecs_t */
+typedef struct {
+    const char *name;     // presumably the SN: value (ref_hash maps SN to a ref[] index) -- confirm in header.c
+    hts_pos_t len;        // presumably the LN: value -- confirm in header.c
+    sam_hrec_type_t *ty;  // header line record; presumably the @SQ line this entry was parsed from -- confirm
+} sam_hrec_sq_t;
+
+/*! Parsed \@RG lines; element type of the rg[] array in struct sam_hrecs_t */
+typedef struct {
+    const char *name;     // presumably the ID: value (rg_hash maps ID to an rg[] index) -- confirm in header.c
+    sam_hrec_type_t *ty;  // header line record; presumably the @RG line this entry was parsed from -- confirm
+    int name_len;         // presumably the length of name in bytes -- confirm
+    int id;           // numerical ID
+} sam_hrec_rg_t;
+
+/*! Parsed \@PG lines; element type of the pg[] array in struct sam_hrecs_t */
+typedef struct {
+    const char *name;     // presumably the ID: value (pg_hash maps ID to a pg[] index) -- confirm in header.c
+    sam_hrec_type_t *ty;  // header line record; presumably the @PG line this entry was parsed from -- confirm
+    int name_len;         // presumably the length of name in bytes -- confirm
+    int id;           // numerical ID
+    int prev_id;      // -1 if none; presumably the id of the preceding entry in the PP chain -- confirm
+} sam_hrec_pg_t;
+
+
+/*! Sort order parsed from @HD line */
+enum sam_sort_order {
+    ORDER_UNKNOWN  =-1,
+    ORDER_UNSORTED = 0,
+    ORDER_NAME     = 1,
+    ORDER_COORD    = 2
+  //ORDER_COLLATE  = 3 // maybe one day!
+};
+
+enum sam_group_order { // Return type of sam_hrecs_group_order()
+    ORDER_NONE      =-1, // NOTE(review): same numeric value as ORDER_UNKNOWN in enum sam_sort_order
+    ORDER_QUERY     = 0,
+    ORDER_REFERENCE = 1
+};
+
+KHASH_MAP_INIT_INT(sam_hrecs_t, sam_hrec_type_t*)
+KHASH_MAP_INIT_STR(m_s2i, int)
+
+/*! Primary structure for header manipulation
+ *
+ * The initial header text is held in the text kstring_t, but is also
+ * parsed out into SQ, RG and PG arrays. These have a hash table
+ * associated with each to allow lookup by ID or SN fields instead of
+ * their numeric array indices. Additionally PG has an array to hold
+ * the linked list start points (the last in a PP chain).
+ *
+ * Use the appropriate sam_hdr_* functions to edit the header, and
+ * call sam_hdr_rebuild() any time the textual form needs to be
+ * updated again.
+ */
+struct sam_hrecs_t {
+    khash_t(sam_hrecs_t) *h;
+    sam_hrec_type_t *first_line; //!< First line (usually @HD)
+    string_alloc_t *str_pool; //!< Pool of sam_hdr_tag->str strings
+    pool_alloc_t   *type_pool;//!< Pool of sam_hdr_type structs
+    pool_alloc_t   *tag_pool; //!< Pool of sam_hdr_tag structs
+
+    // @SQ lines / references
+    int nref;                  //!< Number of \@SQ lines
+    int ref_sz;                //!< Number of entries available in ref[]
+    sam_hrec_sq_t *ref;        //!< Array of parsed \@SQ lines
+    khash_t(m_s2i) *ref_hash;  //!< Maps SQ SN field to ref[] index
+
+    // @RG lines / read-groups
+    int nrg;                   //!< Number of \@RG lines
+    int rg_sz;                 //!< number of entries available in rg[]
+    sam_hrec_rg_t *rg;         //!< Array of parsed \@RG lines
+    khash_t(m_s2i) *rg_hash;   //!< Maps RG ID field to rg[] index
+
+    // @PG lines / programs
+    int npg;                   //!< Number of \@PG lines
+    int pg_sz;                //!< Number of entries available in pg[]
+    int npg_end;               //!< Number of terminating \@PG lines
+    int npg_end_alloc;         //!< Size of pg_end field
+    sam_hrec_pg_t *pg;         //!< Array of parsed \@PG lines
+    khash_t(m_s2i) *pg_hash;   //!< Maps PG ID field to pg[] index
+    int *pg_end;               //!< \@PG chain termination IDs
+
+    // @cond internal
+    char *ID_buf;             // temporary buffer for sam_hdr_pg_id
+    uint32_t ID_buf_sz;
+    int ID_cnt;
+    // @endcond
+
+    int dirty;                // marks the header as modified, so it can be rebuilt
+    int refs_changed;         // Index of first changed ref (-1 if unchanged)
+    int pgs_changed;          // New PG line added
+    int type_count;
+    char (*type_order)[3];
+};
+
+/*!
+ * Method for parsing the header text and populating the
+ * internal hash tables. After calling this method, the
+ * parsed representation becomes the single source of truth.
+ *
+ * @param bh    Header structure, previously initialised by a
+ *              sam_hdr_init call
+ * @return      0 on success, -1 on failure
+ */
+int sam_hdr_fill_hrecs(sam_hdr_t *bh);
+
+/*!
+ * Reconstructs the text representation of the header from
+ * the hash table data after a change has been performed on
+ * the header.
+ *
+ * @return  0 on success, -1 on failure
+ */
+int sam_hdr_rebuild(sam_hdr_t *bh);
+
+/*! Creates an empty SAM header, ready to be populated.
+ *
+ * @return
+ * Returns a sam_hrecs_t struct on success (free with sam_hrecs_free())
+ *         NULL on failure
+ */
+sam_hrecs_t *sam_hrecs_new(void);
+
+/*! Produces a duplicate copy of hrecs and returns it.
+ * @return
+ * Returns NULL on failure
+ */
+sam_hrecs_t *sam_hrecs_dup(sam_hrecs_t *hrecs);
+
+/*! Update sam_hdr_t target_name and target_len arrays
+ *
+ *  sam_hdr_t and sam_hrecs_t are specified separately so that sam_hdr_dup
+ *  can use it to construct target arrays from the source header.
+ *
+ *  @return 0 on success; -1 on failure
+ */
+int sam_hdr_update_target_arrays(sam_hdr_t *bh, const sam_hrecs_t *hrecs,
+                                 int refs_changed);
+
+/*! Reconstructs a kstring from the header hash table.
+ *
+ * @return
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int sam_hrecs_rebuild_text(const sam_hrecs_t *hrecs, kstring_t *ks);
+
+/*! Deallocates all storage used by a sam_hrecs_t struct.
+ *
+ * This also decrements the header reference count. If after decrementing
+ * it is still non-zero then the header is assumed to be in use by another
+ * caller and the free is not done.
+ */
+void sam_hrecs_free(sam_hrecs_t *hrecs);
+
+/*!
+ * @return
+ * Returns the first header item matching 'type'. If ID_key is non-NULL it also
+ * looks for the tag named ID_key and compares its value against ID_value.
+ *
+ * Returns NULL if no type/ID is found
+ */
+sam_hrec_type_t *sam_hrecs_find_type_id(sam_hrecs_t *hrecs, const char *type,
+                                     const char *ID_key, const char *ID_value);
+
+sam_hrec_tag_t *sam_hrecs_find_key(sam_hrec_type_t *type,
+                                   const char *key,
+                                   sam_hrec_tag_t **prev);
+
+int sam_hrecs_remove_key(sam_hrecs_t *hrecs,
+                         sam_hrec_type_t *type,
+                         const char *key);
+
+/*! Looks up a read-group by name and returns a pointer to the start of the
+ * associated tag list.
+ *
+ * @return
+ * Returns NULL on failure
+ */
+sam_hrec_rg_t *sam_hrecs_find_rg(sam_hrecs_t *hrecs, const char *rg);
+
+/*! Returns the sort order from the @HD SO: field */
+enum sam_sort_order sam_hrecs_sort_order(sam_hrecs_t *hrecs);
+
+/*! Returns the group order from the @HD GO: field */
+enum sam_group_order sam_hrecs_group_order(sam_hrecs_t *hrecs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HEADER_H_ */
--- a/ext/htslib/hfile.c
+++ b/ext/htslib/hfile.c
--- a/ext/htslib/hfile_gcs.c
+++ b/ext/htslib/hfile_gcs.c
@ -0,0 +1,160 @@
+/*  hfile_gcs.c -- Google Cloud Storage backend for low-level file streams.
+
+    Copyright (C) 2016, 2021 Genome Research Ltd.
+
+    Author: John Marshall <jm18@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "htslib/hts.h"
+#include "htslib/kstring.h"
+#include "hfile_internal.h"
+#ifdef ENABLE_PLUGINS
+#include "version.h"
+#endif
+
+static hFILE *
+gcs_rewrite(const char *gsurl, const char *mode, int mode_has_colon,
+            va_list *argsp)
+{   // Rewrites a gs[+SCHEME]://BUCKET/PATH URL into the matching googleapis.com URL, opens it with hopen() and returns hopen()'s result
+    const char *bucket, *path, *access_token, *requester_pays_project;
+    kstring_t mode_colon = { 0, 0, NULL };          // holds "MODE:" when the caller's mode has no colon yet
+    kstring_t url = { 0, 0, NULL };                 // the rewritten URL
+    kstring_t auth_hdr = { 0, 0, NULL };            // stays empty unless GCS_OAUTH_TOKEN is set
+    kstring_t requester_pays_hdr = { 0, 0, NULL };  // stays empty unless GCS_REQUESTER_PAYS_PROJECT is set
+    hFILE *fp = NULL;
+
+    // GCS URL format is gs[+SCHEME]://BUCKET/PATH; when +SCHEME is absent, https is used
+
+    if (gsurl[2] == '+') {
+        bucket = strchr(gsurl, ':') + 1; // NOTE(review): assumes gsurl contains ':'; the strchr() result is not checked
+        kputsn(&gsurl[3], bucket - &gsurl[3], &url); // copies "SCHEME:" (text after the '+', up to and including the ':')
+    }
+    else {
+        kputs("https:", &url); // NOTE(review): none of the kput*() return values in this function are checked
+        bucket = &gsurl[3]; // skips the first three characters ("gs:" for a plain gs: URL)
+    }
+    while (*bucket == '/') kputc(*bucket++, &url); // copy the slashes that precede the bucket name
+
+    path = bucket + strcspn(bucket, "/?#"); // bucket name ends at the first '/', '?' or '#', or at end of string
+
+    kputsn(bucket, path - bucket, &url);
+    if (strchr(mode, 'r')) kputs(".storage-download", &url); // host suffix is picked from the open mode
+    else if (strchr(mode, 'w')) kputs(".storage-upload", &url);
+    else kputs(".storage", &url);
+    kputs(".googleapis.com", &url);
+
+    kputs(path, &url); // everything from the end of the bucket name onwards is appended unchanged
+
+    if (hts_verbose >= 8)
+        fprintf(stderr, "[M::gcs_open] rewrote URL as %s\n", url.s);
+
+    // TODO Find the access token in a more standard way
+    access_token = getenv("GCS_OAUTH_TOKEN");
+
+    if (access_token) {
+        kputs("Authorization: Bearer ", &auth_hdr);
+        kputs(access_token, &auth_hdr);
+    }
+
+    requester_pays_project = getenv("GCS_REQUESTER_PAYS_PROJECT");
+
+    if (requester_pays_project) {
+        kputs("X-Goog-User-Project: ", &requester_pays_hdr);
+        kputs(requester_pays_project, &requester_pays_hdr);
+    }
+
+    if (argsp || mode_has_colon || auth_hdr.l > 0 || requester_pays_hdr.l > 0) { // the variadic form of hopen() is needed
+        if (! mode_has_colon) {
+            kputs(mode, &mode_colon);
+            kputc(':', &mode_colon);
+            mode = mode_colon.s; // from here on mode is "MODE:" and points into mode_colon
+        }
+
+        if (auth_hdr.l > 0 && requester_pays_hdr.l > 0) {
+            fp = hopen(
+                url.s, mode, "va_list", argsp,
+                   "httphdr:l",
+                   auth_hdr.s,
+                   requester_pays_hdr.s,
+                   NULL,
+                   NULL
+            );
+
+        }
+        else { // NOTE(review): requester_pays_hdr is never passed on this path, even when it is the only header that was built
+            fp = hopen(url.s, mode, "va_list", argsp,
+                       "httphdr", (auth_hdr.l > 0)? auth_hdr.s : NULL, NULL);
+        }
+    }
+    else
+        fp = hopen(url.s, mode);
+
+    free(mode_colon.s); // mode may point at this buffer, so it is only released after hopen() has returned
+    free(url.s);
+    free(auth_hdr.s);
+    free(requester_pays_hdr.s);
+    return fp;
+}
+
+static hFILE *gcs_open(const char *url, const char *mode) // open() member of the scheme handler registered in this file
+{
+    return gcs_rewrite(url, mode, 0, NULL); // mode_has_colon = 0 and no va_list
+}
+
+static hFILE *gcs_vopen(const char *url, const char *mode_colon, va_list args0) // vopen() member of the scheme handler registered in this file
+{
+    // Need to use va_copy() as we can only take the address of an actual
+    // va_list object, not that of a parameter as its type may have decayed.
+    va_list args;
+    va_copy(args, args0);
+    hFILE *fp = gcs_rewrite(url, mode_colon, 1, &args); // mode_has_colon = 1: the mode is forwarded as received
+    va_end(args);
+    return fp;
+}
+
+int PLUGIN_GLOBAL(hfile_plugin_init,_gcs)(struct hFILE_plugin *self) // named hfile_plugin_init with ENABLE_PLUGINS, hfile_plugin_init_gcs otherwise (see PLUGIN_GLOBAL)
+{   // Sets self->name and registers one handler for the gs, gs+http and gs+https schemes; always returns 0
+    static const struct hFILE_scheme_handler handler =
+        { gcs_open, hfile_always_remote, "Google Cloud Storage",
+          2000 + 50, gcs_vopen // priority 50; the 2000 marks the struct revision in which the vopen member is present
+        };
+
+#ifdef ENABLE_PLUGINS
+    // Embed version string for examination via strings(1) or what(1)
+    static const char id[] = "@(#)hfile_gcs plugin (htslib)\t" HTS_VERSION_TEXT;
+    if (hts_verbose >= 9)
+        fprintf(stderr, "[M::hfile_gcs.init] version %s\n", strchr(id, '\t')+1); // prints the part of id after the tab
+#endif
+
+    self->name = "Google Cloud Storage";
+    hfile_add_scheme_handler("gs", &handler);
+    hfile_add_scheme_handler("gs+http", &handler);
+    hfile_add_scheme_handler("gs+https", &handler);
+    return 0;
+}
--- a/ext/htslib/hfile_internal.h
+++ b/ext/htslib/hfile_internal.h
@ -0,0 +1,209 @@
+/*  hfile_internal.h -- internal parts of low-level input/output streams.
+
+    Copyright (C) 2013-2016, 2019 Genome Research Ltd.
+
+    Author: John Marshall <jm18@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#ifndef HFILE_INTERNAL_H
+#define HFILE_INTERNAL_H
+
+#include <stdarg.h>
+
+#include "htslib/hts_defs.h"
+#include "htslib/hfile.h"
+
+#include "textutils_internal.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*!
+  @abstract  Resizes the buffer within an hFILE.
+
+  @notes  Changes the buffer size for an hFILE.  Ideally this is done
+  immediately after opening.  If performed later, this function may
+  fail if we are reducing the buffer size and the current offset into
+  the buffer is beyond the new capacity.
+
+  @param fp        The file stream
+  @param bufsiz    The size of the new buffer
+
+  @return Returns 0 on success, -1 on failure.
+ */
+int hfile_set_blksize(hFILE *fp, size_t bufsiz);
+
+struct BGZF;
+/*!
+  @abstract Return the hFILE connected to a BGZF
+ */
+struct hFILE *bgzf_hfile(struct BGZF *fp);
+
+/*!
+  @abstract Closes all hFILE plugins that have been loaded
+*/
+void hfile_shutdown(int do_close_plugin);
+
+struct hFILE_backend {
+    /* As per read(2), returning the number of bytes read (possibly 0) or
+       negative (and setting errno) on errors.  Front-end code will call this
+       repeatedly if necessary to attempt to get the desired byte count.  */
+    ssize_t (*read)(hFILE *fp, void *buffer, size_t nbytes) HTS_RESULT_USED;
+
+    /* As per write(2), returning the number of bytes written or negative (and
+       setting errno) on errors.  Front-end code will call this repeatedly if
+       necessary until the desired block is written or an error occurs.  */
+    ssize_t (*write)(hFILE *fp, const void *buffer, size_t nbytes)
+        HTS_RESULT_USED;
+
+    /* As per lseek(2), returning the resulting offset within the stream or
+       negative (and setting errno) on errors.  */
+    off_t (*seek)(hFILE *fp, off_t offset, int whence) HTS_RESULT_USED;
+
+    /* Performs low-level flushing, if any, e.g., fsync(2); for writing streams
+       only.  Returns 0 for success or negative (and sets errno) on errors. */
+    int (*flush)(hFILE *fp) HTS_RESULT_USED;
+
+    /* Closes the underlying stream (for output streams, the buffer will
+       already have been flushed), returning 0 for success or negative (and
+       setting errno) on errors, as per close(2).  */
+    int (*close)(hFILE *fp) HTS_RESULT_USED;
+};
+
+/* May be called by hopen_*() functions to decode a fopen()-style mode into
+   open(2)-style flags.  */
+HTSLIB_EXPORT
+int hfile_oflags(const char *mode);
+
+/* Must be called by hopen_*() functions to allocate the hFILE struct and set
+   up its base.  Capacity is a suggested buffer size (e.g., via fstat(2))
+   or 0 for a default-sized buffer.  */
+HTSLIB_EXPORT
+hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity);
+
+/* Alternative to hfile_init() for in-memory backends for which the base
+   buffer is the only storage.  Buffer is already allocated via malloc(2)
+   of size buf_size and with buf_filled bytes already filled.  Ownership
+   of the buffer is transferred to the resulting hFILE.  */
+hFILE *hfile_init_fixed(size_t struct_size, const char *mode,
+                        char *buffer, size_t buf_filled, size_t buf_size);
+
+/* May be called by hopen_*() functions to undo the effects of hfile_init()
+   in the event opening the stream subsequently fails.  (This is safe to use
+   even if fp is NULL.  This takes care to preserve errno.)  */
+HTSLIB_EXPORT
+void hfile_destroy(hFILE *fp);
+
+
+struct hFILE_scheme_handler {
+    /* Opens a stream when dispatched by hopen(); should call hfile_init()
+       to malloc a struct "derived" from hFILE and initialise it appropriately,
+       including setting base.backend to its own backend vector.  */
+    hFILE *(*open)(const char *filename, const char *mode) HTS_RESULT_USED;
+
+    /* Returns whether the URL denotes remote storage when dispatched by
+       hisremote().  For simple cases, use one of hfile_always_*() below.  */
+    int (*isremote)(const char *filename) HTS_RESULT_USED;
+
+    /* The name of the plugin or other code providing this handler.  */
+    const char *provider;
+
+    /* If multiple handlers are registered for the same scheme, the one with
+       the highest priority is used; range is 0 (lowest) to 100 (highest).
+       This field is used modulo 1000 as a priority; thousands indicate
+       later revisions to this structure, as noted below.  */
+    int priority;
+
+    /* Fields below are present when priority >= 2000.  */
+
+    /* Same as the open() method, used when extra arguments have been given
+       to hopen().  */
+    hFILE *(*vopen)(const char *filename, const char *mode, va_list args)
+        HTS_RESULT_USED;
+};
+
+/* May be used as an isremote() function in simple cases.  */
+HTSLIB_EXPORT
+extern int hfile_always_local (const char *fname);
+HTSLIB_EXPORT
+extern int hfile_always_remote(const char *fname);
+
+/* Should be called by plugins for each URL scheme they wish to handle.  */
+HTSLIB_EXPORT
+void hfile_add_scheme_handler(const char *scheme,
+                              const struct hFILE_scheme_handler *handler);
+
+struct hFILE_plugin {
+    /* On entry, HTSlib's plugin API version (currently 1).  */
+    int api_version;
+
+    /* On entry, the plugin's handle as returned by dlopen() etc.  */
+    void *obj;
+
+    /* The plugin should fill this in with its (human-readable) name.  */
+    const char *name;
+
+    /* The plugin may wish to fill in a function to be called on closing.  */
+    void (*destroy)(void);
+};
+
+#ifdef ENABLE_PLUGINS
+#define PLUGIN_GLOBAL(identifier,suffix) identifier
+
+/* Plugins must define an entry point with this signature.  */
+HTSLIB_EXPORT
+extern int hfile_plugin_init(struct hFILE_plugin *self);
+
+#else
+#define PLUGIN_GLOBAL(identifier,suffix) identifier##suffix
+
+/* Only plugins distributed within the HTSlib source that might be built
+   even with --disable-plugins need to use PLUGIN_GLOBAL and be listed here;
+   others can simply define hfile_plugin_init().  */
+
+extern int hfile_plugin_init_gcs(struct hFILE_plugin *self);
+extern int hfile_plugin_init_libcurl(struct hFILE_plugin *self);
+extern int hfile_plugin_init_s3(struct hFILE_plugin *self);
+extern int hfile_plugin_init_s3_write(struct hFILE_plugin *self);
+#endif
+
+// Callback to allow headers to be set in http connections.  Currently used
+// to allow s3 to renew tokens when seeking.  Kept internal for now,
+// although we may consider exposing it in the API later.
+typedef int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
+
+/** Callback for handling 3xx redirect responses from http connections.
+
+    @param data       is passed to the callback
+    @param response   http response code (e.g. 301)
+    @param headers    http response headers
+    @param new_url    the callback should write the url to switch to in here
+
+    Currently used by s3 to handle switching region endpoints.
+*/
+typedef int (*redirect_callback) (void *data, long response,
+                                  kstring_t *headers, kstring_t *new_url);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/ext/htslib/hfile_libcurl.c
+++ b/ext/htslib/hfile_libcurl.c
--- a/ext/htslib/hfile_s3.c
+++ b/ext/htslib/hfile_s3.c
--- a/ext/htslib/hfile_s3_write.c
+++ b/ext/htslib/hfile_s3_write.c
@ -0,0 +1,896 @@
+/*
+    hfile_s3_write.c - Code to handle multipart uploading to S3.
+
+    Copyright (C) 2019 Genome Research Ltd.
+
+    Author: Andrew Whitwham <aw7@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+
+
+S3 Multipart Upload
+-------------------
+
+There are several steps in the Multipart upload.
+
+
+1) Initiate Upload
+------------------
+
+Initiate the upload and get an upload ID.  This ID is used in all other steps.
+
+
+2) Upload Part
+--------------
+
+Upload a part of the data.  5 MiB (5242880 bytes) minimum part size (except for the last part).
+Each part is numbered and a successful upload returns an Etag header value that
+needs to be used for the completion step.
+
+Step repeated till all data is uploaded.
+
+
+3) Completion
+-------------
+
+Complete the upload by sending all the part numbers along with their associated
+Etag values.
+
+
+Optional - Abort
+----------------
+
+If something goes wrong this instructs the server to delete all the partial
+uploads and abandon the upload process.
+
+
+Andrew Whitwham, January 2019
+*/
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef __MSYS__
+#include <strings.h>
+#endif
+#include <errno.h>
+#include <pthread.h>
+
+#include "hfile_internal.h"
+#ifdef ENABLE_PLUGINS
+#include "version.h"
+#endif
+#include "htslib/hts.h"
+#include "htslib/kstring.h"
+#include "htslib/khash.h"
+
+#include <curl/curl.h>
+
+#define MINIMUM_S3_WRITE_SIZE 5242880
+#define S3_MOVED_PERMANENTLY 301
+#define S3_BAD_REQUEST 400
+
+// Lets the part memory size grow to about 1Gb giving a 2.5Tb max file size.
+// Max. parts allowed by AWS is 10000, so use ceil(10000.0/9.0)
+#define EXPAND_ON 1112
+
+static struct { // File-scope state, not tied to any one hFILE_s3_write
+    kstring_t useragent;         // its .s is handed to CURLOPT_USERAGENT when a request is set up
+    CURLSH *share;               // libcurl share handle; starts NULL and is not assigned in the code shown here -- presumably set during plugin init, confirm
+    pthread_mutex_t share_lock;  // taken by share_lock() and released by share_unlock()
+} curl = { { 0, 0, NULL }, NULL, PTHREAD_MUTEX_INITIALIZER };
+
+static void share_lock(CURL *handle, curl_lock_data data,
+                       curl_lock_access access, void *userptr) { // presumably installed via CURLSHOPT_LOCKFUNC -- confirm; all four arguments are ignored
+    pthread_mutex_lock(&curl.share_lock); // a single mutex is used whatever 'data' and 'access' say
+}
+
+static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) { // counterpart of share_lock(); presumably installed via CURLSHOPT_UNLOCKFUNC -- confirm; arguments are ignored
+    pthread_mutex_unlock(&curl.share_lock);
+}
+
+typedef int (*s3_auth_callback) (void *auth_data, char *, kstring_t*, char*, kstring_t*, kstring_t*, kstring_t*, kstring_t*, int); // as called in this file: (auth_data, HTTP method, request body or NULL, canonical query string, out content hash, out authorisation header, out date header, out token header, int -- 0 at every call shown here); a call with all of these NULL frees the authorisation data; non-zero return is treated as failure
+
+typedef int (*set_region_callback) (void *auth_data, kstring_t *region);
+
+typedef struct { // Callback bundle attached to a stream as fp->au; the struct itself is released with free() in cleanup_local()
+    s3_auth_callback callback;               // produces the per-request headers; also used to free callback_data (see cleanup())
+    redirect_callback redirect_callback;     // type declared in hfile_internal.h for handling 3xx redirect responses
+    set_region_callback set_region_callback; // receives the auth data and a region string; not called in the code shown here
+    void *callback_data;                     // passed as the first argument of callback
+} s3_authorisation;
+
+typedef struct { // Per-stream state of one S3 multipart upload
+    hFILE base;                   // the hFILE this struct is "derived" from (see hfile_init() in hfile_internal.h)
+    CURL *curl;                   // easy handle, reset with curl_easy_reset() before each request and released in cleanup_local()
+    CURLcode ret;                 // result of the most recent curl_easy_perform()
+    s3_authorisation *au;         // authorisation callbacks; freed in cleanup_local()
+    kstring_t buffer;             // presumably the data collected for the part being built -- confirm against the write path
+    kstring_t url;                // request URL without query string; "?uploadId=..." is appended per request
+    kstring_t upload_id;          // value sent as the uploadId query parameter
+    kstring_t completion_message; // request body of the completion step; complete_upload() appends the closing </CompleteMultipartUpload>
+    int part_no;
+    int aborted;                  // set to 1 by abort_upload()
+    size_t index;
+    long verbose;                 // passed to CURLOPT_VERBOSE
+    int part_size;
+    int expand;
+} hFILE_s3_write;
+
+
+static void ksinit(kstring_t *s) { // Resets all three kstring_t fields; does not free s->s, so use it only on uninitialised or already-freed strings
+    s->l = 0;
+    s->m = 0;
+    s->s = NULL;
+}
+
+
+static void ksfree(kstring_t *s) { // Frees the buffer and leaves s empty
+    free(s->s);
+    ksinit(s); // s->s becomes NULL, so a second ksfree() on the same string is harmless
+}
+
+
+static size_t response_callback(void *contents, size_t size, size_t nmemb, void *userp) { // installed with CURLOPT_WRITEFUNCTION; userp must be the kstring_t that collects the response
+    size_t realsize = size * nmemb; // number of bytes delivered in this call
+    kstring_t *resp = (kstring_t *)userp;
+
+    if (kputsn((const char *)contents, realsize, resp) == EOF) {
+        return 0; // per the CURLOPT_WRITEFUNCTION documentation, a count different from realsize makes libcurl fail the transfer
+    }
+
+    return realsize;
+}
+
+
+static int get_entry(char *in, char *start_tag, char *end_tag, kstring_t *out) { // Appends to out the text between the first start_tag in 'in' and the first end_tag after it
+    char *start;
+    char *end;
+
+    if (!in) {
+        return EOF; // NULL input is reported the same way as a missing tag
+    }
+
+    start = strstr(in, start_tag);
+    if (!start) return EOF;
+
+    start += strlen(start_tag); // step over the tag itself so only the enclosed text is copied
+    end = strstr(start, end_tag); // search begins after start_tag, so an earlier end_tag is ignored
+
+    if (!end) return EOF;
+
+    return kputsn(start, end - start, out); // on this path the return value is whatever kputsn() returns
+}
+
+
+static void cleanup_local(hFILE_s3_write *fp) { // Releases the strings, curl handle and callback struct held by fp; fp itself is not freed and the auth callback is not called
+    ksfree(&fp->buffer);
+    ksfree(&fp->url);
+    ksfree(&fp->upload_id);
+    ksfree(&fp->completion_message);
+    curl_easy_cleanup(fp->curl);
+    free(fp->au); // fp->au is left dangling, so nothing may use it after this call
+
+}
+
+
+static void cleanup(hFILE_s3_write *fp) { // Full teardown: authorisation data first, then everything cleanup_local() releases
+    // free up authorisation data
+    fp->au->callback(fp->au->callback_data,  NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0); // must run before cleanup_local(), which frees fp->au
+    cleanup_local(fp);
+}
+
+
+static struct curl_slist *set_html_headers(hFILE_s3_write *fp, kstring_t *auth, kstring_t *date, kstring_t *content, kstring_t *token) { // Builds the request header list, installs it on fp->curl and returns it
+    struct curl_slist *headers = NULL;
+
+    headers = curl_slist_append(headers, "Content-Type:"); // get rid of this
+    headers = curl_slist_append(headers, "Expect:");       // and this
+    headers = curl_slist_append(headers, auth->s); // NOTE(review): curl_slist_append() results are not checked anywhere in this function
+    headers = curl_slist_append(headers, date->s);
+    headers = curl_slist_append(headers, content->s);
+
+    if (token->l) { // the token header is only added when the auth callback produced one
+        headers = curl_slist_append(headers, token->s);
+    }
+
+    curl_easy_setopt(fp->curl, CURLOPT_HTTPHEADER, headers);
+
+    return headers; // returned so the caller can release it with curl_slist_free_all(), as abort_upload() does
+}
+
+
+/*  The partially uploaded file will hang around unless the delete command is sent.
+    Sends DELETE on URL?uploadId=ID, then always sets fp->aborted and calls cleanup(fp).
+    Returns 0 if curl_easy_perform() gave CURLE_OK, otherwise -1.  */
+static int abort_upload(hFILE_s3_write *fp) {
+    kstring_t content_hash = {0, 0, NULL};
+    kstring_t authorisation = {0, 0, NULL};
+    kstring_t url = {0, 0, NULL};
+    kstring_t content = {0, 0, NULL};
+    kstring_t canonical_query_string = {0, 0, NULL};
+    kstring_t date = {0, 0, NULL};
+    kstring_t token = {0, 0, NULL};
+    int ret = -1; // stays -1 unless the request below is performed successfully
+    struct curl_slist *headers = NULL;
+    char http_request[] = "DELETE";
+
+    if (ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) {
+        goto out;
+    }
+
+    if (fp->au->callback(fp->au->callback_data,  http_request, NULL,
+                         canonical_query_string.s, &content_hash,
+                         &authorisation, &date, &token, 0) != 0) { // no request body for DELETE, hence the NULL third argument
+        goto out;
+    }
+
+    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
+        goto out;
+    }
+
+    if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) {
+        goto out;
+    }
+
+    curl_easy_reset(fp->curl); // drop options left over from the previous request on this handle
+    curl_easy_setopt(fp->curl, CURLOPT_CUSTOMREQUEST, http_request);
+    curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
+    curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
+
+    curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);
+
+    headers = set_html_headers(fp, &authorisation, &date, &content, &token);
+    fp->ret = curl_easy_perform(fp->curl);
+
+    if (fp->ret == CURLE_OK) { // NOTE(review): only the transfer result is checked here; the HTTP status code is not inspected
+        ret = 0;
+    }
+
+ out:
+    ksfree(&authorisation);
+    ksfree(&content);
+    ksfree(&content_hash);
+    ksfree(&url);
+    ksfree(&date);
+    ksfree(&canonical_query_string);
+    ksfree(&token);
+    curl_slist_free_all(headers); // headers is still NULL when an early goto skipped set_html_headers()
+
+    fp->aborted = 1;
+    cleanup(fp); // runs on success and failure alike; fp's strings, curl handle and au are gone after this
+
+    return ret;
+}
+
+
+/*
+    Finish a multipart upload.
+
+    Appends the closing </CompleteMultipartUpload> tag to
+    fp->completion_message and POSTs the message to the upload URL.  The
+    reply body is collected into resp through response_callback.
+
+    Returns 0 when the request was performed without a curl error, else -1.
+    The caller still has to inspect resp to see what the server answered.
+*/
+static int complete_upload(hFILE_s3_write *fp, kstring_t *resp) {
+    kstring_t content_hash = {0, 0, NULL};
+    kstring_t authorisation = {0, 0, NULL};
+    kstring_t url = {0, 0, NULL};
+    kstring_t content = {0, 0, NULL};
+    kstring_t canonical_query_string = {0, 0, NULL};
+    kstring_t date = {0, 0, NULL};
+    kstring_t token = {0, 0, NULL};
+    int ret = -1;
+    struct curl_slist *headers = NULL;
+    char http_request[] = "POST";
+
+    // Every failure leaves through "out" so the local strings are always
+    // released, matching abort_upload().
+    if (ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) {
+        goto out;
+    }
+
+    // finish off the completion reply
+    if (kputs("</CompleteMultipartUpload>\n", &fp->completion_message) < 0) {
+        goto out;
+    }
+
+    if (fp->au->callback(fp->au->callback_data,  http_request,
+                         &fp->completion_message, canonical_query_string.s,
+                         &content_hash, &authorisation, &date, &token, 0) != 0) {
+        goto out;
+    }
+
+    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
+        goto out;
+    }
+
+    if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) {
+        goto out;
+    }
+
+    curl_easy_reset(fp->curl);
+    curl_easy_setopt(fp->curl, CURLOPT_POST, 1L);
+    curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, fp->completion_message.s);
+    curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDSIZE, (long) fp->completion_message.l);
+    curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback);
+    curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp);
+    curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
+    curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
+
+    curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);
+
+    headers = set_html_headers(fp, &authorisation, &date, &content, &token);
+    fp->ret = curl_easy_perform(fp->curl);
+
+    if (fp->ret == CURLE_OK) {
+        ret = 0;
+    }
+
+ out:
+    ksfree(&authorisation);
+    ksfree(&content);
+    ksfree(&content_hash);
+    ksfree(&url);
+    ksfree(&date);
+    ksfree(&token);
+    ksfree(&canonical_query_string);
+    curl_slist_free_all(headers);
+
+    return ret;
+}
+
+
+/*
+    Read callback handed to curl for part uploads (see upload_part).
+
+    Copies the next piece of fp->buffer, starting at fp->index, into ptr.
+    At most size * nmemb bytes are copied, and never more than what is left
+    in the buffer.  Returns the number of bytes copied.
+*/
+static size_t upload_callback(void *ptr, size_t size, size_t nmemb, void *stream) {
+    hFILE_s3_write *fp = (hFILE_s3_write *)stream;
+    size_t wanted = size * nmemb;
+    size_t chunk = (wanted > (fp->buffer.l - fp->index))
+                 ? fp->buffer.l - fp->index
+                 : wanted;
+
+    memcpy(ptr, fp->buffer.s + fp->index, chunk);
+    fp->index += chunk;
+
+    return chunk;
+}
+
+
+/*
+    Upload the current contents of fp->buffer as part number fp->part_no.
+
+    The data is sent with a PUT request, fed to curl by upload_callback.
+    Response headers are collected into resp through response_callback.
+
+    Returns 0 when the request was performed without a curl error, else -1.
+    The caller still has to check the HTTP response code.
+*/
+static int upload_part(hFILE_s3_write *fp, kstring_t *resp) {
+    kstring_t content_hash = {0, 0, NULL};
+    kstring_t authorisation = {0, 0, NULL};
+    kstring_t url = {0, 0, NULL};
+    kstring_t content = {0, 0, NULL};
+    kstring_t canonical_query_string = {0, 0, NULL};
+    kstring_t date = {0, 0, NULL};
+    kstring_t token = {0, 0, NULL};
+    int ret = -1;
+    struct curl_slist *headers = NULL;
+    char http_request[] = "PUT";
+
+    // Every failure leaves through "out" so the local strings are always
+    // released, matching abort_upload().
+    if (ksprintf(&canonical_query_string, "partNumber=%d&uploadId=%s", fp->part_no, fp->upload_id.s) < 0) {
+        goto out;
+    }
+
+    if (fp->au->callback(fp->au->callback_data, http_request, &fp->buffer,
+                         canonical_query_string.s, &content_hash,
+                         &authorisation, &date, &token, 0) != 0) {
+        goto out;
+    }
+
+    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
+        goto out;
+    }
+
+    // upload_callback reads the buffer from this offset onwards
+    fp->index = 0;
+    if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) {
+        goto out;
+    }
+
+    curl_easy_reset(fp->curl);
+
+    curl_easy_setopt(fp->curl, CURLOPT_UPLOAD, 1L);
+    curl_easy_setopt(fp->curl, CURLOPT_READFUNCTION, upload_callback);
+    curl_easy_setopt(fp->curl, CURLOPT_READDATA, fp);
+    curl_easy_setopt(fp->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fp->buffer.l);
+    curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
+    curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)resp);
+    curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
+    curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
+
+    curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);
+
+    headers = set_html_headers(fp, &authorisation, &date, &content, &token);
+    fp->ret = curl_easy_perform(fp->curl);
+
+    if (fp->ret == CURLE_OK) {
+        ret = 0;
+    }
+
+ out:
+    ksfree(&authorisation);
+    ksfree(&content);
+    ksfree(&content_hash);
+    ksfree(&url);
+    ksfree(&date);
+    ksfree(&token);
+    ksfree(&canonical_query_string);
+    curl_slist_free_all(headers);
+
+    return ret;
+}
+
+
+/*
+    Write handler for the s3w backend.
+
+    Data is appended to fp->buffer.  Once the buffer holds more than
+    fp->part_size bytes it is uploaded as one part and the part's ETag is
+    recorded in fp->completion_message.  When fp->expand is set, part_size is
+    doubled each time part_no reaches a multiple of EXPAND_ON.
+
+    Returns nbytes on success.  Returns -1 if the data could not be buffered,
+    or if the part upload failed, in which case the upload is aborted.
+*/
+static ssize_t s3_write(hFILE *fpv, const void *bufferv, size_t nbytes) {
+    hFILE_s3_write *fp = (hFILE_s3_write *)fpv;
+    const char *buffer  = (const char *)bufferv;
+
+    if (kputsn(buffer, nbytes, &fp->buffer) == EOF) {
+        return -1;
+    }
+
+    if (fp->buffer.l > fp->part_size) {
+        // time to write out our data
+        kstring_t response = {0, 0, NULL};
+        int ret;
+
+        ret = upload_part(fp, &response);
+
+        if (!ret) {
+            long response_code;
+            kstring_t etag = {0, 0, NULL};
+
+            curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
+
+            if (response_code > 200) {
+                ret = -1;
+            } else if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) {
+                ret = -1;
+            } else if (ksprintf(&fp->completion_message, "\t<Part>\n\t\t<PartNumber>%d</PartNumber>\n\t\t<ETag>%s</ETag>\n\t</Part>\n",
+                        fp->part_no, etag.s) < 0) {
+                // A part missing from the list would give a broken
+                // completion message later on, so treat this as fatal.
+                ret = -1;
+            }
+
+            ksfree(&etag);
+        }
+
+        ksfree(&response);
+
+        if (ret) {
+            abort_upload(fp);
+            return -1;
+        }
+
+        fp->part_no++;
+        fp->buffer.l = 0;
+
+        if (fp->expand && (fp->part_no % EXPAND_ON == 0)) {
+            fp->part_size *= 2;
+        }
+    }
+
+    return nbytes;
+}
+
+
+/*
+    Close handler for the s3w backend.
+
+    Unless the upload has already been aborted, any data left in fp->buffer
+    is uploaded as the last part and the multipart upload is then completed.
+    A completion reply that does not mention CompleteMultipartUploadResult,
+    or a handle that never uploaded a part, counts as a failure and aborts
+    the upload.
+
+    Returns 0 on success (or when the upload was already aborted), -1 on
+    failure.
+*/
+static int s3_close(hFILE *fpv) {
+    hFILE_s3_write *fp = (hFILE_s3_write *)fpv;
+    kstring_t response = {0, 0, NULL};
+    int ret = 0;
+
+    if (!fp->aborted) {
+
+        if (fp->buffer.l) {
+            // write the last part
+
+            ret = upload_part(fp, &response);
+
+            if (!ret) {
+                long response_code;
+                kstring_t etag = {0, 0, NULL};
+
+                curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
+
+                if (response_code > 200) {
+                    ret = -1;
+                } else if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) {
+                    ret = -1;
+                } else if (ksprintf(&fp->completion_message, "\t<Part>\n\t\t<PartNumber>%d</PartNumber>\n\t\t<ETag>%s</ETag>\n\t</Part>\n",
+                            fp->part_no, etag.s) < 0) {
+                    // A part missing from the list would give a broken
+                    // completion message, so treat this as fatal.
+                    ret = -1;
+                }
+
+                ksfree(&etag);
+            }
+
+            ksfree(&response);
+
+            if (ret) {
+                abort_upload(fp);
+                return -1;
+            }
+
+            fp->part_no++;
+        }
+
+        if (fp->part_no > 1) {
+            ret = complete_upload(fp, &response);
+
+            if (!ret) {
+                // response.s stays NULL when the reply had no body at all
+                if (response.s == NULL
+                    || strstr(response.s, "CompleteMultipartUploadResult") == NULL) {
+                    ret = -1;
+                }
+            }
+        } else {
+            ret = -1;
+        }
+
+        if (ret) {
+            abort_upload(fp);
+        } else {
+            cleanup(fp);
+        }
+    }
+
+    ksfree(&response);
+
+    return ret;
+}
+
+
+/*
+    Pass a redirect on to the optional redirect callback, giving it the
+    response headers and fp->url.  Returns the callback's result, or -1 when
+    no redirect callback has been registered.
+*/
+static int redirect_endpoint(hFILE_s3_write *fp, kstring_t *head) {
+    if (!fp->au->redirect_callback) {
+        return -1;
+    }
+
+    return fp->au->redirect_callback(fp->au->callback_data, 301, head, &fp->url);
+}
+
+/*
+    Pull the <Region> entry out of a response body and hand it to the
+    optional set_region callback.  Returns the callback's result, or -1 when
+    there is no callback or no <Region> entry could be extracted.
+*/
+static int handle_bad_request(hFILE_s3_write *fp, kstring_t *resp) {
+    kstring_t new_region = {0, 0, NULL};
+    int status;
+
+    if (!fp->au->set_region_callback) {
+        return -1;
+    }
+
+    if (get_entry(resp->s, "<Region>", "</Region>", &new_region) == EOF) {
+        return -1;
+    }
+
+    status = fp->au->set_region_callback(fp->au->callback_data, &new_region);
+    ksfree(&new_region);
+
+    return status;
+}
+
+/*
+    Send the POST request that starts a multipart upload.
+
+    "uploads" is appended to fp->url with '?', or with '&' when user_query
+    is non-zero.  The response headers are collected into head and the body
+    into resp, both through response_callback.
+
+    Returns 0 when the request was performed without a curl error, else -1.
+*/
+static int initialise_upload(hFILE_s3_write *fp, kstring_t *head, kstring_t *resp, int user_query) {
+    kstring_t target = {0, 0, NULL};
+    kstring_t sha_header = {0, 0, NULL};
+    kstring_t payload_hash = {0, 0, NULL};
+    kstring_t auth_header = {0, 0, NULL};
+    kstring_t date_header = {0, 0, NULL};
+    kstring_t token_header = {0, 0, NULL};
+    struct curl_slist *header_list = NULL;
+    char verb[] = "POST";
+    const char joiner = user_query ? '&' : '?';
+    int status = -1;
+
+    // The steps run in order; the first one to fail skips the request.
+    if (fp->au->callback(fp->au->callback_data,  verb, NULL, "uploads=",
+                         &payload_hash, &auth_header, &date_header,
+                         &token_header, user_query) != 0
+        || ksprintf(&target, "%s%cuploads", fp->url.s, joiner) < 0
+        || ksprintf(&sha_header, "x-amz-content-sha256: %s", payload_hash.s) < 0) {
+        goto out;
+    }
+
+    curl_easy_setopt(fp->curl, CURLOPT_URL, target.s);
+    curl_easy_setopt(fp->curl, CURLOPT_POST, 1L);
+    curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, "");  // send no data
+    curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback);
+    curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp);
+    curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
+    curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)head);
+    curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
+    curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);
+
+    header_list = set_html_headers(fp, &auth_header, &date_header, &sha_header, &token_header);
+    fp->ret = curl_easy_perform(fp->curl);
+    status = (fp->ret == CURLE_OK) ? 0 : -1;
+
+ out:
+    ksfree(&target);
+    ksfree(&sha_header);
+    ksfree(&payload_hash);
+    ksfree(&auth_header);
+    ksfree(&date_header);
+    ksfree(&token_header);
+    curl_slist_free_all(header_list);
+
+    return status;
+}
+
+
+/*
+    Reset fp->upload_id and fill it with the <UploadId> entry found in the
+    response body.  Returns 0 on success, -1 if the entry was not extracted.
+*/
+static int get_upload_id(hFILE_s3_write *fp, kstring_t *resp) {
+    ksinit(&fp->upload_id);
+
+    return (get_entry(resp->s, "<UploadId>", "</UploadId>", &fp->upload_id) == EOF) ? -1 : 0;
+}
+
+
+// Backend table installed on the handle by s3_write_open().  Only two of the
+// five slots are filled in (s3_write and s3_close); the others are NULL.
+// NOTE(review): the slot order comes from struct hFILE_backend, which is
+// declared elsewhere -- presumably read, write, seek, flush, close; confirm
+// against the header.
+static const struct hFILE_backend s3_write_backend = {
+    NULL, s3_write, NULL, NULL, s3_close
+};
+
+
+/*
+    Open a handle for writing to S3 via a multipart upload.
+
+    url  - location to write to; its first four characters are skipped
+           before it is stored in fp->url.
+    auth - callbacks used to sign requests; callback and callback_data are
+           mandatory.  The structure is copied, so the caller keeps
+           ownership of it.
+
+    The part size starts at MINIMUM_S3_WRITE_SIZE and is allowed to grow.
+    Setting the HTS_S3_PART_SIZE environment variable (in MiB) switches the
+    growth off and raises the part size if the value given is larger.
+
+    The upload is started here; a "moved permanently" or "bad request" reply
+    is retried once if the matching callback deals with it.
+
+    Returns the new handle, or NULL on failure.
+*/
+static hFILE *s3_write_open(const char *url, s3_authorisation *auth) {
+    hFILE_s3_write *fp;
+    kstring_t response = {0, 0, NULL};
+    kstring_t header   = {0, 0, NULL};
+    int ret, has_user_query = 0;
+    char *query_start;
+    const char *env;
+
+
+    if (!auth || !auth->callback || !auth->callback_data) {
+        return NULL;
+    }
+
+    fp = (hFILE_s3_write *)hfile_init(sizeof(hFILE_s3_write), "w", 0);
+
+    if (fp == NULL) {
+        return NULL;
+    }
+
+    // The error path hands fp to cleanup_local().  Whether hfile_init()
+    // zeroes the structure cannot be seen from here, so put every member
+    // that may be released into a known empty state before anything can fail.
+    fp->curl = NULL;
+    fp->au = NULL;
+    ksinit(&fp->buffer);
+    ksinit(&fp->url);
+    ksinit(&fp->upload_id);
+    ksinit(&fp->completion_message);
+    fp->aborted = 0;
+
+    if ((fp->curl = curl_easy_init()) == NULL) {
+        errno = ENOMEM;
+        goto error;
+    }
+
+    if ((fp->au = calloc(1, sizeof(s3_authorisation))) == NULL) {
+        goto error;
+    }
+
+    memcpy(fp->au, auth, sizeof(s3_authorisation));
+
+    fp->part_size = MINIMUM_S3_WRITE_SIZE;
+    fp->expand = 1;
+
+    if ((env = getenv("HTS_S3_PART_SIZE")) != NULL) {
+        // Largest whole number of MiB whose size in bytes still fits in a
+        // 32-bit int.  Clamping first keeps the multiplication below from
+        // overflowing; negative or unparsable values count as zero.
+        const long max_mib = 2047;
+        long mib = strtol(env, NULL, 10);
+        int part_size;
+
+        if (mib < 0) {
+            mib = 0;
+        } else if (mib > max_mib) {
+            mib = max_mib;
+        }
+
+        part_size = (int)mib * 1024 * 1024;
+
+        if (part_size > fp->part_size)
+            fp->part_size = part_size;
+
+        fp->expand = 0;
+    }
+
+    if (hts_verbose >= 8) {
+        fp->verbose = 1L;
+    } else {
+        fp->verbose = 0L;
+    }
+
+    // without this check a failed copy would leave fp->url.s NULL for strchr
+    if (kputs(url + 4, &fp->url) == EOF) {
+        goto error;
+    }
+
+    if ((query_start = strchr(fp->url.s, '?'))) {
+        has_user_query = 1;
+    }
+
+    ret = initialise_upload(fp, &header, &response, has_user_query);
+
+    if (ret == 0) {
+        long response_code;
+
+        curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
+
+        if (response_code == S3_MOVED_PERMANENTLY) {
+            if (redirect_endpoint(fp, &header) == 0) {
+                ksfree(&response);
+                ksfree(&header);
+
+                ret = initialise_upload(fp, &header, &response, has_user_query);
+            }
+        } else if (response_code == S3_BAD_REQUEST) {
+            if (handle_bad_request(fp, &response) == 0) {
+                ksfree(&response);
+                ksfree(&header);
+
+                ret = initialise_upload(fp, &header, &response, has_user_query);
+            }
+        }
+
+        ksfree(&header); // no longer needed
+    }
+
+    if (ret) goto error;
+
+    if (get_upload_id(fp, &response)) goto error;
+
+    // start the completion message (a formatted list of parts)
+    ksinit(&fp->completion_message);
+
+    if (kputs("<CompleteMultipartUpload>\n", &fp->completion_message) == EOF) {
+        goto error;
+    }
+
+    fp->part_no = 1;
+
+    // user query string no longer a useful part of the URL
+    if (query_start)
+         *query_start = '\0';
+
+    fp->base.backend = &s3_write_backend;
+    ksfree(&response);
+
+    return &fp->base;
+
+error:
+    ksfree(&response);
+    ksfree(&header); // may hold headers from a request that failed part way
+    cleanup_local(fp);
+    hfile_destroy((hFILE *)fp);
+    return NULL;
+}
+
+
+/*
+    Plain open entry point for the s3w schemes.  It always fails: a message
+    is printed when hts_verbose is at least 1 and NULL is returned.
+*/
+static hFILE *hopen_s3_write(const char *url, const char *mode) {
+    const int report = (hts_verbose >= 1);
+
+    if (report)
+        fprintf(stderr, "[E::%s] s3w:// URLs should not be used directly; use s3:// instead.\n", __func__);
+
+    return NULL;
+}
+
+
+/*
+    Fill in auth from a NULL-terminated list of (name, value) arguments.
+
+    Recognised names are "s3_auth_callback", "s3_auth_callback_data",
+    "redirect_callback" and "set_region_callback".  The name "va_list" is
+    followed by a pointer to a further va_list which, when not NULL, is
+    parsed in the same way.
+
+    Returns 0 on success.  An unknown name sets errno to EINVAL and
+    returns -1.
+*/
+static int parse_va_list(s3_authorisation *auth, va_list args) {
+    for (;;) {
+        const char *key = va_arg(args, const char *);
+
+        if (key == NULL)
+            break;
+
+        if (!strcmp(key, "s3_auth_callback")) {
+            auth->callback = va_arg(args, s3_auth_callback);
+            continue;
+        }
+
+        if (!strcmp(key, "s3_auth_callback_data")) {
+            auth->callback_data = va_arg(args, void *);
+            continue;
+        }
+
+        if (!strcmp(key, "redirect_callback")) {
+            auth->redirect_callback = va_arg(args, redirect_callback);
+            continue;
+        }
+
+        if (!strcmp(key, "set_region_callback")) {
+            auth->set_region_callback = va_arg(args, set_region_callback);
+            continue;
+        }
+
+        if (!strcmp(key, "va_list")) {
+            va_list *nested = va_arg(args, va_list *);
+
+            if (nested && parse_va_list(auth, *nested) < 0)
+                return -1;
+
+            continue;
+        }
+
+        errno = EINVAL;
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+    Variable argument open entry point for the s3w schemes.  The arguments
+    are read into an s3_authorisation, which is then used to open the
+    handle.  Returns NULL if the arguments are rejected or the open fails.
+*/
+static hFILE *vhopen_s3_write(const char *url, const char *mode, va_list args) {
+    s3_authorisation auth = {NULL, NULL, NULL};
+
+    if (parse_va_list(&auth, args) != 0) {
+        return NULL;
+    }
+
+    return s3_write_open(url, &auth);
+}
+
+
+/*
+    Plugin destroy hook: let go of the curl share handle, the user agent
+    string and curl's global state.
+*/
+static void s3_write_exit(void) {
+    CURLSHcode released = curl_share_cleanup(curl.share);
+
+    // only forget the handle if curl really did let go of it
+    if (released == CURLSHE_OK) {
+        curl.share = NULL;
+    }
+
+    free(curl.useragent.s);
+    curl.useragent.s = NULL;
+    curl.useragent.m = 0;
+    curl.useragent.l = 0;
+
+    curl_global_cleanup();
+}
+
+
+/*
+    Plugin entry point.  Sets up curl's global state and a share handle for
+    DNS data, builds the user agent string and registers the handler for the
+    s3w, s3w+http and s3w+https schemes.
+
+    Returns 0 on success.  Returns -1 if curl could not be set up; errno is
+    set to EIO when the share handle was the problem.
+*/
+int PLUGIN_GLOBAL(hfile_plugin_init,_s3_write)(struct hFILE_plugin *self) {
+
+    static const struct hFILE_scheme_handler handler =
+        { hopen_s3_write, hfile_always_remote, "S3 Multipart Upload",
+          2000 + 50, vhopen_s3_write
+        };
+    static const char *const schemes[] = { "s3w", "s3w+http", "s3w+https" };
+
+#ifdef ENABLE_PLUGINS
+    // Embed version string for examination via strings(1) or what(1)
+    static const char id[] =
+        "@(#)hfile_s3_write plugin (htslib)\t" HTS_VERSION_TEXT;
+    const char *version = strchr(id, '\t') + 1;
+
+    if (hts_verbose >= 9)
+        fprintf(stderr, "[M::hfile_s3_write.init] version %s\n",
+                version);
+#else
+    const char *version = hts_version();
+#endif
+
+    const curl_version_info_data *curl_info;
+    CURLSHcode lock_rc, unlock_rc, dns_rc;
+    size_t i;
+
+    if (curl_global_init(CURL_GLOBAL_ALL) != CURLE_OK) {
+        // look at putting in an errno here
+        return -1;
+    }
+
+    curl.share = curl_share_init();
+
+    if (curl.share == NULL) {
+        curl_global_cleanup();
+        errno = EIO;
+        return -1;
+    }
+
+    // all three options are always attempted, then checked together
+    lock_rc   = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
+    unlock_rc = curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
+    dns_rc    = curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
+
+    if (lock_rc != CURLSHE_OK || unlock_rc != CURLSHE_OK || dns_rc != CURLSHE_OK) {
+        curl_share_cleanup(curl.share);
+        curl_global_cleanup();
+        errno = EIO;
+        return -1;
+    }
+
+    curl_info = curl_version_info(CURLVERSION_NOW);
+    ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, curl_info->version);
+
+    self->name = "S3 Multipart Upload";
+    self->destroy = s3_write_exit;
+
+    for (i = 0; i < sizeof(schemes) / sizeof(schemes[0]); i++) {
+        hfile_add_scheme_handler(schemes[i], &handler);
+    }
+
+    return 0;
+}
--- a/ext/htslib/hts.c
+++ b/ext/htslib/hts.c
--- a/ext/htslib/hts_expr.c
+++ b/ext/htslib/hts_expr.c
@ -0,0 +1,927 @@
+/*  hts_expr.c -- filter expression parsing and processing.
+
+    Copyright (C) 2020-2022, 2024 Genome Research Ltd.
+
+    Author: James Bonfield <jkb@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notices and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+// TODO:
+// - ?: operator for conditionals?
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <float.h>
+#include <regex.h>
+#include <math.h>
+
+#include "htslib/hts_expr.h"
+#include "htslib/hts_log.h"
+#include "textutils_internal.h"
+
+// State for one filter expression.
+// Could also cache hts_expr_val_t stack here for kstring reuse?
+#define MAX_REGEX 10  // number of regex_t slots held by each filter
+struct hts_filter_t {
+    char *str;                  // presumably the expression text -- TODO confirm
+    int parsed;                 // NOTE(review): looks like a "has been parsed" flag; not used in the code visible here
+    int curr_regex, max_regex;  // presumably bookkeeping for the preg[] slots -- TODO confirm
+    regex_t preg[MAX_REGEX];    // fixed pool of MAX_REGEX regex_t objects
+};
+
+/*
+ * This is designed to be mostly C-like, with mostly the same precedence rules,
+ * with the exception of bit operators (widely considered as a mistake in C).
+ * It's not full C (eg no bit-shifting), but good enough for our purposes.
+ *
+ * Supported syntax, in order of precedence:
+ *
+ * Grouping:      (, ),   eg "(1+2)*3"
+ * Values:        integers, floats, strings or variables
+ * Unary ops:     +, -, !, ~  eg -10 +10, !10 (0), ~5 (bitwise not)
+ * Math ops:      *, /, %  [TODO: add // for floor division?]
+ * Math ops:      +, -
+ * Bit-wise:      &, ^, |  [NB as 3 precedence levels, in that order]
+ * Conditionals:  >, >=, <, <=,
+ * Equality:      ==, !=, =~, !~
+ * Boolean:       &&, ||
+ */
+
+// Skip to start of term: step over any leading spaces and tabs.
+static char *ws(char *str) {
+    return str + strspn(str, " \t");
+}
+
+// Declared here so that func_expr() and simple_expr() below can call it.
+static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                      char *str, char **end, hts_expr_val_t *res);
+
+/*
+ * Simple functions operating on strings only.
+ * length, min, max, avg.
+ *
+ * All return 0 on success,
+ *           -1 on failure
+ */
+// length(): turn a string value into a number holding the string's length.
+static int expr_func_length(hts_expr_val_t *res) {
+    if (res->is_str) {
+        res->d = res->s.l;
+        res->is_str = 0;
+        return 0;
+    }
+
+    return -1;
+}
+
+// min(): turn a string value into the smallest of its byte values, each
+// taken as unsigned.  An empty string gives NAN.
+static int expr_func_min(hts_expr_val_t *res) {
+    const uint8_t *bytes;
+    size_t i, n;
+
+    if (!res->is_str)
+        return -1;
+
+    bytes = (uint8_t *)res->s.s;
+    n = res->s.l;
+
+    if (n == 0) {
+        res->d = NAN;
+    } else {
+        int lowest = bytes[0];
+
+        for (i = 1; i < n; i++) {
+            if (bytes[i] < lowest)
+                lowest = bytes[i];
+        }
+
+        res->d = lowest;
+    }
+
+    res->is_str = 0;
+
+    return 0;
+}
+
+// max(): turn a string value into the largest of its byte values, each
+// taken as unsigned.  An empty string gives NAN.
+static int expr_func_max(hts_expr_val_t *res) {
+    const uint8_t *bytes;
+    size_t i, n;
+
+    if (!res->is_str)
+        return -1;
+
+    bytes = (uint8_t *)res->s.s;
+    n = res->s.l;
+
+    if (n == 0) {
+        res->d = NAN;
+    } else {
+        int highest = bytes[0];
+
+        for (i = 1; i < n; i++) {
+            if (bytes[i] > highest)
+                highest = bytes[i];
+        }
+
+        res->d = highest;
+    }
+
+    res->is_str = 0;
+
+    return 0;
+}
+
+// avg(): turn a string value into the mean of its byte values, each taken
+// as unsigned.  An empty string gives 0.
+static int expr_func_avg(hts_expr_val_t *res) {
+    const uint8_t *bytes;
+    double total = 0;
+    size_t i, n;
+
+    if (!res->is_str)
+        return -1;
+
+    bytes = (uint8_t *)res->s.s;
+    n = res->s.l;
+
+    for (i = 0; i < n; i++)
+        total += bytes[i];
+
+    res->d = n ? total / n : total;
+    res->is_str = 0;
+
+    return 0;
+}
+
+/*
+ * functions:  FUNC(expr).
+ * Note for simplicity of parsing, the "(" must immediately follow FUNC,
+ * so "FUNC (x)" is invalid.
+ *
+ * Recognised names are avg, default, exists, exp, length, log, min, max,
+ * pow and sqrt.  default() and pow() take two comma separated arguments.
+ *
+ * On success *end is moved past the closing ')' and 0 is returned.
+ * Returns -1 for an unknown function, a failure to parse an argument,
+ * a missing ',' or ')', a non-string argument to avg/length/min/max,
+ * or a string argument to pow().
+ */
+static int func_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                     char *str, char **end, hts_expr_val_t *res) {
+    int func_ok = -1;
+    switch (*str) {
+    case 'a':
+        if (strncmp(str, "avg(", 4) == 0) {
+            if (expression(filt, data, fn, str+4, end, res)) return -1;
+            func_ok = expr_func_avg(res);
+        }
+        break;
+
+    case 'd':
+        if (strncmp(str, "default(", 8) == 0) {
+            if (expression(filt, data, fn, str+8, end, res)) return -1;
+            if (**end != ',')
+                return -1;
+            (*end)++;
+            hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+            if (expression(filt, data, fn, ws(*end), end, &val)) {
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            func_ok = 1;
+            if (!hts_expr_val_existsT(res)) {
+                // Take the fallback value; val gets the old string in return
+                kstring_t swap = res->s;
+                *res = val;
+                val.s = swap;
+            }
+            // val now holds whichever string is not part of the result
+            hts_expr_val_free(&val);
+        }
+        break;
+
+    case 'e':
+        if (strncmp(str, "exists(", 7) == 0) {
+            if (expression(filt, data, fn, str+7, end, res)) return -1;
+            func_ok = 1;
+            res->is_true = res->d = hts_expr_val_existsT(res);
+            res->is_str = 0;
+        } else if (strncmp(str, "exp(", 4) == 0) {
+            if (expression(filt, data, fn, str+4, end, res)) return -1;
+            func_ok = 1;
+            res->d = exp(res->d);
+            res->is_str = 0;
+            if (isnan(res->d))
+                hts_expr_val_undef(res);
+        }
+
+        break;
+
+    case 'l':
+        if (strncmp(str, "length(", 7) == 0) {
+            if (expression(filt, data, fn, str+7, end, res)) return -1;
+            func_ok = expr_func_length(res);
+        } else if (strncmp(str, "log(", 4) == 0) {
+            if (expression(filt, data, fn, str+4, end, res)) return -1;
+            func_ok = 1;
+            res->d = log(res->d);
+            res->is_str = 0;
+            if (isnan(res->d))
+                hts_expr_val_undef(res);
+        }
+        break;
+
+    case 'm':
+        if (strncmp(str, "min(", 4) == 0) {
+            if (expression(filt, data, fn, str+4, end, res)) return -1;
+            func_ok = expr_func_min(res);
+        } else if (strncmp(str, "max(", 4) == 0) {
+            if (expression(filt, data, fn, str+4, end, res)) return -1;
+            func_ok = expr_func_max(res);
+        }
+        break;
+
+    case 'p':
+        if (strncmp(str, "pow(", 4) == 0) {
+            if (expression(filt, data, fn, str+4, end, res)) return -1;
+            func_ok = 1;
+
+            if (**end != ',')
+                return -1;
+            (*end)++;
+            hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+            if (expression(filt, data, fn, ws(*end), end, &val)) {
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
+                hts_expr_val_free(&val);
+                hts_expr_val_undef(res);
+            } else if (res->is_str || val.is_str) {
+                hts_expr_val_free(&val); // arith on strings
+                return -1;
+            } else {
+                res->d = pow(res->d, val.d);
+                hts_expr_val_free(&val);
+                res->is_str = 0;
+            }
+
+            if (isnan(res->d))
+                hts_expr_val_undef(res);
+        }
+        break;
+
+    case 's':
+        if (strncmp(str, "sqrt(", 5) == 0) {
+            if (expression(filt, data, fn, str+5, end, res)) return -1;
+            func_ok = 1;
+            res->d = sqrt(res->d);
+            res->is_str = 0;
+            if (isnan(res->d))
+                hts_expr_val_undef(res);
+        }
+        break;
+    }
+
+    // Either no function name matched or the string helper rejected its input
+    if (func_ok < 0)
+        return -1;
+
+    str = ws(*end);
+    if (*str != ')') {
+        fprintf(stderr, "Missing ')'\n");
+        return -1;
+    }
+    *end = str+1;
+
+    return 0;
+}
+
+/*
+ * simple_expr
+ *     : identifier
+ *     | constant
+ *     | string
+ *     | func_expr
+ *     | '(' expression ')'
+ *
+ * Returns 0 on success with *end moved past the text consumed,
+ *        -1 on failure.
+*/
+static int simple_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                       char *str, char **end, hts_expr_val_t *res) {
+    // Main recursion step
+    str = ws(str);
+    if (*str == '(') {
+        if (expression(filt, data, fn, str+1, end, res)) return -1;
+        str = ws(*end);
+        if (*str != ')') {
+            fprintf(stderr, "Missing ')'\n");
+            return -1;
+        }
+        *end = str+1;
+
+        return 0;
+    }
+
+    // Otherwise a basic element.
+    int fail = 0;
+    double d = hts_str2dbl(str, end, &fail);
+    if (str != *end) {
+        res->is_str = 0;
+        res->d = d;
+    } else {
+        // Not valid floating point syntax, so this is a quoted string,
+        // a variable or a function call.
+        if (*str == '"') {
+            res->is_str = 1;
+            char *e = str+1;
+            int backslash = 0;
+            // Find the closing quote, stepping over backslash escaped
+            // characters so that \" does not end the string.
+            while (*e && *e != '"') {
+                if (*e == '\\')
+                    backslash=1, e+=1+(e[1]!='\0');
+                else
+                    e++;
+            }
+
+            // The unescape step below writes through res->s.s, so the copy
+            // must not be allowed to fail silently.
+            if (kputsn(str+1, e-(str+1), ks_clear(&res->s)) == EOF)
+                return -1;
+            if (backslash) {
+                // Collapse escape sequences in place; j trails i.
+                size_t i, j;
+                for (i = j = 0; i < res->s.l; i++) {
+                    res->s.s[j++] = res->s.s[i];
+                    if (res->s.s[i] == '\\') {
+                        switch (res->s.s[++i]) {
+                        case '"': res->s.s[j-1] = '"'; break;
+                        case '\\':res->s.s[j-1] = '\\'; break;
+                        case 't': res->s.s[j-1] = '\t'; break;
+                        case 'n': res->s.s[j-1] = '\n'; break;
+                        case 'r': res->s.s[j-1] = '\r'; break;
+                        // unknown escape: keep the backslash and the character
+                        default:  res->s.s[j++] = res->s.s[i];
+                        }
+                    }
+                }
+                res->s.s[j] = 0;
+                res->s.l = j;
+            }
+            if (*e != '"')
+                return -1;
+            *end = e+1;
+        } else if (fn) {
+            // Try lookup as variable, if not as function
+            if (fn(data, str, end, res) == 0)
+                return 0;
+            else
+                return func_expr(filt, data, fn, str, end, res);
+        } else {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * unary_expr
+ *     : simple_expr
+ *     | '+' simple_expr
+ *     | '-' simple_expr
+ *     | '!' unary_expr // higher precedence
+ *     | '~' unary_expr // higher precedence
+ *
+ * Applying '+', '-' or '~' to a string is an error.
+ * An operand that does not exist leaves the result undefined, except for
+ * '!' which turns it into true.
+ *
+ * Returns 0 on success,
+ *        -1 on failure.
+ */
+static int unary_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                      char *str, char **end, hts_expr_val_t *res) {
+    int err;
+    str = ws(str);
+    if (*str == '+' || *str == '-') {
+        err = simple_expr(filt, data, fn, str+1, end, res);
+        if (!hts_expr_val_exists(res)) {
+            hts_expr_val_undef(res);
+        } else {
+            err |= res->is_str;
+            if (*str == '-')
+                res->d = -res->d;
+            res->is_true = res->d != 0;
+        }
+    } else if (*str == '!') {
+        err = unary_expr(filt, data, fn, str+1, end, res);
+        if (res->is_true) {
+            // Any explicitly true value becomes false
+            res->d = res->is_true = 0;
+        } else if (!hts_expr_val_exists(res)) {
+            // We can also still negate undef values by toggling the
+            // is_true override value.
+            res->d = res->is_true = !res->is_true;
+        } else if (res->is_str) {
+            // !null = true, !"foo" = false, NOTE: !"" = false also
+            res->d = res->is_true = (res->s.s == NULL);
+        } else {
+            res->d = !(int64_t)res->d;
+            res->is_true = res->d != 0;
+        }
+        res->is_str = 0;
+    } else if (*str == '~') {
+        err = unary_expr(filt, data, fn, str+1, end, res);
+        if (!hts_expr_val_exists(res)) {
+            hts_expr_val_undef(res);
+        } else {
+            // The value is known to exist here, so it can be used directly
+            err |= res->is_str;
+            res->d = ~(int64_t)res->d;
+            res->is_true = res->d != 0;
+        }
+    } else {
+        err = simple_expr(filt, data, fn, str, end, res);
+    }
+    return err ? -1 : 0;
+}
+
+
+/*
+ * mul_expr
+ *     : unary_expr (
+ *           '*' unary_expr
+ *         | '/' unary_expr
+ *         | '%' unary_expr
+ *       )*
+ *
+ * '%' works on the operands truncated to 64-bit integers.  The result is
+ * undefined when the truncated divisor is zero, or when either operand
+ * cannot be held in a 64-bit integer.
+ *
+ * Returns 0 on success,
+ *        -1 on a parse failure or when an operand is a string.
+ */
+static int mul_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                    char *str, char **end, hts_expr_val_t *res) {
+    if (unary_expr(filt, data, fn, str, end, res))
+        return -1;
+
+    str = *end;
+    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+    while (*str) {
+        str = ws(str);
+        if (*str == '*' || *str == '/' || *str == '%') {
+            if (unary_expr(filt, data, fn, str+1, end, &val)) {
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
+                hts_expr_val_undef(res);
+            } else if (val.is_str || res->is_str) {
+                hts_expr_val_free(&val);
+                return -1; // arith on strings
+            }
+        }
+
+        if (*str == '*')
+            res->d *= val.d;
+        else if (*str == '/')
+            res->d /= val.d;
+        else if (*str == '%') {
+            // 2^63: the casts below are only valid for magnitudes under
+            // this.  The comparisons are also false for NaN.
+            const double int64_limit = 9223372036854775808.0;
+            int in_range = fabs(res->d) < int64_limit
+                        && fabs(val.d)  < int64_limit;
+            // Test the divisor after truncation, as e.g. 0.5 becomes 0
+            int64_t divisor = in_range ? (int64_t)val.d : 0;
+
+            if (divisor == 0)
+                hts_expr_val_undef(res);
+            else if (divisor == -1)
+                res->d = 0; // always 0; also avoids INT64_MIN % -1
+            else
+                res->d = (int64_t)res->d % divisor;
+        } else
+            break;
+
+        res->is_true = hts_expr_val_exists(res) && (res->d != 0);
+        str = *end;
+    }
+
+    hts_expr_val_free(&val);
+
+    return 0;
+}
+
+/*
+ * add_expr
+ *     : mul_expr (
+ *           '+' mul_expr
+ *         | '-' mul_expr
+ *       )*
+ *
+ * Evaluates a chain of additive operators, accumulating into *res.  *end is
+ * left wherever the last operand parse stopped.
+ *
+ * If either operand fails hts_expr_val_exists() the result of that step is
+ * marked undefined instead of raising an error.
+ *
+ * Returns 0 on success,
+ *        -1 on a parse failure or arithmetic on strings
+ */
+static int add_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                    char *str, char **end, hts_expr_val_t *res) {
+    if (mul_expr(filt, data, fn, str, end, res))
+        return -1;
+
+    str = *end;
+    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+    while (*str) {
+        str = ws(str);
+        int undef = 0;
+        if (*str == '+' || *str == '-') {
+            if (mul_expr(filt, data, fn, str+1, end, &val)) {
+                // Release whatever the failed operand parse may have left
+                // in val before bailing out.
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
+                undef = 1;
+            } else if (val.is_str || res->is_str) {
+                hts_expr_val_free(&val);
+                return -1; // arith on strings
+            }
+        }
+
+        if (*str == '+')
+            res->d += val.d;
+        else if (*str == '-')
+            res->d -= val.d;
+        else
+            break;
+
+        // The undefined marker is applied after the arithmetic so that it
+        // is the final state of res for this step.
+        if (undef)
+            hts_expr_val_undef(res);
+        else
+            res->is_true = res->d != 0;
+
+        str = *end;
+    }
+
+    hts_expr_val_free(&val);
+
+    return 0;
+}
+
+/*
+ * bitand_expr
+ *     : add_expr
+ *     | bitand_expr '&' add_expr
+ *
+ * Evaluates a chain of bitwise-AND operators on operands truncated to
+ * int64_t.  A lone '&' is consumed here; "&&" is left for and_expr().
+ * If any operand fails hts_expr_val_exists() the final result is marked
+ * undefined.
+ *
+ * Returns 0 on success,
+ *        -1 on a parse failure or when an operand is a string
+ */
+static int bitand_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                       char *str, char **end, hts_expr_val_t *res) {
+    if (add_expr(filt, data, fn, str, end, res)) return -1;
+
+    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+    int undef = 0;
+    for (;;) {
+        str = ws(*end);
+        if (*str == '&' && str[1] != '&') {
+            if (add_expr(filt, data, fn, str+1, end, &val)) {
+                // Release whatever the failed operand parse may have left
+                // in val before bailing out.
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
+                undef = 1;
+            } else if (res->is_str || val.is_str) {
+                hts_expr_val_free(&val);
+                return -1;
+            } else {
+                res->is_true =
+                    (res->d = ((int64_t)res->d & (int64_t)val.d)) != 0;
+            }
+        } else {
+            break;
+        }
+    }
+    hts_expr_val_free(&val);
+    if (undef)
+        hts_expr_val_undef(res);
+
+    return 0;
+}
+
+/*
+ * bitxor_expr
+ *     : bitand_expr
+ *     | bitxor_expr '^' bitand_expr
+ *
+ * Evaluates a chain of bitwise-XOR operators on operands truncated to
+ * int64_t.  If any operand fails hts_expr_val_exists() the final result is
+ * marked undefined.
+ *
+ * Returns 0 on success,
+ *        -1 on a parse failure or when an operand is a string
+ */
+static int bitxor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                       char *str, char **end, hts_expr_val_t *res) {
+    if (bitand_expr(filt, data, fn, str, end, res)) return -1;
+
+    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+    int undef = 0;
+    for (;;) {
+        str = ws(*end);
+        if (*str == '^') {
+            if (bitand_expr(filt, data, fn, str+1, end, &val)) {
+                // Release whatever the failed operand parse may have left
+                // in val before bailing out.
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
+                undef = 1;
+            } else if (res->is_str || val.is_str) {
+                hts_expr_val_free(&val);
+                return -1;
+            } else {
+                res->is_true =
+                    (res->d = ((int64_t)res->d ^ (int64_t)val.d)) != 0;
+            }
+        } else {
+            break;
+        }
+    }
+    hts_expr_val_free(&val);
+    if (undef)
+        hts_expr_val_undef(res);
+
+    return 0;
+}
+
+/*
+ * bitor_expr
+ *     : bitxor_expr
+ *     | bitor_expr '|' bitxor_expr
+ *
+ * Evaluates a chain of bitwise-OR operators on operands truncated to
+ * int64_t.  A lone '|' is consumed here; "||" is left for and_expr().
+ * If any operand fails hts_expr_val_exists() the final result is marked
+ * undefined.
+ *
+ * Returns 0 on success,
+ *        -1 on a parse failure or when an operand is a string
+ */
+static int bitor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                      char *str, char **end, hts_expr_val_t *res) {
+    if (bitxor_expr(filt, data, fn, str, end, res)) return -1;
+
+    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+    int undef = 0;
+    for (;;) {
+        str = ws(*end);
+        if (*str == '|' && str[1] != '|') {
+            if (bitxor_expr(filt, data, fn, str+1, end, &val)) {
+                // Release whatever the failed operand parse may have left
+                // in val before bailing out.
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
+                undef = 1;
+            } else if (res->is_str || val.is_str) {
+                hts_expr_val_free(&val);
+                return -1;
+            } else {
+                res->is_true =
+                    (res->d = ((int64_t)res->d | (int64_t)val.d)) != 0;
+            }
+        } else {
+            break;
+        }
+    }
+    hts_expr_val_free(&val);
+    if (undef)
+        hts_expr_val_undef(res);
+
+    return 0;
+}
+
+/*
+ * cmp_expr
+ *     : bitor_expr
+ *     | cmp_expr '<=' bitor_expr
+ *     | cmp_expr '<'  bitor_expr
+ *     | cmp_expr '>=' bitor_expr
+ *     | cmp_expr '>'  bitor_expr
+ *
+ * Evaluates one ordering comparison.  The left operand comes from
+ * bitor_expr(); the right operand is obtained by calling cmp_expr() again
+ * on the text after the operator.
+ *
+ * Two strings are ordered with strcmp(), two numbers by value, and a
+ * string/number mix compares false.  If either operand fails
+ * hts_expr_val_exists() the result is marked undefined.
+ *
+ * Returns 0 on success,
+ *        -1 if either operand failed to parse
+ */
+static int cmp_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                    char *str, char **end, hts_expr_val_t *res) {
+    if (bitor_expr(filt, data, fn, str, end, res)) return -1;
+
+    hts_expr_val_t rhs = HTS_EXPR_VAL_INIT;
+    int err = 0;
+
+    str = ws(*end);
+    const int greater = (*str == '>');
+    const int less    = (*str == '<');
+
+    if (greater || less) {
+        // A trailing '=' makes the operator two characters long
+        const int or_equal = (str[1] == '=');
+
+        err = cmp_expr(filt, data, fn, str + 1 + or_equal, end, &rhs);
+
+        if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&rhs)) {
+            hts_expr_val_undef(res);
+        } else {
+            int truth = 0;
+
+            if (res->is_str && res->s.s && rhs.is_str && rhs.s.s) {
+                int order = strcmp(res->s.s, rhs.s.s);
+                if (greater)
+                    truth = or_equal ? order >= 0 : order > 0;
+                else
+                    truth = or_equal ? order <= 0 : order < 0;
+            } else if (!res->is_str && !rhs.is_str) {
+                if (greater)
+                    truth = or_equal ? res->d >= rhs.d : res->d > rhs.d;
+                else
+                    truth = or_equal ? res->d <= rhs.d : res->d < rhs.d;
+            }
+
+            res->is_true = res->d = truth;
+            res->is_str = 0;
+        }
+
+        if (!hts_expr_val_exists(&rhs) || !hts_expr_val_exists(res)) {
+            hts_expr_val_undef(res);
+        }
+    }
+
+    hts_expr_val_free(&rhs);
+
+    return err ? -1 : 0;
+}
+
+/*
+ * eq_expr
+ *     : cmp_expr
+ *     | eq_expr '==' cmp_expr
+ *     | eq_expr '!=' cmp_expr
+ *     | eq_expr '=~' cmp_expr
+ *     | eq_expr '!~' cmp_expr
+ *
+ * Evaluates equality and regex-match operators.  The left operand is parsed
+ * with cmp_expr(); the right operand by a recursive call to eq_expr().
+ *
+ * For '==' and '!=' an operand failing hts_expr_val_exists() makes the
+ * result undefined.  '=~' and '!~' require both operands to be strings.
+ * Compiled patterns are kept in filt->preg[], indexed by the order in which
+ * regex operators are evaluated (filt->curr_regex); once MAX_REGEX slots
+ * are in use a temporary regex_t is compiled and freed on each evaluation.
+ *
+ * Returns 0 on success,
+ *        -1 on a parse failure, non-string regex operands or a pattern
+ *           that fails to compile
+ */
+static int eq_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                   char *str, char **end, hts_expr_val_t *res) {
+    if (cmp_expr(filt, data, fn, str, end, res)) return -1;
+
+    str = ws(*end);
+
+    int err = 0, eq_done = 0;
+    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+
+    // numeric vs numeric comparison is as expected
+    // string vs string comparison is as expected
+    // numeric vs string is false
+    if (str[0] == '=' && str[1] == '=') {
+        eq_done = 1;
+        if ((err = eq_expr(filt, data, fn, str+2, end, &val))) {
+            res->is_true = res->d = 0;
+        } else {
+            if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
+                hts_expr_val_undef(res);
+            } else {
+                res->is_true = res->d = res->is_str
+                    ? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s)==0 :0)
+                    : !res->is_str && !val.is_str && res->d == val.d;
+            }
+        }
+        res->is_str = 0;
+
+    } else if (str[0] == '!' && str[1] == '=') {
+        eq_done = 1;
+        if ((err = eq_expr(filt, data, fn, str+2, end, &val))) {
+            res->is_true = res->d = 0;
+        } else {
+            if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
+                hts_expr_val_undef(res);
+            } else {
+                res->is_true = res->d = res->is_str
+                    ? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s) != 0 :1)
+                    : res->is_str != val.is_str || res->d != val.d;
+            }
+        }
+        res->is_str = 0;
+
+    } else if ((str[0] == '=' && str[1] == '~') ||
+               (str[0] == '!' && str[1] == '~')) {
+        eq_done = 1;
+        err = eq_expr(filt, data, fn, str+2, end, &val);
+        if (!val.is_str || !res->is_str) {
+            hts_expr_val_free(&val);
+            return -1;
+        }
+        if (val.s.s && res->s.s && val.is_true >= 0 && res->is_true >= 0) {
+            regex_t preg_, *preg;
+            if (filt->curr_regex >= filt->max_regex) {
+                // Compile regex if not seen before.  Past MAX_REGEX there
+                // is no slot left, so use a temporary freed further down.
+                int cached = filt->curr_regex < MAX_REGEX;
+                preg = cached ? &filt->preg[filt->curr_regex] : &preg_;
+
+                int ec = regcomp(preg, val.s.s, REG_EXTENDED | REG_NOSUB);
+                if (ec != 0) {
+                    char errbuf[1024];
+                    regerror(ec, preg, errbuf, 1024);
+                    fprintf(stderr, "Failed regex: %.1024s\n", errbuf);
+                    hts_expr_val_free(&val);
+                    return -1;
+                }
+
+                // Count the slot only once it holds a compiled pattern, so
+                // hts_filter_free() never passes a failed compile to
+                // regfree().
+                if (cached)
+                    filt->max_regex++;
+            } else {
+                preg = &filt->preg[filt->curr_regex];
+            }
+            res->is_true = res->d = regexec(preg, res->s.s, 0, NULL, 0) == 0
+                ? *str == '='  // match
+                : *str == '!'; // no-match
+            if (preg == &preg_)
+                regfree(preg);
+
+            filt->curr_regex++;
+        } else {
+            // nul regexp or input is considered false
+            res->is_true = 0;
+        }
+        res->is_str = 0;
+    }
+
+    if (eq_done && ((!hts_expr_val_exists(&val)) || !hts_expr_val_exists(res)))
+        hts_expr_val_undef(res);
+    hts_expr_val_free(&val);
+
+    return err ? -1 : 0;
+}
+
+/*
+ * and_expr
+ *     : eq_expr
+ *     | and_expr '&&' eq_expr
+ *     | and_expr '||' eq_expr
+ *
+ * Evaluates a left-to-right chain of logical AND / OR operators.  Both
+ * sides are always evaluated; there is no short-circuiting.
+ *
+ * An operand counts as true if its is_true flag is set, it is a string with
+ * a non-NULL buffer, or its numeric value is non-zero.  '&&' gives an
+ * undefined result if either side fails hts_expr_val_existsT(); '||' gives
+ * an undefined result only when no side is both defined and true.
+ *
+ * Returns 0 on success,
+ *        -1 on a parse failure
+ */
+static int and_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                    char *str, char **end, hts_expr_val_t *res) {
+    if (eq_expr(filt, data, fn, str, end, res)) return -1;
+
+    for (;;) {
+        hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+        str = ws(*end);
+        if (str[0] == '&' && str[1] == '&') {
+            if (eq_expr(filt, data, fn, str+2, end, &val)) {
+                // Release whatever the failed operand parse may have left
+                // in val before bailing out.
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_existsT(res) || !hts_expr_val_existsT(&val)) {
+                hts_expr_val_undef(res);
+                res->d = 0;
+            } else {
+                res->is_true = res->d =
+                    (res->is_true || (res->is_str && res->s.s) || res->d) &&
+                    (val.is_true  || (val.is_str && val.s.s) || val.d);
+                res->is_str = 0;
+            }
+        } else if (str[0] == '|' && str[1] == '|') {
+            if (eq_expr(filt, data, fn, str+2, end, &val)) {
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_existsT(res) && !hts_expr_val_existsT(&val)) {
+                // neither defined
+                hts_expr_val_undef(res);
+                res->d = 0;
+            } else if (!hts_expr_val_existsT(res) &&
+                       !(val.is_true  || (val.is_str  && val.s.s ) || val.d)) {
+                // LHS undef and RHS false
+                hts_expr_val_undef(res);
+                res->d = 0;
+            } else if (!hts_expr_val_existsT(&val) &&
+                       !(res->is_true || (res->is_str && res->s.s) || res->d)){
+                // RHS undef and LHS false
+                hts_expr_val_undef(res);
+                res->d = 0;
+            } else {
+                res->is_true = res->d =
+                    res->is_true || (res->is_str && res->s.s) || res->d ||
+                    val.is_true  || (val.is_str  && val.s.s ) || val.d;
+                res->is_str = 0;
+            }
+        } else {
+            // No operator follows: val was never filled in, nothing to free
+            break;
+        }
+        hts_expr_val_free(&val);
+    }
+
+    return 0;
+}
+
+/*
+ * expression
+ *     : and_expr
+ *
+ * Top-level grammar rule; forwards its arguments unchanged to and_expr()
+ * and returns that function's result.
+ */
+static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                      char *str, char **end, hts_expr_val_t *res) {
+    int status = and_expr(filt, data, fn, str, end, res);
+    return status;
+}
+
+/*
+ * Creates a filter object holding a private copy of the expression text.
+ * The expression is not parsed here; that happens on evaluation.
+ *
+ * Returns a filter to be released with hts_filter_free() on success,
+ *         NULL if str is NULL or memory could not be allocated
+ */
+hts_filter_t *hts_filter_init(const char *str) {
+    if (!str) return NULL;
+
+    hts_filter_t *f = calloc(1, sizeof(*f));
+    if (!f) return NULL;
+
+    // Oversize to permit faster comparisons with memcmp over strcmp.
+    // The padding is zero-filled so such over-reads see defined bytes;
+    // it also supplies the terminating NUL.
+    size_t len = strlen(str);
+    if (!(f->str = calloc(len + 100, 1))) {
+        free(f);
+        return NULL;
+    }
+    memcpy(f->str, str, len);
+    return f;
+}
+
+/*
+ * Releases a filter: every compiled regex slot counted by max_regex, the
+ * copied expression string, and the filter structure itself.
+ * Passing NULL is a no-op.
+ */
+void hts_filter_free(hts_filter_t *filt) {
+    if (filt == NULL)
+        return;
+
+    int slot = 0;
+    while (slot < filt->max_regex) {
+        regfree(&filt->preg[slot]);
+        slot++;
+    }
+
+    free(filt->str);
+    free(filt);
+}
+
+/*
+ * Common evaluation path shared by hts_filter_eval() and
+ * hts_filter_eval2().  Runs the whole expression held in filt->str and
+ * then settles the final truth value in *res.
+ *
+ * Returns 0 on success,
+ *        -1 if evaluation failed or text remained after the expression
+ */
+static int hts_filter_eval_(hts_filter_t *filt,
+                            void *data, hts_expr_sym_func *fn,
+                            hts_expr_val_t *res) {
+    char *stop = NULL;
+
+    // Regex slots are counted from zero again on every evaluation
+    filt->curr_regex = 0;
+    if (expression(filt, data, fn, filt->str, &stop, res) != 0)
+        return -1;
+
+    // Anything other than white-space after the expression is an error
+    if (stop != NULL) {
+        char *rest = ws(stop);
+        if (*rest != '\0') {
+            fprintf(stderr, "Unable to parse expression at %s\n", filt->str);
+            return -1;
+        }
+    }
+
+    // A string result is true when it has a buffer at all: the empty
+    // string still has a pointer (it is nul-terminated) and so is true,
+    // while an absent (NULL) string is false unless is_true was already
+    // set.  A defined numeric result is true when non-zero.
+    if (res->is_str) {
+        if (res->s.s != NULL)
+            res->is_true |= 1;
+        res->d = res->is_true;
+    } else if (hts_expr_val_exists(res)) {
+        if (res->d != 0)
+            res->is_true |= 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Evaluates the filter, requiring the caller to pass a cleared result.
+ *
+ * *res is zeroed before use, so a result that still owns a string buffer
+ * would leak it.  It is also impossible to tell whether *res was ever
+ * initialised properly, so any non-empty kstring state is rejected.
+ *
+ * Returns 0 on success,
+ *        -1 if *res was not cleared or evaluation failed
+ */
+int hts_filter_eval(hts_filter_t *filt,
+                    void *data, hts_expr_sym_func *fn,
+                    hts_expr_val_t *res) {
+    const int cleared = res->s.l == 0 && res->s.m == 0 && res->s.s == NULL;
+
+    if (!cleared) {
+        hts_log_error("Results structure must be cleared before calling this function");
+        return -1;
+    }
+
+    memset(res, 0, sizeof *res);
+
+    int status = hts_filter_eval_(filt, data, fn, res);
+    return status;
+}
+
+/*
+ * Evaluates the filter, reusing a result structure from an earlier call.
+ * Any string buffer still held in *res is freed and *res is zeroed before
+ * evaluation starts.
+ *
+ * Returns 0 on success,
+ *        -1 if evaluation failed
+ */
+int hts_filter_eval2(hts_filter_t *filt,
+                     void *data, hts_expr_sym_func *fn,
+                     hts_expr_val_t *res) {
+    // Free first: the memset below would otherwise drop the only pointer
+    ks_free(&res->s);
+    memset(res, 0, sizeof *res);
+
+    int status = hts_filter_eval_(filt, data, fn, res);
+    return status;
+}
--- a/ext/htslib/hts_internal.h
+++ b/ext/htslib/hts_internal.h
@ -0,0 +1,149 @@
+/*  hts_internal.h -- internal functions; not part of the public API.
+
+    Copyright (C) 2015-2016, 2018-2020 Genome Research Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#ifndef HTSLIB_HTS_INTERNAL_H
+#define HTSLIB_HTS_INTERNAL_H
+
+#include <stddef.h>
+#include <ctype.h>
+
+#include "htslib/hts.h"
+#include "textutils_internal.h"
+
+#define HTS_MAX_EXT_LEN 9
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hFILE;
+
+/// A single JSON token: a one-character type code plus its text.
+struct hts_json_token {
+    char type;    ///< Token type
+    char *str;    ///< Value as a C string (filled in for all token types)
+    // TODO Add other fields to fill in for particular data types, e.g.
+    // int inum;
+    // float fnum;
+};
+
+struct cram_fd;
+
+/*
+ * Check the existence of a local index file using part of the alignment file name.
+ * The order is alignment.bam.csi, alignment.csi, alignment.bam.bai, alignment.bai
+ * @param fn    - pointer to the file name
+ * @param fnidx - pointer to the index file name placeholder
+ * @return        1 for success, 0 for failure
+ */
+int hts_idx_check_local(const char *fn, int fmt, char **fnidx);
+
+// Retrieve the name of the index file and also download it, if it is remote
+char *hts_idx_getfn(const char *fn, const char *ext);
+
+// Retrieve the name of the index file, but do not download it, if it is remote
+char *hts_idx_locatefn(const char *fn, const char *ext);
+
+// Used for on-the-fly indexing.  See the comments in hts.c.
+void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset);
+
+int hts_idx_fmt(hts_idx_t *idx);
+
+// Internal interface to save on-the-fly indexes.  The index file handle
+// is kept open so hts_close() can close it after writing out the EOF
+// block for its own file.
+int hts_idx_save_but_not_close(hts_idx_t *idx, const char *fnidx, int fmt);
+
+// Construct a unique filename based on fname and open it.
+struct hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname);
+
+// Check that index is capable of storing items in range beg..end
+int hts_idx_check_range(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end);
+
+// The CRAM implementation stores the loaded index within the cram_fd rather
+// than separately as is done elsewhere in htslib.  So if p is a pointer to
+// an hts_idx_t with p->fmt == HTS_FMT_CRAI, then it actually points to an
+// hts_cram_idx_t and should be cast accordingly.
+typedef struct hts_cram_idx_t {
+    int fmt;               // Format code; HTS_FMT_CRAI marks this structure
+    struct cram_fd *cram;  // CRAM handle that holds the loaded index
+} hts_cram_idx_t;
+
+// Determine whether the string's contents appear to be UTF-16-encoded text.
+// Returns 1 if they are, 2 if there is also a BOM, or 0 otherwise.
+int hts_is_utf16_text(const kstring_t *str);
+
+// Entry point to hFILE_multipart backend.
+struct hFILE *hopen_htsget_redirect(struct hFILE *hfile, const char *mode);
+
+// Iteration state used by hts_path_itr_setup() / hts_path_itr_next().
+// NOTE(review): the field notes below are inferred from names and from the
+// hts_path_itr_setup() parameter list only — confirm against the
+// implementation before relying on them.
+struct hts_path_itr {
+    kstring_t path, entry;   // presumably the search path and current entry
+    void *dirv;  // DIR * privately
+    const char *pathdir, *prefix, *suffix;  // prefix/suffix match the setup arguments
+    size_t prefix_len, suffix_len, entry_dir_l;  // lengths of the strings above (TODO confirm entry_dir_l)
+};
+
+void hts_path_itr_setup(struct hts_path_itr *itr, const char *path,
+    const char *builtin_path, const char *prefix, size_t prefix_len,
+    const char *suffix, size_t suffix_len);
+
+const char *hts_path_itr_next(struct hts_path_itr *itr);
+
+typedef void plugin_void_func(void);
+plugin_void_func *load_plugin(void **pluginp, const char *filename, const char *symbol);
+void *plugin_sym(void *plugin, const char *name, const char **errmsg);
+plugin_void_func *plugin_func(void *plugin, const char *name, const char **errmsg);
+void close_plugin(void *plugin);
+const char *hts_plugin_path(void);
+
+/*
+ * Buffers up arguments to hts_idx_push for later use, once we've written all bar
+ * this block.  This is necessary when multiple blocks are in flight (threading).
+ *
+ * Returns 0 on success,
+ *        -1 on failure
+ */
+int bgzf_idx_push(BGZF *fp, hts_idx_t *hidx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped);
+
+/*
+ * Copies the extension of file name fn (without the leading '.') into
+ * ext_out as a NUL-terminated string.
+ *
+ * Only the text before HTS_IDX_DELIM (if present) is considered, and the
+ * search does not go back past a '/'.  A trailing ".gz" or ".bgz" is kept
+ * together with the extension before it, e.g. "sam.gz" or "vcf.bgz".
+ *
+ * @param fn       File name; may be NULL
+ * @param ext_out  Buffer of at least HTS_MAX_EXT_LEN bytes
+ * @return  0 on success;
+ *         -1 if fn is NULL, no extension was found, or the extension is
+ *            shorter than 2 or longer than HTS_MAX_EXT_LEN - 1 characters
+ */
+static inline int find_file_extension(const char *fn, char ext_out[static HTS_MAX_EXT_LEN])
+{
+    const char *delim = fn ? strstr(fn, HTS_IDX_DELIM) : NULL, *ext;
+    if (!fn) return -1;
+    if (!delim) delim = fn + strlen(fn);
+    for (ext = delim; ext > fn && *ext != '.' && *ext != '/'; --ext) {}
+    // ext > fn: a name consisting only of ".gz" / ".bgz" has nothing in
+    // front of it to scan, and stepping back would read before fn.
+    if (*ext == '.' && ext > fn &&
+        ((delim - ext == 3 && ext[1] == 'g' && ext[2] == 'z') || // permit .sam.gz as a valid file extension
+        (delim - ext == 4 && ext[1] == 'b' && ext[2] == 'g' && ext[3] == 'z'))) // permit .vcf.bgz as a valid file extension
+    {
+        for (ext--; ext > fn && *ext != '.' && *ext != '/'; --ext) {}
+    }
+    // delim - ext counts the '.', so the copy below (text plus NUL) always
+    // fits in HTS_MAX_EXT_LEN bytes.
+    if (*ext != '.' || delim - ext > HTS_MAX_EXT_LEN || delim - ext < 3)
+        return -1;
+    memcpy(ext_out, ext + 1, delim - ext - 1);
+    ext_out[delim - ext - 1] = '\0';
+    return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/ext/htslib/hts_os.c
+++ b/ext/htslib/hts_os.c
@ -0,0 +1,59 @@
+/// @file hts_os.c
+/// Operating System specific tweaks, for compatibility with POSIX.
+/*
+   Copyright (C) 2017, 2019-2020 Genome Research Ltd.
+
+    Author: James Bonfield <jkb@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include <config.h>
+#include "htslib/hts_defs.h"
+
+// Windows (maybe more) lack a drand48 implementation.
+#ifndef HAVE_DRAND48
+#include "os/rand.c"
+#else
+#include <stdlib.h>
+// Seeds the 48-bit generator.  srand48_deterministic() is used in place of
+// srand48() when the build defines HAVE_SRAND48_DETERMINISTIC.
+HTSLIB_EXPORT
+void hts_srand48(long seed)
+{
+#ifndef HAVE_SRAND48_DETERMINISTIC
+    srand48(seed);
+#else
+    srand48_deterministic(seed);
+#endif
+}
+
+// Forwards to erand48() using the caller-supplied generator state.
+HTSLIB_EXPORT
+double hts_erand48(unsigned short xseed[3])
+{
+    double value = erand48(xseed);
+    return value;
+}
+
+// Forwards to drand48().
+HTSLIB_EXPORT
+double hts_drand48(void)
+{
+    double value = drand48();
+    return value;
+}
+
+// Forwards to lrand48().
+HTSLIB_EXPORT
+long hts_lrand48(void)
+{
+    long value = lrand48();
+    return value;
+}
+#endif
+
+// // On Windows when using the MSYS or Cygwin terminals, isatty fails
+// #ifdef _WIN32
+// #define USE_FILEEXTD
+// #include "os/iscygpty.c"
+// #endif
--- a/ext/htslib/hts_probe_cc.sh
+++ b/ext/htslib/hts_probe_cc.sh
@ -0,0 +1,143 @@
+#!/bin/sh
+
+# Check compiler options for non-configure builds and create Makefile fragment
+#
+#    Copyright (C) 2022-2024 Genome Research Ltd.
+#
+#    Author: Rob Davies <rmd@sanger.ac.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Arguments are:
+# 1. C compiler command
+# 2. Initial CFLAGS
+# 3. LDFLAGS
+
+CC=$1
+CFLAGS=$2
+LDFLAGS=$3
+
+# Try running the compiler.  Uses the same conftest.* names as
+# configure for temporary files.
+# $1 (optional) is an extra flag string placed after $CFLAGS.
+# Returns the compiler's exit status.  conftest and conftest.err are
+# removed afterwards; conftest.c is left in place.
+run_compiler ()
+{
+    # $CC, $CFLAGS, $1 and $LDFLAGS are unquoted, so each one is split
+    # into separate words before the command runs.
+    $CC $CFLAGS $1 $LDFLAGS -o conftest conftest.c 2> conftest.err
+    # Save the status now; the rm below would overwrite $?
+    retval=$?
+    rm -f conftest.err conftest
+    return $retval
+}
+
+# Run a test.  $1 is the flag to try, $2 is the Makefile variable to set
+# with the flag probe result, $3 is a Makefile variable which will be
+# set to 1 if the code was built successfully.  The code to test should
+# be passed in via fd 0.
+# First try compiling conftest.c without the flag.  If that fails, try
+# again with it to see if the flag is needed.
+# Output is Makefile assignments on stdout.  Reads the global $have_cpuid.
+run_test ()
+{
+    if [ $have_cpuid -ne 1 ] ; then
+        # Only test for and build SSE / AVX code if cpuid works as
+        # otherwise it won't be executed, even if present
+        # (only $3 is emitted here; $2 is left unset and stdin is not read)
+        echo "$3 ="
+        return
+    fi
+    rm -f conftest conftest.err conftest.c
+    # Capture the test program supplied on stdin
+    cat - > conftest.c
+    if run_compiler ; then
+        # Builds without any extra flag
+        echo "$2 ="
+        echo "$3 = 1"
+    elif run_compiler "$1" ; then
+        # Builds only when the flag is given
+        echo "$2 = $1"
+        echo "$3 = 1"
+    else
+        # Does not build either way
+        echo "$3 ="
+    fi
+}
+
+echo "# Compiler probe results, generated by $0"
+
+# Check for cpuid
+rm -f conftest conftest.err conftest.c
+cat > conftest.c <<'EOF'
+#include <cpuid.h>
+#include <stddef.h>
+int main(int argc, char **argv) {
+    unsigned int a, b, c, d;
+    int level = __get_cpuid_max(0, NULL);
+    if (level > 0)
+        __cpuid_count(1, 0, a, b, c, d);
+    return 0;
+}
+EOF
+if run_compiler ; then
+    echo "HTS_HAVE_CPUID = 1"
+    have_cpuid=1
+else
+    echo "HTS_HAVE_CPUID ="
+    have_cpuid=0
+fi
+
+# Check for sse4.1 etc. support
+run_test "-msse4.1 -mpopcnt -mssse3" HTS_CFLAGS_SSE4 HTS_BUILD_SSE4 <<'EOF'
+#ifdef __x86_64__
+#include "x86intrin.h"
+int main(int argc, char **argv) {
+    __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
+    __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b);
+    return _mm_popcnt_u32(*((char *) &c));
+}
+#else
+int main(int argc, char **argv) { return 0; }
+#endif
+EOF
+
+# Check for avx2
+
+run_test "-mavx2 -mpopcnt" HTS_CFLAGS_AVX2 HTS_BUILD_AVX2 <<'EOF'
+#ifdef __x86_64__
+#include "x86intrin.h"
+int main(int argc, char **argv) {
+    __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+    __m256i b = _mm256_add_epi32(a, a);
+    long long c = _mm256_extract_epi64(b, 0);
+    return _mm_popcnt_u32((int) c);
+}
+#else
+int main(int argc, char **argv) { return 0; }
+#endif
+EOF
+
+# Check for avx512
+
+run_test "-mavx512f -mpopcnt" HTS_CFLAGS_AVX512 HTS_BUILD_AVX512 <<'EOF'
+#ifdef __x86_64__
+#include "x86intrin.h"
+int main(int argc, char **argv) {
+    __m512i a = _mm512_set1_epi32(1);
+    __m512i b = _mm512_add_epi32(a, a);
+    __m256i c = _mm512_castsi512_si256(b);
+    __m256i d = _mm512_extracti64x4_epi64(a, 1);
+    return _mm_popcnt_u32(*((char *) &c)) + (*(char *) &d);
+}
+#else
+int main(int argc, char **argv) { return 0; }
+#endif
+EOF
+
+rm -f conftest.c
--- a/ext/htslib/hts_time_funcs.h
+++ b/ext/htslib/hts_time_funcs.h
@ -0,0 +1,170 @@
+/*  hts_time_funcs.h -- Implementations of non-standard time functions
+
+    Copyright (C) 2022 Genome Research Ltd.
+
+    Author: Rob Davies <rmd@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+/*
+  This mainly exists because timegm() is not a standard function, and so
+  cannot be used in portable code.  Unfortunately the standard one (mktime)
+  always takes the local timezone into account, so doing a UTC conversion
+  with it involves changing the TZ environment variable, which is rather
+  messy and not likely to go well with threaded code.
+
+  The code here is a much simplified version of the BSD timegm() implementation.
+  It currently rejects dates before 1970, avoiding problems with -ve time_t.
+  It also works strictly in UTC, so doesn't have to worry about tm_isdst
+  which makes the calculation much easier.
+
+  Some of this is derived from BSD sources, for example
+  https://github.com/NetBSD/src/blob/trunk/lib/libc/time/localtime.c
+  which state:
+
+  ** This file is in the public domain, so clarified as of
+  ** 1996-06-05 by Arthur David Olson.
+
+  Non-derived code is copyright as above.
+*/
+
+#include <stdint.h>
+#include <limits.h>
+#include <errno.h>
+#include <time.h>
+
+/*
+ * Brings *units into the range 0 <= *units < base by carrying whole
+ * multiples of base into *tens (borrowing when *units is negative).
+ *
+ * Returns 0 on success,
+ *         1 if the adjusted *tens would not fit in an int; in that case
+ *           neither value is changed
+ */
+static inline int hts_time_normalise(int *tens, int *units, int base) {
+    if (*units < 0 || *units >= base) {
+        // For negative units, round the quotient towards minus infinity
+        // so the remainder ends up non-negative.
+        int delta = *units >= 0 ? *units / base : (-1 - (-1 - *units) / base);
+        int64_t tmp = (int64_t) (*tens) + delta;
+        if (tmp < INT_MIN || tmp > INT_MAX) return 1;
+        *tens = (int) tmp;
+        // delta * base can fall below INT_MIN when *units is close to
+        // INT_MIN, so take the difference in 64 bits.  The result is
+        // always in 0..base-1.
+        *units = (int) ((int64_t) *units - (int64_t) delta * base);
+    }
+    return 0;
+}
+
+// Returns 1 if year is a leap year under the Gregorian rules, else 0.
+static inline int hts_year_is_leap(int64_t year) {
+    if (year % 400 == 0)
+        return 1;       // every fourth century is a leap year
+    if (year % 100 == 0)
+        return 0;       // other century years are not
+    return year % 4 == 0;
+}
+
+// Counts the leap years in years 1 .. year-1, i.e. those that fall
+// before the start of the given year.
+// Only works for year >= 1.
+static inline int64_t hts_leaps_to_year_start(int64_t year) {
+    const int64_t last = year - 1;
+    const int64_t every_fourth  = last / 4;
+    const int64_t centuries     = last / 100;
+    const int64_t four_hundreds = last / 400;
+    return every_fourth - centuries + four_hundreds;
+}
+
+/*
+ * Normalises the fields of *t in place, carrying minutes into hours,
+ * hours into days, months into years, and then days into months/years.
+ *
+ * tm_sec is carried into tm_min only when it is greater than 62; smaller
+ * values, including negative ones, are left untouched.
+ *
+ * Returns 0 on success,
+ *         1 if a field would overflow the range of an int
+ */
+static inline int hts_time_normalise_tm(struct tm *t)
+{
+    static const int month_len[2][12] = {
+        { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
+        { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
+    };
+    static const int year_len[2] = { 365, 366 };
+    int failed = 0;
+    int64_t yr;
+
+    if (t->tm_sec > 62)
+        failed |= hts_time_normalise(&t->tm_min, &t->tm_sec, 60);
+    failed |= hts_time_normalise(&t->tm_hour, &t->tm_min,  60);
+    failed |= hts_time_normalise(&t->tm_mday, &t->tm_hour, 24);
+    failed |= hts_time_normalise(&t->tm_year, &t->tm_mon,  12);
+    if (failed)
+        return 1;
+
+    yr = (int64_t) t->tm_year + 1900LL;
+
+    // Step whole years until the day of month is in 1..366.  For months
+    // after February the leap day that matters is in the following year.
+    while (t->tm_mday <= 0) {
+        --yr;
+        t->tm_mday += year_len[hts_year_is_leap(yr + (1 < t->tm_mon))];
+    }
+    while (t->tm_mday > 366) {
+        t->tm_mday -= year_len[hts_year_is_leap(yr + (1 < t->tm_mon))];
+        ++yr;
+    }
+
+    // Then step whole months until the day fits the current month
+    while (t->tm_mday > month_len[hts_year_is_leap(yr)][t->tm_mon]) {
+        t->tm_mday -= month_len[hts_year_is_leap(yr)][t->tm_mon];
+        t->tm_mon++;
+        if (t->tm_mon >= 12) {
+            yr++;
+            t->tm_mon = 0;
+        }
+    }
+
+    yr -= 1900;
+    if (yr == t->tm_year)
+        return 0;
+    if (yr < INT_MIN || yr > INT_MAX)
+        return 1;
+    t->tm_year = yr;
+    return 0;
+}
+
+/**
+ *  Convert broken-down time to an equivalent time_t value
+ *  @param target  Target broken-down time structure
+ *  @return Equivalent time_t value on success; -1 on failure
+ *
+ *  This function first normalises the time in @p target so that the
+ *  structure members are in the valid range.  It then calculates the
+ *  number of seconds (ignoring leap seconds) between midnight Jan 1st 1970
+ *  and the target date.
+ *
+ *  If @p target is outside the range that can be represented in a time_t,
+ *  or tm_year is less than 70 (which would return a negative value) then
+ *  it returns -1 and sets errno to EOVERFLOW.
+ *
+ *  All intermediate values are held in 64 bits, so large tm_year values
+ *  do not overflow the day count.
+ */
+
+static inline time_t hts_time_gm(struct tm *target)
+{
+    // Days before the start of each month; row 1 is for leap years
+    static const int month_start[2][12] = {
+        { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 },
+        { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 }
+    };
+    int64_t year, years_from_epoch, leaps, days, secs;
+
+    if (hts_time_normalise_tm(target) != 0)
+        goto overflow;
+
+    if (target->tm_year < 70)
+        goto overflow;
+
+    // Widen before adding: tm_year + 1900 can exceed INT_MAX
+    year = (int64_t) target->tm_year + 1900;
+    years_from_epoch = (int64_t) target->tm_year - 70;
+    leaps = (hts_leaps_to_year_start(year)
+        - hts_leaps_to_year_start(1970));
+    days = ((365 * (years_from_epoch - leaps) + 366 * leaps)
+        + month_start[hts_year_is_leap(year)][target->tm_mon]
+        + target->tm_mday - 1);
+    secs = (days * 86400LL
+        + (int64_t) target->tm_hour * 3600
+        + (int64_t) target->tm_min * 60
+        + target->tm_sec);
+    if (sizeof(time_t) < 8 && secs > INT_MAX)
+        goto overflow;
+
+    return (time_t) secs;
+
+ overflow:
+    errno = EOVERFLOW;
+    return (time_t) -1;
+}
--- a/ext/htslib/htscodecs/BENCHMARKS.md
+++ b/ext/htslib/htscodecs/BENCHMARKS.md
@ -0,0 +1,146 @@
+-c<XXYY> option specifies decode method XX and encode method YY, where
+
+    00 is scalar
+    01 is SSE4
+    02 is AVX2
+    04 is AVX512
+
+Input data is 10MB worth of NovaSeq quality values; approx 100k
+records.  Performance is data specific, so these figures are purely a
+snapshot and not indicative of all data types.  The test machine
+reports as:
+
+    Intel(R) Xeon(R) Gold 6142 CPU @ 2.60GHz
+
+The -o<order> field is a bit field, where:
+
+    0/1   Order 0 or 1
+    4     32-way variant (permits SIMD)
+    64    RLE
+    128   Bit packing (4 novaseq quals to a byte)
+
+Hence -o133-c0202 is pack 4 quals to a byte and order-1 encode with AVX2
+32-way encode/decode.
+
+
+      r4x8-o0           10000000 uncomp,  665848 comp  395.3 enc MB/s  718.3 dec MB/s
+     r4x16-o0           10000000 uncomp,  665415 comp  400.3 enc MB/s  716.1 dec MB/s
+     arith-o0           10000000 uncomp,  660701 comp  105.0 enc MB/s   86.3 dec MB/s
+    
+      r4x8-o1           10000000 uncomp,  615304 comp  274.8 enc MB/s  385.1 dec MB/s
+     r4x16-o1           10000000 uncomp,  616134 comp  289.6 enc MB/s  536.6 dec MB/s
+     arith-o1           10000000 uncomp,  613736 comp   75.4 enc MB/s   87.1 dec MB/s
+    
+     r4x16-o64          10000000 uncomp,  712335 comp  382.0 enc MB/s  749.2 dec MB/s
+     arith-o64          10000000 uncomp,  744000 comp  153.1 enc MB/s  112.2 dec MB/s
+    
+     r4x16-o65          10000000 uncomp,  591457 comp  360.6 enc MB/s  705.8 dec MB/s
+     arith-o65          10000000 uncomp,  585233 comp  161.3 enc MB/s  117.7 dec MB/s
+    
+     r4x16-o128         10000000 uncomp,  615915 comp  780.2 enc MB/s 2092.5 dec MB/s
+     arith-o128         10000000 uncomp,  609977 comp  257.4 enc MB/s  219.0 dec MB/s
+    
+     r4x16-o129         10000000 uncomp,  553081 comp  645.1 enc MB/s 1394.1 dec MB/s
+     arith-o129         10000000 uncomp,  550377 comp  165.1 enc MB/s  180.7 dec MB/s
+    
+     r4x16-o192         10000000 uncomp,  621771 comp  513.1 enc MB/s 1003.0 dec MB/s
+     arith-o192         10000000 uncomp,  621415 comp  217.9 enc MB/s  180.8 dec MB/s
+    
+     r4x16-o193         10000000 uncomp,  550325 comp  474.1 enc MB/s  920.6 dec MB/s
+     arith-o193         10000000 uncomp,  543687 comp  195.7 enc MB/s  158.0 dec MB/s
+    
+    r32x16-o4-c0000     10000000 uncomp,  665501 comp  399.0 enc MB/s  613.9 dec MB/s
+    r32x16-o4-c0101     10000000 uncomp,  665501 comp  402.1 enc MB/s  968.0 dec MB/s
+    r32x16-o4-c0202     10000000 uncomp,  665501 comp  690.8 enc MB/s 1796.0 dec MB/s
+    r32x16-o4-c0404     10000000 uncomp,  665501 comp  866.9 enc MB/s 2098.6 dec MB/s
+    
+    r32x16-o5-c0000     10000000 uncomp,  616223 comp  274.6 enc MB/s  426.5 dec MB/s
+    r32x16-o5-c0101     10000000 uncomp,  616223 comp  274.1 enc MB/s  626.8 dec MB/s
+    r32x16-o5-c0202     10000000 uncomp,  616223 comp  391.8 enc MB/s 1472.8 dec MB/s
+    r32x16-o5-c0404     10000000 uncomp,  616223 comp  563.5 enc MB/s 1673.9 dec MB/s
+    
+    r32x16-o68-c0000    10000000 uncomp,  712513 comp  363.8 enc MB/s  717.4 dec MB/s
+    r32x16-o68-c0101    10000000 uncomp,  712513 comp  384.7 enc MB/s  836.5 dec MB/s
+    r32x16-o68-c0202    10000000 uncomp,  712513 comp  438.8 enc MB/s  913.6 dec MB/s
+    r32x16-o68-c0404    10000000 uncomp,  712513 comp  450.8 enc MB/s  918.0 dec MB/s
+    
+    r32x16-o69-c0000    10000000 uncomp,  591639 comp  369.7 enc MB/s  684.2 dec MB/s
+    r32x16-o69-c0101    10000000 uncomp,  591639 comp  370.2 enc MB/s  780.1 dec MB/s
+    r32x16-o69-c0202    10000000 uncomp,  591639 comp  408.5 enc MB/s  894.9 dec MB/s
+    r32x16-o69-c0404    10000000 uncomp,  591639 comp  431.6 enc MB/s  906.5 dec MB/s
+    
+    r32x16-o132-c0000   10000000 uncomp,  615999 comp  659.2 enc MB/s 1861.9 dec MB/s
+    r32x16-o132-c0101   10000000 uncomp,  615999 comp  660.0 enc MB/s 2580.6 dec MB/s
+    r32x16-o132-c0202   10000000 uncomp,  615999 comp  971.6 enc MB/s 3679.2 dec MB/s
+    r32x16-o132-c0404   10000000 uncomp,  615999 comp 1050.6 enc MB/s 3947.9 dec MB/s
+    
+    r32x16-o133-c0000   10000000 uncomp,  553181 comp  573.2 enc MB/s  848.8 dec MB/s
+    r32x16-o133-c0101   10000000 uncomp,  553181 comp  566.3 enc MB/s 1517.0 dec MB/s
+    r32x16-o133-c0202   10000000 uncomp,  553181 comp  759.1 enc MB/s 1923.8 dec MB/s
+    r32x16-o133-c0404   10000000 uncomp,  553181 comp  914.4 enc MB/s 1981.4 dec MB/s
+    
+    r32x16-o194-c0000   10000000 uncomp,  621771 comp  558.0 enc MB/s 1085.0 dec MB/s
+    r32x16-o194-c0101   10000000 uncomp,  621771 comp  559.2 enc MB/s 1088.6 dec MB/s
+    r32x16-o194-c0202   10000000 uncomp,  621771 comp  552.9 enc MB/s 1091.2 dec MB/s
+    r32x16-o194-c0404   10000000 uncomp,  621771 comp  550.1 enc MB/s 1070.3 dec MB/s
+    
+    r32x16-o197-c0000   10000000 uncomp,  550497 comp  484.2 enc MB/s  791.8 dec MB/s
+    r32x16-o197-c0101   10000000 uncomp,  550497 comp  487.2 enc MB/s 1004.4 dec MB/s
+    r32x16-o197-c0202   10000000 uncomp,  550497 comp  488.0 enc MB/s 1033.9 dec MB/s
+    r32x16-o197-c0404   10000000 uncomp,  550497 comp  502.0 enc MB/s 1027.6 dec MB/s
+
+For completeness, a couple other tools are also shown below.  Note
+fqzcomp here is slightly smaller as it has been trimmed to end on a
+whole line.
+
+    fqzcomp -s1          9999975 uncomp,  494485 comp   27.4 enc MB/s   27.1 dec MB/s
+
+    bsc -m3e1tT         10000000 uncomp,  553958 comp   43.7 enc MB/s   31.6 dec MB/s
+    bsc -m0e2tT         10000000 uncomp,  531536 comp   19.0 enc MB/s   25.5 dec MB/s
+
+-----------------------------------------------------------------------------
+
+10MB worth of Illumina HiSeq data with 40 distinct quality values.
+Note this sequencing run had a few erratic cycles, leading to
+unusually good performance from fqzcomp.  The bit-packing modes of
+rANS are not relevant (nor shown) here due to the cardinality of the
+data.
+
+      r4x8-o0             10000000 uncomp,    5092977 comp  303.9 enc MB/s  553.3 dec MB/s
+     r4x16-o0             10000000 uncomp,    5092608 comp  357.4 enc MB/s  579.8 dec MB/s
+     arith-o0             10000000 uncomp,    5079029 comp   51.9 enc MB/s   33.1 dec MB/s
+
+      r4x8-o1             10000000 uncomp,    4911113 comp  278.1 enc MB/s  356.4 dec MB/s
+     r4x16-o1             10000000 uncomp,    4918609 comp  290.5 enc MB/s  542.4 dec MB/s
+     arith-o1             10000000 uncomp,    4911347 comp   42.1 enc MB/s   32.3 dec MB/s
+
+     r4x16-o64            10000000 uncomp,    5092608 comp  215.5 enc MB/s  782.7 dec MB/s
+     arith-o64            10000000 uncomp,    5194241 comp   36.8 enc MB/s   26.6 dec MB/s
+
+     r4x16-o65            10000000 uncomp,    4918609 comp  167.0 enc MB/s  484.0 dec MB/s
+     arith-o65            10000000 uncomp,    4909925 comp   33.4 enc MB/s   23.8 dec MB/s
+
+    r32x16-o4-c0000       10000000 uncomp,    5092684 comp  367.2 enc MB/s  642.1 dec MB/s
+    r32x16-o4-c0101       10000000 uncomp,    5092684 comp  340.7 enc MB/s 1005.1 dec MB/s
+    r32x16-o4-c0202       10000000 uncomp,    5092684 comp  666.8 enc MB/s 1777.5 dec MB/s
+    r32x16-o4-c0404       10000000 uncomp,    5092684 comp  827.0 enc MB/s 2158.9 dec MB/s
+
+    r32x16-o5-c0000       10000000 uncomp,    4918685 comp  273.9 enc MB/s  391.5 dec MB/s
+    r32x16-o5-c0101       10000000 uncomp,    4918685 comp  268.5 enc MB/s  524.0 dec MB/s
+    r32x16-o5-c0202       10000000 uncomp,    4918685 comp  396.0 enc MB/s 1218.2 dec MB/s
+    r32x16-o5-c0404       10000000 uncomp,    4918685 comp  553.4 enc MB/s 1418.4 dec MB/s
+
+    r32x16-o68-c0000      10000000 uncomp,    5092684 comp  216.3 enc MB/s  646.6 dec MB/s
+    r32x16-o68-c0101      10000000 uncomp,    5092684 comp  235.2 enc MB/s 1016.3 dec MB/s
+    r32x16-o68-c0202      10000000 uncomp,    5092684 comp  336.4 enc MB/s 1804.4 dec MB/s
+    r32x16-o68-c0404      10000000 uncomp,    5092684 comp  376.5 enc MB/s 2162.2 dec MB/s
+
+    r32x16-o69-c0000      10000000 uncomp,    4918685 comp  194.3 enc MB/s  390.1 dec MB/s
+    r32x16-o69-c0101      10000000 uncomp,    4918685 comp  195.3 enc MB/s  593.4 dec MB/s
+    r32x16-o69-c0202      10000000 uncomp,    4918685 comp  251.6 enc MB/s 1212.7 dec MB/s
+    r32x16-o69-c0404      10000000 uncomp,    4918685 comp  306.3 enc MB/s 1415.6 dec MB/s
+
+    fqzcomp -s1           10000000 uncomp,    3196746 comp   16.6 enc MB/s  16.0 dec MB/s
+
+    bsc -m3e1tT           10000000 uncomp,    4762846 comp   12.9 enc MB/s  17.5 dec MB/s
+    bsc -m0e2tT           10000000 uncomp,    4477056 comp    6.1 enc MB/s   8.8 dec MB/s
--- a/ext/htslib/htscodecs/LICENSE.md
+++ b/ext/htslib/htscodecs/LICENSE.md
@ -0,0 +1,45 @@
+All files except those explicitly listed below are copyright Genome
+Research Limited and are made available under the BSD license.
+
+> Redistribution and use in source and binary forms, with or without
+> modification, are permitted provided that the following conditions
+> are met:
+> 
+>     (1) Redistributions of source code must retain the above copyright
+>     notice, this list of conditions and the following disclaimer. 
+> 
+>     (2) Redistributions in binary form must reproduce the above copyright
+>     notice, this list of conditions and the following disclaimer in
+>     the documentation and/or other materials provided with the distribution.  
+>     
+>     (3)The name of the author may not be used to endorse or promote
+>     products derived from this software without specific prior written
+>     permission.
+> 
+> THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+> IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+> WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+> DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+> INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+> (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+> SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+> HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+> STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+> IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+> POSSIBILITY OF SUCH DAMAGE. 
+
+c_range_coder.h is Public Domain, derived from work by Eugene
+Shelwien.
+
+rANS_byte.h and rANS_word.h are derived from Fabian Giesen's work and
+are Public Domain.  https://github.com/rygorous/ryg_rans This work was
+in turn based on the ANS family of entropy encoders as described by
+Jarek Duda's paper: http://arxiv.org/abs/1311.2540
+
+> To the extent possible under law, Fabian Giesen has waived all
+> copyright and related or neighboring rights to ryg_rans, as
+> per the terms of the CC0 license:
+> 
+>   https://creativecommons.org/publicdomain/zero/1.0
+> 
+> This work is published from the United States.
--- a/ext/htslib/htscodecs/MAINTAINERS.md
+++ b/ext/htslib/htscodecs/MAINTAINERS.md
@ -0,0 +1,55 @@
+Notes to maintainers for building releases.
+This is best done as a release PR so we can check it first.
+
+1. Places to update the version number include:
+
+   - htscodecs/htscodecs.h (used for program introspection)
+
+   - configure.ac AC_INIT macro
+
+   - configure.ac VERS_CURRENT, VERS_REVISION and VERS_AGE variables.
+     See the long comment above for instructions of how these change.
+
+   - NEWS files.
+
+
+2. Ensure NEWS and README files are up to date.  NEWS is a git log
+   summary.  README likely doesn't change unless something major needs
+   mentioning.
+
+   - At time of merging, set the date at the top of NEWS.
+
+
+3. Test it all.
+   - Push to github PR so the CI can validate for us.
+
+   - make distcheck
+     This also makes the tarball htscodecs-${vers}.tar.gz.
+
+
+4. Merge into master
+
+
+5. Add an annotated tag with minimal message, eg:
+
+   - git tag -a v1.1 -m v1.1
+
+
+6. Push master and --tags upstream to github
+
+
+7. Make a new release on github.
+
+   - Title: "htscodecs ${vers}"
+
+   - Message: this is just a copy of NEWS.
+     It's already in Markdown format, but double check the preview panel.
+
+   - Upload the tarball produced from distcheck to the assets.
+
+
+8. Finally, consider updating any packages that use this as a
+   submodule to ensure they have the latest tagged release.
+
+   This will invariably help OS distributions keep their package
+   dependencies neatly in sync.
--- a/ext/htslib/htscodecs/NEWS.md
+++ b/ext/htslib/htscodecs/NEWS.md
@ -0,0 +1,438 @@
+Release 1.6.1: 22nd August 2024
+-------------------------------
+
+This release is primarily portability and minor bug fixes.
+
+Changes
+
+- Improve warning levels by the compiler in CI. (#125)
+
+- Switch to GitHub actions for some CI builds. (#121, #123)
+
+- Add configure check for cpuid systems. (#115, #116.  Reported by
+  Ryan Carsten Schmidt)
+
+Bug fixes
+
+- Use unsigned chars for ctype macros in the name tokeniser.
+  On many systems this was already mitigated against, but on some OSes
+  a char > 128 could trigger a buffer underrun. (#124)
+
+- Fix interaction between _XOPEN_SOURCE and FreeBSD.
+  (#119, John Marshall)
+
+- Improve AVX512 compiler support, notably MacOS El Capitan's XCode.
+  (#118, Rob Davies)
+
+- Fix -std=c99 -pedantic pedantry (#117)
+
+
+Release 1.6.0: 7th December 2023
+--------------------------------
+
+This release is primarily bug fixes, mostly spotted through improved fuzz
+testing.
+
+One big change however is the SIMD rANS codecs are now performant on Intel
+CPUs with the DownFall mitigation microcode applied.
+
+
+Changes
+
+- Replaced the rANS codec SIMD gathers with simulated gathers via scalar
+  memory fetches.  This helps AMD Zen4, but importantly it also fixes a
+  disastrous performance regression caused by Intel's DownFall microcode fix.
+
+  There is an impact on pre-DownFall speeds, but we should focus on patched
+  CPUs as a priority.
+
+- A small speed up to the rans_F_to_s3 function used by order-0 rans decode.
+
+- Small speed up to SIMD rans32x16 order-1 encoder by reducing cache misses.
+  Also sped up the rans4x8 order-1 encoder, particularly on AMD Zen4.
+
+- Now supports building with "zig cc"
+  (Issue #109, reported by David Jackson)
+
+
+Bug fixes
+
+- Improve robustness of name tokeniser when given non 7-bit ASCII and on
+  machines where "char" defaults to unsigned.
+  (Issue #105, reported by Shubham Chandak)
+
+- Also fixed a 1 byte buffer read-overrun in name tokeniser.
+
+- Fix name tokeniser encoder failure with some duplicated streams.
+
+- Fixed rans_set_cpu to work multiple times, as well as reinstating the
+  ability to change decode and encode side independently (accidentally lost in
+  commit 958032c).  No effect on usage, but it improves the test coverage.
+
+- Added a round-trip fuzz tester to test the ability to encode.  The old fuzz
+  testing was decode streams only.
+
+- Fixed bounds checking in rans_uncompress_O0_32x16_avx2, fixing buffer read
+  overruns.
+
+- Removed undefined behaviour in transpose_and_copy(), fixing zig cc builds.
+
+
+Release 1.5.2: 6th October 2023
+-------------------------------
+
+*** SECURITY FIXES ***
+
+This release contains multiple bug fixes, including a couple
+buffer overruns that could corrupt memory when used in specific
+scenarios.  These have not been observed with real data, but could
+represent an attack vector for a malicious user.  (We know of no
+exploit.)
+
+
+Changes
+
+- The range coder has been extended to do bounds checking if the
+  new RC_SetOutputEnd() is called.  This has a small performance hit
+  for the encoder, depending on compiler, but tests showed within 10%
+  at worst.
+
+Bug fixes
+
+- Fix write-buffer overruns in fqzcomp and name tokeniser.
+
+  SECURITY ISSUE: FQZComp could overflow the computed maximum growth
+  size, causing writes beyond the ends of the allocated memory.  This
+  is triggered by many very small 1bp reads.  Fixed the maximum
+  bounds for compressed data.
+
+  SECURITY ISSUE: The name tokeniser using the maximum number of
+  tokens (128) would erroneously write a 129th token.  This is a
+  restricted overflow of a few bytes.
+
+  (PR#97, reported by Shubham Chandak)
+
+- Fix a maximum 8-byte read overflow in the AVX2 rans decoder.
+  SECURITY ISSUE: This was only present when using gcc.
+  (PR#100, reported by Rob Davies)
+
+- The rANS Order-1 SSE4 decoder could decode incorrectly.
+  When a single symbol only occurs and we're using 12-bit freqs, the
+  frequency of 4096 was interpreted as freq 0.  This only happens in
+  the non-SIMD tidy-up stage at the end of the decode, so at worst the
+  final 31 bytes may be incorrect. (PR#102)
+
+- Fixed a 1-byte heap read-buffer overflow. Existed since 6a87ead2
+  (Oct 2021).  Low severity security due to size and high likelihood
+  it's just malloc meta-data. (PR#95; OSS-Fuzz 62270)
+
+- rans_compress_4x16 now works on zero length input.
+  Previously this was giving divide-by-zero errors.
+  (PR#101, reported by Shubham Chandak)
+
+- Remove asserts which caused warnings about unused variables when
+  building with -DNDEBUG.
+
+- Fix ARM builds when HWCAP_ASIMD is missing (on Conda) (PR#91)
+
+- Improve FreeBSD CI testing
+
+- Fix undefined behaviour from signed bit-shifting (PR#90).
+
+
+Release 1.5.1: 19th July 2023
+-----------------------------
+
+This release is mainly small updates and bug fixes focusing on
+specific platforms, with no new features added.
+
+Changes
+
+- Be more selective in use of AVX512 on AMD Zen4 processors.  This can
+  be faster (e.g. with 64-way unrolling), but in the current rANS codec
+  implementations AVX2 is faster for certain operations (PR#85).
+
+- Add config.h to test programs to help them pick up definitions such
+  as XOPEN_SOURCE (PR#84)
+
+- Add FreeBSD to CI testing (PR#83)
+
+Bug fixes
+
+- Trivial bug fix to the rans4x16pr test harness when given
+  incompressible data (PR#86).
+
+- Make ARM NEON checks specific to AArch64 and exclude AArch32 systems.
+  (PR#82 to fix issue#81, reported by Robert Clausecker)
+
+
+Release 1.5.0: 14th April 2023
+------------------------------
+
+Changes
+
+- Significant speed ups to the fqzcomp codec via code restructuring
+  and use of memory prefetch instructions.  Encode is 30-40% faster
+  and decode 5-8% faster. (PR#75 James Bonfield)
+
+- Improve multiarch builds on MacOS, fixing issues with getting the
+  various SIMD implementations integrated. (Issue#76 John Marshall,
+  PR#77/#78 Rob Davies)
+
+- Remove unused ax_with_libdeflate.m4 file from build system.
+
+
+Release 1.4.0: February 2023
+----------------------------
+
+This is almost entirely minor bug fixing with a few small updates.
+
+Changes
+
+- Optimise compression / speed of the name tokeniser.
+  - In arithmetic coding mode, it can now utilise bzip2 at higher levels.
+  - For both rans / arith entropy encoders, the choice of method / order
+    is now optimised per token type, giving faster compression.
+  - Culled a pointless zlib check in the configure script.
+  - Made lack of bzip2 a hard failure in configure, unless an explicit
+    --disable-bz2 option is given.
+  (#72, #73)
+
+- Switch CI to use ARM for MacOS builds
+  (#69, thanks to Rob Davies)
+
+
+Bug fixes
+
+- Remove some newer compiler warnings (#61)
+
+- Improvements for Intel -m32 builds, including better AVX2 validation
+  (m32 misses _mm256_extract_epi64) and improved data alignment.
+  (#62. See also samtools/htslib#1500)
+
+- Detect Neon capability at runtime via operating system APIs.
+  (#63, thanks to John Marshall)
+
+- Improve FreeBSD diagnostics when neglecting to use -lpthread / -lthr. 
+  Plus additional extra error checking too.
+  (#68, #64, thanks to John Marshall)
+
+- Update hts_pack to operate in line with CRAMcodecs spec, where the
+  number of symbols > 16.
+  (#65/#66, reported by Michael Macias)
+
+- Fixed too-stringent buffer overflow checking in O1 rans decoder.
+  (#71, reported by Divon Lan)
+
+
+Release 1.3.0: 9th August 2022
+------------------------------
+
+The primary change in this release is a new SIMD enabled rANS codec.
+
+Changes
+
+- There is a 32-way unrolled rANS implementation.  This is accessed
+  using the existing rans 4x16 API with the RANS_ORDER_X32 bit set.
+  Implementations exist for SSE4.1, AVX2, AVX512 and ARM Neon, as
+  well as traditional non-SIMD scalar code in C and JavaScript. See
+  the commit logs for benchmarks.
+
+- Improved memory allocation via a new htscodecs_tls_alloc function.
+  This uses Thread Local Storage (TLS) to avoid multiple malloc/free
+  calls, reducing system CPU time.
+
+- Some external functions have been renamed, with the old ones still
+  existing in a deprecated fashion.  Every symbol should now start
+  hts_, rans_, arith_, fqz_ or tok3_*.
+
+- Improved test framework with an "entropy" tool that iterates over
+  all entropy encoders.
+
+- Updated the Appveyor CI image to use a newer gcc.  Also added ARM
+  to the list of processors to test on.
+
+- Tab vs space code changes.  Use "git diff -w" to see through these.
+
+- Reworked fuzzing infrastructure.
+
+- Small speed improvements to various rANS encoders and decoders.
+  These were tested on a broad range of compilers, versions and
+  systems.  The new code may be slightly slower with some combinations,
+  but is faster overall and removes a few outliers with considerably
+  degraded performance.
+
+- Substantial memory reduction to the name tokeniser (tok3).
+
+Bug fixes
+
+- Fixed undefined behaviour in our use of __builtin_clz().
+
+- Fixed a few redundant #includes.
+
+- Work around strict aliasing bugs, uncovered with gcc -O2.
+
+- Fixed an issue with encoding data blocks close to 2GB in size.
+  (Additionally blocks above 2GB now error, rather than crashing or
+  returning incorrect results.)
+
+- Fix encode error with large blocks using RANS_ORDER_STRIPE.
+
+
+Release 1.2.2: 1st April 2022
+-----------------------------
+
+This release contains some fixes found during fuzzing with Clang's
+memory-sanitizer.  None of these are involving writing memory so there
+is no possibility for code execution vulnerabilities.  However some
+could access uninitialised elements in locally allocated memory, which
+could leak private data if the library was used in conjunction with
+other tools which don't zero sensitive data before freeing.
+
+Bug fixes:
+
+- The name tokeniser now validates the stored length in the data
+  stream matches the actual decoded length.  Discovered by Taotao Gu.
+
+- Fixed an endless loop in arith_dynamic and rans4x16pr involving
+  X_STRIPE with 0 stripes.
+
+- Avoid a harmless (and wrong?) undefined behaviour sanitizer error
+  when calling memcpy(ptr, NULL, 0) in the name tokeniser.
+
+- Fixed possible uninitialised memory access in
+  rans_uncompress_O1_4x16.  If the frequency table didn't add up to
+  the correct amount, parts of the "fb" table were left unpopulated.
+  It was then possible to use these array elements in some of the rANS
+  calculations.
+
+- Similarly rans_uncompress_O0 could access an uninitialised element
+  4095 of the decoder tables if the frequencies summed to 4095 instead
+  of the expected 4096.
+
+- Improved error detection from fqzcomp's read_array function.
+
+- Reject fqzcomp parameters with inconsistent "sel" parameters, which
+  could lead to uninitialised access to the model.sel range coder.
+
+
+Release 1.2.1: 15th February 2022
+---------------------------------
+
+The only change in this release is a minor adjustment to the histogram
+code so it works on systems with small stacks.  This was detected on
+Windows Mingw builds.
+
+
+Release 1.2: 10th February 2022
+-------------------------------
+
+This release contains the following minor changes.
+Please see the "git log" for the full details.
+
+Improvements / changes:
+
+- Speed up of rANS4x16 order-0.  We now use a branchless encoder
+  renormalisation step.  For complex data it's between 13 and 50%
+  speed up depending on compiler.
+
+- Improve rANS4x16 compute_shift estimates.  The entropy calculation
+  is now more accurate.  This leads to more frequent use of the 10-bit
+  frequency mode, at an expense of up to 1% size growth.
+
+- Speed improvements to the striped rANS mode, both encoding and
+  decoding.  Encoder gains ~8% and decoder ~5%, but varies
+  considerably by compiler and data.
+
+- Added new var_put_u64_safe and var_put_u32_safe interfaces.
+  These are automatically used by var_put_u64 and var_put_u32 when
+  near the end of the buffer, but may also be called directly.
+
+- Small speed ups to the hist8 and hist1_4 functions.
+
+- Minor speed up to RLE decoding.
+
+Bug fixes:
+
+- Work around an icc-2021 compiler bug, but also speed up the varint
+  encoding too (#29).
+
+- Fix an off-by-one error in the initial size check in arith_dynamic.
+  This meant the very smallest of blocks could fail to decode.
+  Reported by Divon Lan.
+
+- Fixed hist1_4 to also count the last byte when computing T0[].
+
+- Fixed overly harsh bounds checking in the fqzcomp read_array
+  function, which meant it failed to decode some configurations.
+
+
+Release 1.1.1: 6th July 2021
+----------------------------
+
+This release contains the following minor changes.
+Please see the "git log" for the full details.
+
+Improvements / changes:
+
+- Modernised autoconf usage to avoid warnings with newer versions.
+  (John Marshall)
+
+- Avoid using awk with large records, due to some systems
+  (e.g. Solaris / OpenIndiana) with line length limits.
+  (John Marshall)
+
+- Applied Debian patch to make the library link against -lm.
+
+Bug fixes:
+
+- Fixed an issue with the name tokeniser when a slice (name_context)
+  has exactly 1 more name than the previous call. (James Bonfield)
+
+- Removed access to an uninitialised variable in the name tokeniser
+  decode when given malformed data.  This occurs when we use delta
+  encoding for the very first name. (James Bonfield, OSS-Fuzz)
+
+- Minor fixes to distcheck and distclean targets
+
+
+Release 1.0: 23rd Feb 2021
+--------------------------
+
+This marks the first non-beta release of htscodecs, following a
+period of integration with Htslib and automated fuzzing by Google's
+OSS-Fuzz program.
+
+[Note this testing only applies to the C implementation.  The
+JavaScript code should still be considered as examples of the codecs,
+more for purposes of understanding and clarity than as a fully
+optimised and tested release.]
+
+Since the last release (0.5) the key changes are:
+
+- Improved support for big endian platforms
+
+- Speed improvements to CRAM 3.0 4x8 rANS order-1 encoding.
+  It's between 10 and 50% faster at encoding, based on input data.
+
+- Improved autoconf bzip2 checks and tidy up "make test" output.
+
+- Added some more files into "make install", so that "make distcheck"
+  now passes.
+
+- Replaced Travis with Cirrus-CI testing.
+
+- Removed various C undefined behaviour, such as left shifting of
+  negative values and integer overflows.  As far as we know these were
+  currently harmless on the supported platforms, but may break future
+  compiler optimisations.
+
+- Fixed numerous OSS-Fuzz identified flaws.  Some of these were
+  potential security issues such as small buffer overruns.
+
+- Tidied up some code to prevent warnings.
+
+- The name tokeniser now has a limit on the size of data it can encode
+  (10 million records).  This may still be too high given the memory
+  it will require, so it may be reduced again.
+
--- a/ext/htslib/htscodecs/htscodecs/arith_dynamic.c
+++ b/ext/htslib/htscodecs/htscodecs/arith_dynamic.c
--- a/ext/htslib/htscodecs/htscodecs/arith_dynamic.h
+++ b/ext/htslib/htscodecs/htscodecs/arith_dynamic.h
@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ARITH_DYNAMIC_H
+#define ARITH_DYNAMIC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Prototypes only: the definitions are not part of this header.
+ * NOTE(review): the descriptions below are inferred from the names and
+ * signatures alone - confirm each against arith_dynamic.c.
+ *
+ * arith_compress: presumably compresses the in_size bytes at `in` using the
+ * model selected by `order`, returns the compressed buffer and stores its
+ * length in *out_size.  Ownership of the returned buffer is not visible
+ * here - TODO confirm who frees it. */
+unsigned char *arith_compress(unsigned char *in, unsigned int in_size,
+                              unsigned int *out_size, int order);
+/* arith_uncompress: presumably the inverse of arith_compress; returns the
+ * decoded buffer and stores its length in *out_size - TODO confirm. */
+unsigned char *arith_uncompress(unsigned char *in, unsigned int in_size,
+                                unsigned int *out_size);
+/* arith_compress_to: as arith_compress but with an extra `out` buffer
+ * argument.  Presumably *out_size carries the capacity of `out` on entry
+ * and the used length on return - TODO confirm. */
+unsigned char *arith_compress_to(unsigned char *in,  unsigned int in_size,
+                                 unsigned char *out, unsigned int *out_size,
+                                 int order);
+/* arith_uncompress_to: as arith_uncompress but with an extra `out` buffer
+ * argument; the size is exchanged through *out_sz - TODO confirm. */
+unsigned char *arith_uncompress_to(unsigned char *in, unsigned int in_size,
+                                   unsigned char *out, unsigned int *out_sz);
+/* arith_compress_bound: presumably an upper bound on the compressed size
+ * for `size` input bytes at the given `order` - TODO confirm. */
+unsigned int arith_compress_bound(unsigned int size, int order);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ARITH_DYNAMIC_H */
--- a/ext/htslib/htscodecs/htscodecs/c_range_coder.h
+++ b/ext/htslib/htscodecs/htscodecs/c_range_coder.h
@ -0,0 +1,166 @@
+// Copyright Eugene Shelwien.
+// Release into public domain.
+
+// Modifications by James Bonfield (2019)
+
+
+/*
+ * Note it is up to the calling code to ensure that no overruns on input and
+ * output buffers occur.
+ *
+ * Call the input() and output() functions to set and query the current
+ * buffer locations.
+ *
+
+ */
+
+#ifndef C_RANGER_CODER_H
+#define C_RANGER_CODER_H
+
+// DO(n) stmt;  -- runs stmt n times.  It expands to a declaration of the
+// counter `_` followed by a for-loop, so it can be used at most once per
+// scope and must not be the body of an unbraced if/else.
+#define  DO(n)     int _;for (_=0; _<(n); _++)
+// Renormalisation bound: the coder shifts a byte while range < TOP.
+#define  TOP       (1<<24)
+// Smallest value of low whose top byte is 0xFF (i.e. 0xFF000000).
+// Fully parenthesised so it is safe next to any operator.
+#define  Thres     ((unsigned)255*TOP)
+
+// Shorthand for the byte type used by all buffer pointers below.
+typedef unsigned char uc;
+
+// State of one range encoder or decoder.  The same structure serves both
+// directions; RC_StartEncode / RC_StartDecode reset all numeric fields.
+typedef struct {
+    // low:   base of the current interval; updated by RC_Encode.
+    // code:  input bytes shifted in by RC_StartDecode / RC_Decode.
+    // range: width of the current interval; renormalised while below TOP.
+    uint32_t low, code, range;
+    uint32_t FFNum;  // Number of consecutive FFs
+    uint32_t Cache;  // Top 8-bits of low ready to emit
+    uint32_t Carry;  // Flag to indicate if we emit Cache or Cache+1
+    // RC_SetInput and RC_SetOutput both point in_buf AND out_buf at the
+    // start of the buffer.  Only the cursor for the active direction then
+    // advances, so the other one marks the start for RC_InSize/RC_OutSize.
+    uc *in_buf;      // Read cursor (decode)
+    uc *out_buf;     // Write cursor (encode)
+    uc *in_end;      // One past the last input byte
+    uc *out_end;     // One past the last output byte, or NULL for no check
+    int err;         // 0, or -1 once output space or input data ran out
+} RangeCoder;
+
+// Attach the input buffer [in, in_end) for decoding.  out_buf is parked at
+// the start of the input so that RC_InSize() can report bytes consumed.
+static inline void RC_SetInput(RangeCoder *rc, char *in, char *in_end) {
+    uc *start = (uc *)in;
+
+    rc->in_buf  = start;
+    rc->out_buf = start;
+    rc->in_end  = (uc *)in_end;
+}
+
+// Attach the output buffer for encoding.  This also clears any output
+// limit, so call RC_SetOutput first, and then RC_SetOutputEnd.
+// in_buf is parked at the start of the output so that RC_OutSize() can
+// report the number of bytes written.
+static inline void RC_SetOutput(RangeCoder *rc, char *out) {
+    uc *start = (uc *)out;
+
+    rc->out_buf = start;
+    rc->in_buf  = start;
+    rc->out_end = NULL;
+}
+
+// Set the end of the output buffer, enabling the space check performed by
+// RC_ShiftLowCheck.
+static inline void RC_SetOutputEnd(RangeCoder *rc, char *out_end) {
+    rc->out_end = (uc *)out_end;
+}
+
+// Current read position.
+static inline char *RC_GetInput(RangeCoder *rc) {
+    return (char *)rc->in_buf;
+}
+
+// Current write position.
+static inline char *RC_GetOutput(RangeCoder *rc) {
+    return (char *)rc->out_buf;
+}
+
+// Distance from in_buf to out_buf (bytes written after RC_SetOutput).
+static inline size_t RC_OutSize(RangeCoder *rc) {
+    return rc->out_buf - rc->in_buf;
+}
+
+// Distance from out_buf to in_buf (bytes read after RC_SetInput).
+static inline size_t RC_InSize(RangeCoder *rc) {
+    return rc->in_buf - rc->out_buf;
+}
+
+// Reset all coder state ready for encoding.  The buffer pointers are left
+// alone; they are set separately via RC_SetOutput / RC_SetOutputEnd.
+static inline void RC_StartEncode(RangeCoder *rc)
+{
+    // Interval covers the full 32-bit space.
+    rc->low   = 0;
+    rc->range = 0xFFFFFFFF;
+    rc->code  = 0;
+
+    // Nothing buffered for carry propagation yet.
+    rc->Cache = 0;
+    rc->Carry = 0;
+    rc->FFNum = 0;
+
+    rc->err   = 0;
+}
+
+// Reset all coder state and prime rc->code with the first 5 input bytes.
+// The input buffer must already have been attached with RC_SetInput.
+//
+// If fewer than 5 bytes are available nothing is read and in_buf is moved
+// to in_end; rc->err is NOT set here, but the first RC_Decode call that
+// needs more input will then set it.
+static inline void RC_StartDecode(RangeCoder *rc)
+{
+    int i;
+
+    rc->range = 0xFFFFFFFF;
+    rc->low   = 0;
+    rc->FFNum = 0;
+    rc->Carry = 0;
+    rc->Cache = 0;
+    rc->code  = 0;
+    rc->err   = 0;
+
+    // Compare the remaining length instead of forming in_buf+5: that
+    // pointer would lie beyond the buffer (undefined behaviour) exactly in
+    // the short-input case this test exists to catch.
+    if (rc->in_end - rc->in_buf < 5) {
+        rc->in_buf = rc->in_end; // prevent decode
+        return;
+    }
+
+    // code is 32 bits wide, so the first of the five bytes is shifted out
+    // again; this matches the five bytes flushed by RC_FinishEncode.
+    for (i = 0; i < 5; i++)
+        rc->code = (rc->code<<8) | *rc->in_buf++;
+}
+
+// Shift the top byte out of rc->low, with a bounds check on the output
+// buffer (RC_ShiftLow is the same routine without the check).
+//
+// A byte is not written as soon as it leaves low, because a later carry
+// out of low may still have to be added to it.  The most recent such byte
+// waits in Cache and any 0xFF bytes following it are only counted in FFNum.
+//
+// If the output buffer is too small, rc->err is set to -1 and the function
+// returns without writing anything and without shifting low.
+static inline void RC_ShiftLowCheck(RangeCoder *rc) {
+    // Flush when the top byte of low is not 0xFF, or a carry is pending.
+    if (rc->low < Thres || rc->Carry) {
+        // Cache plus FFNum stored bytes are about to be written, so
+        // FFNum+1 bytes of space are needed.  No check if out_end is NULL.
+        if (rc->out_end && rc->FFNum >= rc->out_end - rc->out_buf) {
+            rc->err = -1;
+            return;
+        }
+
+        *rc->out_buf++ = rc->Cache + rc->Carry;
+
+        // Flush any stored FFs
+        // (written as 0xFF when Carry is 0, or as 0x00 when Carry is 1).
+        while (rc->FFNum) {
+            *rc->out_buf++ = rc->Carry-1; // (Carry-1)&255;
+            rc->FFNum--;
+        }
+
+        // Take copy of top byte ready for next flush
+        rc->Cache = rc->low >> 24;
+        rc->Carry = 0;
+    } else {
+        // Low is FFxx xxxx.  Bump FF count and shift in as before
+        rc->FFNum++;
+    }
+    rc->low = rc->low<<8;
+}
+
+// Shift the top byte out of rc->low WITHOUT checking the output buffer
+// size: out_end is never consulted and err is never set here, so the
+// caller must guarantee that the output buffer is large enough.
+//
+// The byte leaving low is held back in Cache (with following 0xFF bytes
+// counted in FFNum) until it is known whether a carry must be added.
+static inline void RC_ShiftLow(RangeCoder *rc) {
+    // Flush when the top byte of low is not 0xFF, or a carry is pending.
+    if (rc->low < Thres || rc->Carry) {
+        *rc->out_buf++ = rc->Cache + rc->Carry;
+
+        // Flush any stored FFs
+        // (written as 0xFF when Carry is 0, or as 0x00 when Carry is 1).
+        while (rc->FFNum) {
+            *rc->out_buf++ = rc->Carry-1; // (Carry-1)&255;
+            rc->FFNum--;
+        }
+
+        // Take copy of top byte ready for next flush
+        rc->Cache = rc->low >> 24;
+        rc->Carry = 0;
+    } else {
+        // Low is FFxx xxxx.  Bump FF count and shift in as before
+        rc->FFNum++;
+    }
+    rc->low = rc->low<<8;
+}
+
+// Flush the encoder by pushing five more bytes through RC_ShiftLowCheck,
+// so that everything still held in Cache and low reaches the output.
+// All five calls are made even if an earlier one fails.
+//
+// Returns rc->err: 0 on success, -1 if the output buffer was too small.
+static inline int RC_FinishEncode(RangeCoder *rc)
+{
+    int pass;
+
+    for (pass = 0; pass < 5; pass++)
+        RC_ShiftLowCheck(rc);
+
+    return rc->err;
+}
+
+// Report the decoder status.  No other work is done: this simply returns
+// rc->err, which RC_StartDecode clears to 0 and RC_Decode sets to -1 when
+// the input runs out.
+static inline int RC_FinishDecode(RangeCoder *rc) {
+    return rc->err;
+}
+
+// Encode one symbol occupying [cumFreq, cumFreq+freq) out of totFreq, then
+// renormalise.  Output-space failures are recorded in rc->err by
+// RC_ShiftLowCheck; totFreq must be non-zero.
+static inline void RC_Encode (RangeCoder *rc, uint32_t cumFreq, uint32_t freq, uint32_t totFreq)
+{
+    const uint32_t low_before = rc->low;
+
+    rc->range /= totFreq;             // width of one frequency unit
+    rc->low   += cumFreq * rc->range;
+    rc->range *= freq;
+
+    // Unsigned wrap-around of low is a carry into bytes that have already
+    // been shifted out; it is applied when they are flushed.
+    if (rc->low < low_before)
+        rc->Carry++;
+
+    for (;;) {
+        if (rc->range >= TOP)
+            break;
+        rc->range <<= 8;
+        RC_ShiftLowCheck(rc);
+    }
+}
+
+// Decoder step 1: scale range down by totFreq and return the position of
+// code within it, for the model to map to a symbol.  RC_Decode must follow,
+// as range is left divided by totFreq.
+//
+// Returns 0, leaving range untouched, when totFreq is zero or larger than
+// range (either would otherwise lead to a division by zero).
+static inline uint32_t RC_GetFreq (RangeCoder *rc, uint32_t totFreq) {
+    if (!totFreq || rc->range < totFreq)
+        return 0;
+
+    rc->range /= totFreq;
+    return rc->code / rc->range;
+}
+
+// Decoder step 2: consume the symbol occupying [cumFreq, cumFreq+freq) and
+// renormalise, reading more input as needed.  It expects rc->range to have
+// been divided by the total frequency already (RC_GetFreq does this), which
+// is why totFreq itself is not used here.
+//
+// Sets rc->err to -1 and returns early if the input is exhausted.
+static inline void RC_Decode (RangeCoder *rc, uint32_t cumFreq, uint32_t freq, uint32_t totFreq)
+{
+    rc->code  -= cumFreq * rc->range;
+    rc->range *= freq;
+
+    for (;;) {
+        if (rc->range >= TOP)
+            return;
+
+        if (rc->in_buf >= rc->in_end) {
+            rc->err = -1;
+            return;
+        }
+
+        rc->range <<= 8;
+        rc->code = (rc->code<<8) + *rc->in_buf++;
+    }
+}
+
+#endif /* C_RANGER_CODER_H */
--- a/ext/htslib/htscodecs/htscodecs/c_simple_model.h
+++ b/ext/htslib/htscodecs/htscodecs/c_simple_model.h
@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2012, 2018-2019 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include "c_range_coder.h"
+
+/*
+ *--------------------------------------------------------------------------
+ * A simple frequency model.
+ *
+ * Define NSYM to be an integer value before including this file.
+ * It will then generate types and functions specific to that
+ * maximum number of symbols.
+ *
+ * This keeps a list of symbols and their frequencies, approximately
+ * sorted by symbol frequency. We allow for a single symbol to periodically
+ * move up the list when emitted, effectively doing a single step of
+ * bubble sort periodically. This means it's largely the same complexity
+ * irrespective of alphabet size.
+ * It's more efficient on strongly biased distributions than random data.
+ *
+ * There is no escape symbol, so the model is tailored to relatively
+ * stationary samples (although we do have occasional normalisation to
+ * avoid frequency counters getting too high).
+ *--------------------------------------------------------------------------
+ */
+
+//-----------------------------------------------------------------------------
+// Bits we want included once only - constants, types, etc
+#ifndef C_SIMPLE_MODEL_H
+#define C_SIMPLE_MODEL_H
+
+// Limit on a model's total frequency; exceeding it triggers normalisation.
+// Parenthesised so the macro is safe inside larger expressions.
+#define MAX_FREQ ((1<<16)-17)
+// Token pasting helpers used to build the per-NSYM type and function
+// names, e.g. SIMPLE_MODEL(256,_init) -> SIMPLE_MODEL256_init.
+#define PASTE3(a,b,c) a##b##c
+#define SIMPLE_MODEL(a,b) PASTE3(SIMPLE_MODEL,a,b)
+// Amount added to a symbol's frequency each time it is coded.
+#define STEP 16
+// One table entry: a symbol and its current frequency count.
+typedef struct {
+    uint16_t Freq;
+    uint16_t Symbol;
+} SymFreqs;
+#endif /* C_SIMPLE_MODEL_H */
+
+
+//-----------------------------------------------------------------------------
+// Bits we regenerate for each NSYM value.
+
+// Frequency table for one model; the type name is generated from NSYM.
+typedef struct {
+    uint32_t TotFreq;  // Total frequency
+
+    // Array of Symbols approximately sorted by Freq.
+    // F[NSYM] is an extra slot whose zero Freq stops the normalize() loop.
+    // "sentinel" and "terminal" are declared directly before and after F[]
+    // because the code steps one entry outside F[]: the sorting swap reads
+    // s[-1], and the decoder's search may walk past F[NSYM].
+    SymFreqs sentinel, F[NSYM+1], terminal;
+} SIMPLE_MODEL(NSYM,_);
+
+
+// Reset a model: symbols 0..max_sym-1 start with frequency 1, the rest of
+// the NSYM slots are present but have frequency 0.
+static inline void SIMPLE_MODEL(NSYM,_init)(SIMPLE_MODEL(NSYM,_) *m, int max_sym) {
+    int sym = 0;
+
+    // Usable symbols.
+    while (sym < max_sym) {
+        m->F[sym].Freq   = 1;
+        m->F[sym].Symbol = sym;
+        sym++;
+    }
+
+    // Unused tail of the table.
+    while (sym < NSYM) {
+        m->F[sym].Freq   = 0;
+        m->F[sym].Symbol = sym;
+        sym++;
+    }
+
+    m->TotFreq = max_sym;
+
+    // Guard entries either side of F[].
+    m->sentinel.Freq   = MAX_FREQ; // Always first; simplifies sorting.
+    m->sentinel.Symbol = 0;
+    m->terminal.Freq   = MAX_FREQ;
+    m->terminal.Symbol = 0;
+
+    // A zero frequency marks the end of the table for normalize().
+    m->F[NSYM].Freq = 0;
+}
+
+
+// Halve every frequency (rounding up, so non-zero counts stay non-zero)
+// and recompute TotFreq as the sum of the halved values.
+static inline void SIMPLE_MODEL(NSYM,_normalize)(SIMPLE_MODEL(NSYM,_) *m) {
+    uint32_t total = 0;
+    SymFreqs *entry = m->F;
+
+    /* Stop at the first zero frequency rather than counting to NSYM. */
+    while (entry->Freq) {
+        uint16_t halved = entry->Freq - (entry->Freq >> 1);
+
+        entry->Freq = halved;
+        total += halved;
+        entry++;
+    }
+
+    m->TotFreq = total;
+}
+
+// Encode "sym" with the range coder and update the model statistics.
+// The symbol must be present in the table: the search has no other stop.
+static inline void SIMPLE_MODEL(NSYM,_encodeSymbol)(SIMPLE_MODEL(NSYM,_) *m,
+                                                    RangeCoder *rc, uint16_t sym) {
+    SymFreqs *slot;
+    SymFreqs *prev;
+    uint32_t below = 0;   // sum of the frequencies listed before sym
+
+    for (slot = m->F; slot->Symbol != sym; slot++)
+        below += slot->Freq;
+
+    RC_Encode(rc, below, slot->Freq, m->TotFreq);
+
+    m->TotFreq += STEP;
+    slot->Freq += STEP;
+    if (m->TotFreq > MAX_FREQ)
+        SIMPLE_MODEL(NSYM,_normalize)(m);
+
+    /* Keep approx sorted: move up one place if now ahead of the neighbour.
+     * For the first entry the neighbour is the sentinel. */
+    prev = slot - 1;
+    if (slot->Freq > prev->Freq) {
+        SymFreqs moved = *slot;
+        *slot = *prev;
+        *prev = moved;
+    }
+}
+
+// Decode one symbol from the range coder and update the model statistics
+// in the same way as the encoder does.
+//
+// Returns the decoded symbol.  The two error paths also return 0, so the
+// return value alone cannot distinguish an error from symbol 0.
+static inline uint16_t SIMPLE_MODEL(NSYM,_decodeSymbol)(SIMPLE_MODEL(NSYM,_) *m, RangeCoder *rc) {
+    SymFreqs* s = m->F;
+    // Position of the next symbol on the cumulative frequency scale.
+    uint32_t freq = RC_GetFreq(rc, m->TotFreq);
+    uint32_t AccFreq;
+
+    if (freq > MAX_FREQ)
+        return 0; // error
+
+    // Advance until the running total first exceeds freq; s is then the
+    // entry whose interval contains freq.
+    for (AccFreq = 0; (AccFreq += s->Freq) <= freq; s++)
+        ;
+    // Stopping beyond F[NSYM] means no table entry matched.
+    if (s - m->F > NSYM)
+        return 0; // error
+
+    // Back to the sum of frequencies before s, i.e. its cumulative start.
+    AccFreq -= s->Freq;
+
+    RC_Decode(rc, AccFreq, s->Freq, m->TotFreq);
+    s->Freq    += STEP;
+    m->TotFreq += STEP;
+
+    if (m->TotFreq > MAX_FREQ)
+        SIMPLE_MODEL(NSYM,_normalize)(m);
+
+    /* Keep approx sorted */
+    if (s[0].Freq > s[-1].Freq) {
+        SymFreqs t = s[0];
+        s[0] = s[-1];
+        s[-1] = t;
+        // s[0] now holds the neighbour, so return the saved copy's symbol.
+        return t.Symbol;
+    }
+
+    return s->Symbol;
+}
--- a/ext/htslib/htscodecs/htscodecs/fqzcomp_qual.c
+++ b/ext/htslib/htscodecs/htscodecs/fqzcomp_qual.c
--- a/ext/htslib/htscodecs/htscodecs/fqzcomp_qual.h
+++ b/ext/htslib/htscodecs/htscodecs/fqzcomp_qual.h
@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2011-2013, 2018-2019 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FQZ_COMP_QUAL_H
+#define FQZ_COMP_QUAL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/* Bit flags, deliberately mirroring BAM ones */
+#define FQZ_FREVERSE 16
+#define FQZ_FREAD2 128
+
+/* Current FQZ format version */
+#define FQZ_VERS 5
+
+#define FQZ_MAX_STRAT 3
+
+/*
+ * Minimal per-record information taken from a cram slice.
+ *
+ * To compress we need to know the junction from one quality string to
+ * the next (len), whether it is first/second read and whether it is
+ * reverse complemented (flags).
+ */
+typedef struct {
+    int num_records;
+    uint32_t *len;    // of size num_records
+    uint32_t *flags;  // of size num_records
+} fqz_slice;
+
+
+// Global flags
+static const int GFLAG_MULTI_PARAM = 1;
+static const int GFLAG_HAVE_STAB   = 2;
+static const int GFLAG_DO_REV      = 4;
+
+// Param flags
+// Add PFLAG_HAVE_DMAP and a dmap[] for delta incr?
+static const int PFLAG_DO_DEDUP    = 2;
+static const int PFLAG_DO_LEN      = 4;
+static const int PFLAG_DO_SEL      = 8;
+static const int PFLAG_HAVE_QMAP   = 16;
+static const int PFLAG_HAVE_PTAB   = 32;
+static const int PFLAG_HAVE_DTAB   = 64;
+static const int PFLAG_HAVE_QTAB   = 128;
+
+/*
+ * FQZ parameters.  These may be simply passed in as NULL to fqz_compress
+ * and it'll automatically choose, but if we wish to have complete control
+ * then this (long) struct contains all the details.
+ *
+ * TODO: document all this!
+ */
+
+// A single parameter block
+typedef struct {
+    // Starting context value
+    uint16_t context;
+
+    // flags
+    // NOTE(review): pflags presumably holds a combination of the PFLAG_*
+    // constants declared earlier in this header -- confirm in fqzcomp_qual.c.
+    unsigned int pflags;
+    unsigned int do_sel, do_dedup, store_qmap, fixed_len;
+    unsigned char use_qtab, use_dtab, use_ptab;
+
+    // context bits and locations
+    unsigned int qbits, qloc;
+    unsigned int pbits, ploc;
+    unsigned int dbits, dloc;
+    unsigned int sbits, sloc;
+
+    // models
+    int max_sym, nsym, max_sel;
+
+    // tables / maps
+    unsigned int qmap[256];
+    unsigned int qtab[256];
+    unsigned int ptab[1024];
+    unsigned int dtab[256];
+
+    // Not stored parameters, but computed as part of encoder
+    // parameterisation.
+    int qshift;
+    int pshift;
+    int dshift;
+    int sshift;
+    unsigned int qmask; // (1<<qbits)-1
+    int do_r2, do_qa;
+} fqz_param;
+
+// The global params, which is a collection of parameter blocks plus
+// a few pieces of meta-data.
+typedef struct {
+    int vers;               // Format version; Set to FQZ_VERS
+    unsigned int gflags;    // global param flags
+    int nparam;             // Number of fqz_param blocks
+    int max_sel;            // Number of selector values
+    unsigned int stab[256]; // Selector to parameter no. table
+
+    int max_sym;            // max symbol value across all sub-params
+
+    fqz_param *p;           // 1 or more parameter blocks
+} fqz_gparams;
+
+
+/** Compress a block of quality values.
+ *
+ * @param vers          The CRAM version number (<<8) plus fqz strategy (0-3)
+ * @param s             Length and flag data CRAM per-record
+ * @param in            Buffer of concatenated quality values (no separator)
+ * @param in_size       Size of in buffer
+ * @param out_size      Size of returned output
+ * @param strat         FQZ compression strategy (0 to FQZ_MAX_STRAT)
+ * @param gp            Optional fqzcomp parameters (may be NULL).
+ *
+ * @return              The compressed quality buffer on success,
+ *                      NULL on failure.
+ */
+char *fqz_compress(int vers, fqz_slice *s, char *in, size_t in_size,
+                   size_t *out_size, int strat, fqz_gparams *gp);
+
+/** Decompress a block of quality values.
+ *
+ * @param in            Buffer of compressed quality values
+ * @param in_size       Size of in buffer
+ * @param out_size      Size of returned output
+ * @param lengths       Optional array filled out with record lengths.
+ *                      May be NULL.  If not, preallocate it to correct size.
+ * @param nlengths      NOTE(review): presumably the number of elements
+ *                      available in lengths -- confirm in fqzcomp_qual.c.
+ *
+ * @return              The uncompressed concatenated qualities on success,
+ *                      NULL on failure.
+ */
+char *fqz_decompress(char *in, size_t in_size, size_t *out_size,
+                     int *lengths, int nlengths);
+
+/** A utility function to analyse a quality buffer to gather statistical
+ *  information.  This is written into qhist and pm.  This function is only
+ *  useful if you intend on passing your own fqz_gparams block to
+ *  fqz_compress.
+ */
+void fqz_qual_stats(fqz_slice *s,
+                    unsigned char *in, size_t in_size,
+                    fqz_param *pm,
+                    uint32_t qhist[256],
+                    int one_param);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/ext/htslib/htscodecs/htscodecs/htscodecs.c
+++ b/ext/htslib/htscodecs/htscodecs/htscodecs.c
@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "htscodecs.h"
+#include "version.h"
+
+/*
+ * A const string form of the HTSCODECS_VERSION define.
+ * NB: This is obtained from the auto-generated version.h, so
+ * we can include release number and git hash.
+ *
+ * Returns HTSCODECS_VERSION_TEXT unchanged; nothing is allocated here.
+ * NOTE(review): HTSCODECS_VERSION_TEXT is presumably a string literal
+ * defined by version.h -- that header is not visible from this file.
+ */
+const char *htscodecs_version(void) {
+    return HTSCODECS_VERSION_TEXT;
+}
--- a/ext/htslib/htscodecs/htscodecs/htscodecs.h
+++ b/ext/htslib/htscodecs/htscodecs/htscodecs.h
@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2024 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTSCODECS_H
+#define HTSCODECS_H
+
+/*
+ * Version X.Y.Z encoded as XYYYZZ.
+ * We mainly increment X and Y.  Z *may* get bumped in between official
+ * releases in order to distinguish untagged github checkouts from
+ * official release tarballs.
+ *
+ * Note currently this needs manually editing as it isn't automatically
+ * updated by autoconf.
+ */
+#define HTSCODECS_VERSION 100601
+
+/*
+ * A const string form of the HTSCODECS_VERSION define.
+ * NB: This is obtained from the auto-generated version.h, so
+ * we can include release number and git hash.
+ */
+const char *htscodecs_version(void);
+
+#endif /* HTSCODECS_H */
--- a/ext/htslib/htscodecs/htscodecs/htscodecs_endian.h
+++ b/ext/htslib/htscodecs/htscodecs/htscodecs_endian.h
@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Genome Research Ltd.
+ * Author(s): James Bonfield
+ * 
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ * 
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *    Institute nor the names of its contributors may be used to endorse
+ *    or promote products derived from this software without specific
+ *    prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTSCODECS_ENDIAN_H
+#define HTSCODECS_ENDIAN_H
+
+// Endianness checking.
+
+// Sets HTSCODECS_ENDIAN_KNOWN if system type detected and either
+// HTSCODECS_LITTLE_ENDIAN or HTSCODECS_BIG_ENDIAN.
+
+/*
+ * In general our preferred route is to write code in an endian agnostic
+ * fashion, but our data formats are natively little endian.  Therefore
+ * in time critical code it's sometimes best to exploit that.
+ *
+ * Therefore we'll optimise code along the lines of:
+ *
+ * #ifdef HTSCODECS_LITTLE_ENDIAN
+ *     // do something little endian specific
+ * #else
+ *     // do something in an endian agnostic fashion
+ * #endif
+ *
+ * This means our code works even if we fail to recognise the
+ * specific machine type.
+ */
+
+// Detection is by architecture macro, by the compiler's __BYTE_ORDER__, or
+// by the conventional *EL* / *EB* target macros.
+#if (defined(__i386__)      \
+ ||  defined(__i386)        \
+ ||  defined(__amd64__)     \
+ ||  defined(__amd64)       \
+ ||  defined(__x86_64__)    \
+ ||  defined(__x86_64)      \
+ ||  defined(__i686__)      \
+ ||  defined(__i686))       \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
+ ||  defined(__LITTLE_ENDIAN__)                                            \
+ ||  defined(__ARMEL__)     \
+ ||  defined(__THUMBEL__)   \
+ ||  defined(__AARCH64EL__) \
+ ||  defined(_MIPSEL)       \
+ ||  defined(__MIPSEL)      \
+ ||  defined(__MIPSEL__)
+    // Little endian
+#   define HTSCODECS_LITTLE_ENDIAN
+#   define HTSCODECS_ENDIAN_KNOWN
+#elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
+   ||  defined(__BIG_ENDIAN__) \
+   || defined(__ARMEB__)       \
+   || defined(__THUMBEB__)     \
+   || defined(__AARCH64EB__)   \
+   || defined(_MIPSEB)         \
+   || defined(__MIPSEB)        \
+   || defined(__MIPSEB__)
+    // Big endian
+    // (__AARCH64EB__ is the big-endian counterpart of __AARCH64EL__ above.)
+#   define HTSCODECS_BIG_ENDIAN
+#   define HTSCODECS_ENDIAN_KNOWN
+#else
+//    Unknown - code will need to check HTSCODECS_ENDIAN_KNOWN and do endian agnostic
+#endif
+
+#endif /* HTSCODECS_ENDIAN_H */
--- a/ext/htslib/htscodecs/htscodecs/pack.c
+++ b/ext/htslib/htscodecs/htscodecs/pack.c
@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2019-2020, 2022 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "pack.h"
+
+//-----------------------------------------------------------------------------
+
+/*
+ * Packs multiple symbols into a single byte if the total alphabet of symbols
+ * used is <= 16.  Each symbol then takes up 4, 2 or 1 bits (2, 4 or 8
+ * symbols per byte), or 0 bits if at most one distinct symbol is used.
+ * Within a byte the first symbol occupies the least significant bits.
+ *
+ * out_meta is filled with the mapping table to be used during
+ * decompression: out_meta[0] is the number of distinct symbols (256 wraps
+ * to 0), followed by the symbols themselves in increasing order.  The table
+ * is written before the alphabet size is checked, so out_meta must have
+ * room for 257 bytes even though at most 17 are used on success.
+ * *out_meta_len and *out_len are only set on success.
+ *
+ * Returns the packed buffer on success with new length in *out_len; the
+ *         buffer is obtained from malloc() and is freed by the caller.
+ *         NULL on failure (negative len, more than 16 distinct symbols,
+ *         or out of memory).
+ */
+uint8_t *hts_pack(uint8_t *data, int64_t len,
+                  uint8_t *out_meta, int *out_meta_len, uint64_t *out_len) {
+    int p[256] = {0}, n;
+    uint64_t i, j, ulen, whole;
+
+    // len is compared against unsigned counters below; a negative value
+    // would be converted to a huge count and read far beyond data[].
+    if (len < 0)
+        return NULL;
+    ulen = (uint64_t)len;
+
+    // count syms
+    for (i = 0; i < ulen; i++)
+        p[data[i]]=1;
+
+    for (i = n = 0; i < 256; i++) {
+        if (p[i]) {
+            p[i] = n++; // p[i] is now the code number
+            out_meta[n] = i;
+        }
+    }
+    out_meta[0] = n; // 256 wraps to 0
+
+    // More than 16 symbols would mean 1 value per byte: nothing to gain.
+    if (n > 16)
+        return NULL;
+
+    // Guard the +1 below against wrapping where size_t is narrower than len.
+    if (ulen > SIZE_MAX - 1)
+        return NULL;
+
+    uint8_t *out = malloc(ulen+1);
+    if (!out)
+        return NULL;
+
+    *out_meta_len = n+1;
+    j = 0;
+
+    if (n > 4) {
+        // 2 values per byte, 4 bits each
+        whole = ulen & ~(uint64_t)1;
+        for (i = 0; i < whole; i+=2)
+            out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<4);
+        if (i < ulen)
+            out[j++] = p[data[i]];
+    } else if (n > 2) {
+        // 4 values per byte, 2 bits each
+        whole = ulen & ~(uint64_t)3;
+        for (i = 0; i < whole; i+=4)
+            out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<2) | (p[data[i+2]]<<4) | (p[data[i+3]]<<6);
+        if (i < ulen) {
+            // Partial final byte: remaining values fill from the low bits.
+            uint8_t last = 0;
+            int x;
+            for (x = 0; i < ulen; i++, x+=2)
+                last |= p[data[i]] << x;
+            out[j++] = last;
+        }
+    } else if (n > 1) {
+        // 8 values per byte, 1 bit each
+        whole = ulen & ~(uint64_t)7;
+        for (i = 0; i < whole; i+=8)
+            out[j++] = (p[data[i+0]]<<0) | (p[data[i+1]]<<1) | (p[data[i+2]]<<2) | (p[data[i+3]]<<3)
+                     | (p[data[i+4]]<<4) | (p[data[i+5]]<<5) | (p[data[i+6]]<<6) | (p[data[i+7]]<<7);
+        if (i < ulen) {
+            // Partial final byte: remaining values fill from the low bits.
+            uint8_t last = 0;
+            int x;
+            for (x = 0; i < ulen; i++, x++)
+                last |= p[data[i]] << x;
+            out[j++] = last;
+        }
+    }
+    // n <= 1: constant (or empty) input needs no payload, so j stays 0.
+
+    *out_len = j;
+    return out;
+}
+
+
+/*
+ * Parses the meta-data written by hts_pack: one byte holding the number
+ * of distinct symbols (0 meaning 256) followed by that many symbol values.
+ *
+ * "map" receives the symbol values, indexed by code number.
+ * "nsym" receives the number of symbols stored per packed byte:
+ * 0 (constant data), 8, 4, 2, or 1 when the data is not packed at all.
+ * In the unpacked case no symbol table follows and map is left untouched.
+ * "udata_len" is not used.
+ *
+ * Returns number of bytes of data[] consumed on success,
+ *         zero on failure (empty or truncated meta-data).
+ */
+uint8_t hts_unpack_meta(uint8_t *data, uint32_t data_len,
+                        uint64_t udata_len, uint8_t *map, int *nsym) {
+    unsigned int count;    // number of distinct symbols
+    unsigned int filled;   // entries of map[] written so far
+    uint32_t pos;          // read position within data[]
+
+    (void)udata_len;
+
+    if (!data_len)
+        return 0;
+
+    count = data[0] ? data[0] : 256;
+
+    if (count > 16) {
+        *nsym = 1; // no packing
+        return 1;
+    }
+
+    // Symbols per byte, largest alphabet first
+    if (count > 4)
+        *nsym = 2;
+    else if (count > 2)
+        *nsym = 4;
+    else if (count > 1)
+        *nsym = 8;
+    else
+        *nsym = 0;
+
+    if (data_len <= 1)
+        return 0;
+
+    // Copy the table, stopping early if the input is truncated.
+    for (filled = 0, pos = 1; filled < count && pos < data_len; )
+        map[filled++] = data[pos++];
+
+    if (filled < count)
+        return 0;
+    return pos;
+}
+
+/*
+ * Unpacks a packed data stream (given the unpacked meta-data).
+ *
+ * "map" is the pack map, mapping 0->n to the expanded symbols.
+ * The "out" buffer must be preallocated by the caller to be the correct
+ * size.  For error checking purposes, out_len is set to the size of
+ * this buffer.
+ *
+ * Returns uncompressed data (out) on success,
+ *         NULL on failure.
+ */
+uint8_t *hts_unpack(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) {
+    //uint8_t *out;
+    uint8_t c = 0;
+    int64_t i, j = 0, olen;
+
+    if (nsym == 1) {
+        // raw data; FIXME: shortcut the need for malloc & memcpy here
+        memcpy(out, data, len);
+        return out;
+    }
+
+    switch(nsym) {
+    case 8: {
+        union {
+            uint64_t w;
+            uint8_t c[8];
+        } map[256];
+        int x;
+        for (x = 0; x < 256; x++) {
+            map[x].c[0] = p[x>>0&1];
+            map[x].c[1] = p[x>>1&1];
+            map[x].c[2] = p[x>>2&1];
+            map[x].c[3] = p[x>>3&1];
+            map[x].c[4] = p[x>>4&1];
+            map[x].c[5] = p[x>>5&1];
+            map[x].c[6] = p[x>>6&1];
+            map[x].c[7] = p[x>>7&1];
+        }
+        if ((out_len+7)/8 > len)
+            return NULL;
+        olen = out_len & ~7;
+
+        for (i = 0; i < olen; i+=8)
+            memcpy(&out[i], &map[data[j++]].w, 8);
+
+        if (out_len != olen) {
+            c = data[j++];
+            while (i < out_len) {
+                out[i++] = p[c & 1];
+                c >>= 1;
+            }
+        }
+        break;
+    }
+
+    case 4: {
+        union {
+            uint32_t w;
+            uint8_t c[4];
+        } map[256];
+
+        int x, y, z, _, P=0;
+        for (x = 0; x < 4; x++)
+            for (y = 0; y < 4; y++)
+                for (z = 0; z < 4; z++)
+                    for (_ = 0; _ < 4; _++, P++) {
+                        map[P].c[0] = p[_];
+                        map[P].c[1] = p[z];
+                        map[P].c[2] = p[y];
+                        map[P].c[3] = p[x];
+                    }
+
+        if ((out_len+3)/4 > len)
+            return NULL;
+        olen = out_len & ~3;
+
+        for (i = 0; i < olen-12; i+=16) {
+            uint32_t w[] = {
+                map[data[j+0]].w,
+                map[data[j+1]].w,
+                map[data[j+2]].w,
+                map[data[j+3]].w
+            };
+            j += 4;
+            memcpy(&out[i], &w, 16);
+        }
+
+        for (; i < olen; i+=4)
+            memcpy(&out[i], &map[data[j++]].w, 4);
+
+        if (out_len != olen) {
+            c = data[j++];
+            while (i < out_len) {
+                out[i++] = p[c & 3];
+                c >>= 2;
+            }
+        }
+        break;
+    }
+
+    case 2: {
+        union {
+            uint16_t w;
+            uint8_t c[2];
+        } map[256];
+
+        int x, y;
+        for (x = 0; x < 16; x++) {
+            for (y = 0; y < 16; y++) {
+                map[x*16+y].c[0] = p[y];
+                map[x*16+y].c[1] = p[x];
+            }
+        }
+
+        if ((out_len+1)/2 > len)
+            return NULL;
+        olen = out_len & ~1;
+
+        for (i = j = 0; i+2 < olen; i+=4) {
+            uint16_t w[] = {
+                map[data[j+0]].w,
+                map[data[j+1]].w
+            };
+            memcpy(&out[i], &w, 4);
+
+            j += 2;
+        }
+
+        for (; i < olen; i+=2)
+            memcpy(&out[i], &map[data[j++]].w, 2);
+
+        if (out_len != olen) {
+            c = data[j++];
+            out[i+0] = p[c&15];
+        }
+        break;
+    }
+
+    case 0:
+        memset(out, p[0], out_len);
+        break;
+
+    default:
+        return NULL;
+    }
+
+    return out;
+}
+
+
+/*
+ * Alternative implementation of hts_unpack covering only the raw
+ * (nsym == 1) and two-symbols-per-byte (nsym == 2) cases.  It is not
+ * declared in pack.h.
+ *
+ * "data"/"len" is the packed input, "out"/"out_len" the caller-allocated
+ * output buffer and its size, and "p" the pack map.  For nsym == 2 the
+ * low nibble of each input byte selects the first output symbol and the
+ * high nibble the second.
+ *
+ * Output bytes are written with memcpy from a byte-pair table, so the
+ * result does not depend on host byte order or on "out" being aligned.
+ *
+ * Returns uncompressed data (out) on success,
+ *         NULL on failure (any other nsym, negative len, input too short
+ *         to yield out_len symbols, or raw input larger than out_len).
+ */
+uint8_t *hts_unpack_(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) {
+    int64_t i, j = 0, olen;
+
+    // A negative length would become a huge unsigned value in the size
+    // comparisons below and silently defeat them.
+    if (len < 0)
+        return NULL;
+    uint64_t in_len = (uint64_t)len;
+
+    if (nsym == 1) {
+        // raw data; never copy more than the caller's buffer can hold
+        if (in_len > out_len)
+            return NULL;
+        memcpy(out, data, in_len);
+        return out;
+    }
+
+    if (nsym != 2)
+        return NULL;
+
+    // Expansion of every possible input byte into its two output bytes
+    uint8_t map[256][2];
+    int x, y;
+    for (x = 0; x < 16; x++) {
+        for (y = 0; y < 16; y++) {
+            map[x*16+y][0] = p[y];
+            map[x*16+y][1] = p[x];
+        }
+    }
+
+    // Need one input byte per 2 output symbols, rounded up
+    if ((out_len+1)/2 > in_len)
+        return NULL;
+    olen = out_len & ~1;
+
+    for (i = 0; i < olen; i += 2)
+        memcpy(&out[i], map[data[j++]], 2);
+
+    // Odd output length: one last symbol from the low nibble
+    if (out_len != olen)
+        out[i] = p[data[j] & 15];
+
+    return out;
+}
--- a/ext/htslib/htscodecs/htscodecs/pack.h
+++ b/ext/htslib/htscodecs/htscodecs/pack.h
@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTS_PACK_H
+#define HTS_PACK_H
+
+/* Fixed-width integer types used by the prototypes below */
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Packs multiple symbols into a single byte if the total alphabet of symbols
+ * used is <= 16.  Each new symbol takes up 1, 2, 4 or 8 bits, or 0 if the
+ * alphabet used is 1 (constant).
+ *
+ * If successful, out_meta/out_meta_len are set to hold the mapping table
+ * to be used during decompression.
+ *
+ * Returns the packed buffer on success with new length in out_len,
+ *         NULL on failure
+ */
+uint8_t *hts_pack(uint8_t *data, int64_t len,
+                  uint8_t *out_meta, int *out_meta_len, uint64_t *out_len);
+
+/*
+ * Unpacks the meta-data portions of the hts_pack algorithm.
+ * This consists of the count of symbols and their values.
+ *
+ * The "map" array is filled out with the used symbols.
+ * "nsym" is set to contain the number of symbols per byte;
+ * 0, 1, 2, 4 or 8.
+ *
+ * Returns number of bytes of data[] consumed on success,
+ *         zero on failure.
+ */
+uint8_t hts_unpack_meta(uint8_t *data, uint32_t data_len,
+                        uint64_t udata_len, uint8_t *map, int *nsym);
+
+/*
+ * Unpacks a packed data stream (given the unpacked meta-data).
+ *
+ * "map" is the pack map, mapping 0->n to the expanded symbols.
+ * The "out" buffer must be preallocated by the caller to be the correct
+ * size.  For error checking purposes, out_len is set to the size of
+ * this buffer.
+ *
+ * Returns uncompressed data (out) on success,
+ *         NULL on failure.
+ */
+uint8_t *hts_unpack(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *map);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTS_PACK_H */
--- a/ext/htslib/htscodecs/htscodecs/permute.h
+++ b/ext/htslib/htscodecs/htscodecs/permute.h
@ -0,0 +1,605 @@
+#ifdef MAIN
+#include <stdio.h>
+
+/*
+ * Shuffle based on input bits.
+ * So bit N true  => keep Nth byte.
+ *    bit N false => skip Nth byte.
+ */
+
+/*
+ * Table generator, built only when MAIN is defined.
+ *
+ * Echoes this source file to stdout, then prints the "permute" and
+ * "permutec" lookup tables as C initialisers.
+ *
+ * Returns 0 on success, 1 if the source file cannot be opened.
+ */
+int main(void) {
+    int i, j;
+
+    FILE *fp = fopen(__FILE__, "r");
+    if (!fp) {
+        perror(__FILE__);
+        return 1;
+    }
+    char line[8192];
+    while (fgets(line, sizeof line, fp)) {
+        printf("%s", line);
+    }
+    // fp is a stdio stream, so it must be released with fclose()
+    fclose(fp);
+    printf("\n");
+
+    // Decode table; distributes N adjacent values across lanes
+    printf("#define _ 9\n");
+    printf("static uint32_t permute[256][8] = { // reverse binary bit order\n");
+    for (i = 0; i < 256; i++) {
+        int b = 0;
+        int v[8] = {0};
+        // v[j] is the 1-based rank of bit j among the set bits of i,
+        // or 0 when bit j is clear
+        for (j = 0; j < 8; j++) {
+            if (i & (1<<j)) {
+                v[j] = ++b;
+            }
+        }
+        printf("  { ");
+        for (j = 0; j < 8; j++) {
+            if (v[j])
+                printf("%d,", v[j]-1);
+            else
+                printf("_,");
+        }
+        printf("},\n");
+    }
+    printf("};\n\n");
+
+    // Encode table; collapses N values spread across lanes
+    printf("static uint32_t permutec[256][8] = { // reverse binary bit order\n");
+    for (i = 0; i < 256; i++) {
+        int b = 0;
+        int v[9] = {0};
+        // v[0..b-1] are the 1-based positions of the set bits of i
+        for (j = 0; j < 8; j++) {
+            if (i & (1<<j)) {
+                v[b++] = j+1;
+            }
+        }
+        printf("  { ");
+        // Right-align the b positions within the 8 columns
+        for (j = b-8; j < b; j++) {
+            if (j >= 0 && v[j])
+                printf("%d,", v[j]-1);
+            else
+                printf("_,");
+        }
+        printf("},\n");
+    }
+    printf("};\n");
+
+    return 0;
+}
+#endif
+
+/*
+ * These tables are 8k.  On older systems with small L1 cache, this may be
+ * a problem.
+ *
+ * #define PM(a,b,c,d,e,f,g,h) ((a<<0)|(b<<4)|(c<<8)|(d<<12)|(e<<16)|(f<<20)|(g<<24)|(h<<28))
+ *
+ * Instead of permute via
+ *    __m256i idx1 = _mm256_load_si256((const __m256i*)permute[imask1]);
+ *
+ * we can pack the indices and shift them back again
+ *   __m256i idx1 = _mm256_srlv_epi32(_mm256_set1_epi32(permute2[imask1]),
+ *                                    _mm256_set_epi32(28,24,20,16,12,8,4,0));
+ *
+ * However on my Haswell system this slows down r32x16b_avx2 from 1440 to
+ * 1200 MB/s decode speeds.
+ * It's much closer for order-1 decoder, but still doesn't help.
+ *
+ * The encoder side seems to make no difference either way or be very marginal.
+ */
+
+/*
+ * Decode-side lane table, indexed by an 8-bit mask m.
+ *
+ * permute[m][j] describes lane j: when bit j of m is set, the entry is the
+ * 0-based rank of that bit among the set bits of m (so the N selected
+ * lanes are numbered 0..N-1 in ascending lane order); when bit j is clear
+ * the entry is the placeholder "_".
+ *
+ * Each row is 8 x uint32_t = 32 bytes and the table is 32-byte aligned;
+ * the comment above shows rows being fetched with _mm256_load_si256.
+ *
+ * NOTE(review): "_" expands to 9, which is outside the 0..7 lane range;
+ * how consumers treat that value is not visible in this file.  The macro
+ * is not #undef'd after the tables.
+ */
+#define _ 9
+static uint32_t permute[256][8] __attribute__((aligned(32))) = { // reverse binary bit order
+  { _,_,_,_,_,_,_,_,},
+  { 0,_,_,_,_,_,_,_,},
+  { _,0,_,_,_,_,_,_,},
+  { 0,1,_,_,_,_,_,_,},
+  { _,_,0,_,_,_,_,_,},
+  { 0,_,1,_,_,_,_,_,},
+  { _,0,1,_,_,_,_,_,},
+  { 0,1,2,_,_,_,_,_,},
+  { _,_,_,0,_,_,_,_,},
+  { 0,_,_,1,_,_,_,_,},
+  { _,0,_,1,_,_,_,_,},
+  { 0,1,_,2,_,_,_,_,},
+  { _,_,0,1,_,_,_,_,},
+  { 0,_,1,2,_,_,_,_,},
+  { _,0,1,2,_,_,_,_,},
+  { 0,1,2,3,_,_,_,_,},
+  { _,_,_,_,0,_,_,_,},
+  { 0,_,_,_,1,_,_,_,},
+  { _,0,_,_,1,_,_,_,},
+  { 0,1,_,_,2,_,_,_,},
+  { _,_,0,_,1,_,_,_,},
+  { 0,_,1,_,2,_,_,_,},
+  { _,0,1,_,2,_,_,_,},
+  { 0,1,2,_,3,_,_,_,},
+  { _,_,_,0,1,_,_,_,},
+  { 0,_,_,1,2,_,_,_,},
+  { _,0,_,1,2,_,_,_,},
+  { 0,1,_,2,3,_,_,_,},
+  { _,_,0,1,2,_,_,_,},
+  { 0,_,1,2,3,_,_,_,},
+  { _,0,1,2,3,_,_,_,},
+  { 0,1,2,3,4,_,_,_,},
+  { _,_,_,_,_,0,_,_,},
+  { 0,_,_,_,_,1,_,_,},
+  { _,0,_,_,_,1,_,_,},
+  { 0,1,_,_,_,2,_,_,},
+  { _,_,0,_,_,1,_,_,},
+  { 0,_,1,_,_,2,_,_,},
+  { _,0,1,_,_,2,_,_,},
+  { 0,1,2,_,_,3,_,_,},
+  { _,_,_,0,_,1,_,_,},
+  { 0,_,_,1,_,2,_,_,},
+  { _,0,_,1,_,2,_,_,},
+  { 0,1,_,2,_,3,_,_,},
+  { _,_,0,1,_,2,_,_,},
+  { 0,_,1,2,_,3,_,_,},
+  { _,0,1,2,_,3,_,_,},
+  { 0,1,2,3,_,4,_,_,},
+  { _,_,_,_,0,1,_,_,},
+  { 0,_,_,_,1,2,_,_,},
+  { _,0,_,_,1,2,_,_,},
+  { 0,1,_,_,2,3,_,_,},
+  { _,_,0,_,1,2,_,_,},
+  { 0,_,1,_,2,3,_,_,},
+  { _,0,1,_,2,3,_,_,},
+  { 0,1,2,_,3,4,_,_,},
+  { _,_,_,0,1,2,_,_,},
+  { 0,_,_,1,2,3,_,_,},
+  { _,0,_,1,2,3,_,_,},
+  { 0,1,_,2,3,4,_,_,},
+  { _,_,0,1,2,3,_,_,},
+  { 0,_,1,2,3,4,_,_,},
+  { _,0,1,2,3,4,_,_,},
+  { 0,1,2,3,4,5,_,_,},
+  { _,_,_,_,_,_,0,_,},
+  { 0,_,_,_,_,_,1,_,},
+  { _,0,_,_,_,_,1,_,},
+  { 0,1,_,_,_,_,2,_,},
+  { _,_,0,_,_,_,1,_,},
+  { 0,_,1,_,_,_,2,_,},
+  { _,0,1,_,_,_,2,_,},
+  { 0,1,2,_,_,_,3,_,},
+  { _,_,_,0,_,_,1,_,},
+  { 0,_,_,1,_,_,2,_,},
+  { _,0,_,1,_,_,2,_,},
+  { 0,1,_,2,_,_,3,_,},
+  { _,_,0,1,_,_,2,_,},
+  { 0,_,1,2,_,_,3,_,},
+  { _,0,1,2,_,_,3,_,},
+  { 0,1,2,3,_,_,4,_,},
+  { _,_,_,_,0,_,1,_,},
+  { 0,_,_,_,1,_,2,_,},
+  { _,0,_,_,1,_,2,_,},
+  { 0,1,_,_,2,_,3,_,},
+  { _,_,0,_,1,_,2,_,},
+  { 0,_,1,_,2,_,3,_,},
+  { _,0,1,_,2,_,3,_,},
+  { 0,1,2,_,3,_,4,_,},
+  { _,_,_,0,1,_,2,_,},
+  { 0,_,_,1,2,_,3,_,},
+  { _,0,_,1,2,_,3,_,},
+  { 0,1,_,2,3,_,4,_,},
+  { _,_,0,1,2,_,3,_,},
+  { 0,_,1,2,3,_,4,_,},
+  { _,0,1,2,3,_,4,_,},
+  { 0,1,2,3,4,_,5,_,},
+  { _,_,_,_,_,0,1,_,},
+  { 0,_,_,_,_,1,2,_,},
+  { _,0,_,_,_,1,2,_,},
+  { 0,1,_,_,_,2,3,_,},
+  { _,_,0,_,_,1,2,_,},
+  { 0,_,1,_,_,2,3,_,},
+  { _,0,1,_,_,2,3,_,},
+  { 0,1,2,_,_,3,4,_,},
+  { _,_,_,0,_,1,2,_,},
+  { 0,_,_,1,_,2,3,_,},
+  { _,0,_,1,_,2,3,_,},
+  { 0,1,_,2,_,3,4,_,},
+  { _,_,0,1,_,2,3,_,},
+  { 0,_,1,2,_,3,4,_,},
+  { _,0,1,2,_,3,4,_,},
+  { 0,1,2,3,_,4,5,_,},
+  { _,_,_,_,0,1,2,_,},
+  { 0,_,_,_,1,2,3,_,},
+  { _,0,_,_,1,2,3,_,},
+  { 0,1,_,_,2,3,4,_,},
+  { _,_,0,_,1,2,3,_,},
+  { 0,_,1,_,2,3,4,_,},
+  { _,0,1,_,2,3,4,_,},
+  { 0,1,2,_,3,4,5,_,},
+  { _,_,_,0,1,2,3,_,},
+  { 0,_,_,1,2,3,4,_,},
+  { _,0,_,1,2,3,4,_,},
+  { 0,1,_,2,3,4,5,_,},
+  { _,_,0,1,2,3,4,_,},
+  { 0,_,1,2,3,4,5,_,},
+  { _,0,1,2,3,4,5,_,},
+  { 0,1,2,3,4,5,6,_,},
+  { _,_,_,_,_,_,_,0,},
+  { 0,_,_,_,_,_,_,1,},
+  { _,0,_,_,_,_,_,1,},
+  { 0,1,_,_,_,_,_,2,},
+  { _,_,0,_,_,_,_,1,},
+  { 0,_,1,_,_,_,_,2,},
+  { _,0,1,_,_,_,_,2,},
+  { 0,1,2,_,_,_,_,3,},
+  { _,_,_,0,_,_,_,1,},
+  { 0,_,_,1,_,_,_,2,},
+  { _,0,_,1,_,_,_,2,},
+  { 0,1,_,2,_,_,_,3,},
+  { _,_,0,1,_,_,_,2,},
+  { 0,_,1,2,_,_,_,3,},
+  { _,0,1,2,_,_,_,3,},
+  { 0,1,2,3,_,_,_,4,},
+  { _,_,_,_,0,_,_,1,},
+  { 0,_,_,_,1,_,_,2,},
+  { _,0,_,_,1,_,_,2,},
+  { 0,1,_,_,2,_,_,3,},
+  { _,_,0,_,1,_,_,2,},
+  { 0,_,1,_,2,_,_,3,},
+  { _,0,1,_,2,_,_,3,},
+  { 0,1,2,_,3,_,_,4,},
+  { _,_,_,0,1,_,_,2,},
+  { 0,_,_,1,2,_,_,3,},
+  { _,0,_,1,2,_,_,3,},
+  { 0,1,_,2,3,_,_,4,},
+  { _,_,0,1,2,_,_,3,},
+  { 0,_,1,2,3,_,_,4,},
+  { _,0,1,2,3,_,_,4,},
+  { 0,1,2,3,4,_,_,5,},
+  { _,_,_,_,_,0,_,1,},
+  { 0,_,_,_,_,1,_,2,},
+  { _,0,_,_,_,1,_,2,},
+  { 0,1,_,_,_,2,_,3,},
+  { _,_,0,_,_,1,_,2,},
+  { 0,_,1,_,_,2,_,3,},
+  { _,0,1,_,_,2,_,3,},
+  { 0,1,2,_,_,3,_,4,},
+  { _,_,_,0,_,1,_,2,},
+  { 0,_,_,1,_,2,_,3,},
+  { _,0,_,1,_,2,_,3,},
+  { 0,1,_,2,_,3,_,4,},
+  { _,_,0,1,_,2,_,3,},
+  { 0,_,1,2,_,3,_,4,},
+  { _,0,1,2,_,3,_,4,},
+  { 0,1,2,3,_,4,_,5,},
+  { _,_,_,_,0,1,_,2,},
+  { 0,_,_,_,1,2,_,3,},
+  { _,0,_,_,1,2,_,3,},
+  { 0,1,_,_,2,3,_,4,},
+  { _,_,0,_,1,2,_,3,},
+  { 0,_,1,_,2,3,_,4,},
+  { _,0,1,_,2,3,_,4,},
+  { 0,1,2,_,3,4,_,5,},
+  { _,_,_,0,1,2,_,3,},
+  { 0,_,_,1,2,3,_,4,},
+  { _,0,_,1,2,3,_,4,},
+  { 0,1,_,2,3,4,_,5,},
+  { _,_,0,1,2,3,_,4,},
+  { 0,_,1,2,3,4,_,5,},
+  { _,0,1,2,3,4,_,5,},
+  { 0,1,2,3,4,5,_,6,},
+  { _,_,_,_,_,_,0,1,},
+  { 0,_,_,_,_,_,1,2,},
+  { _,0,_,_,_,_,1,2,},
+  { 0,1,_,_,_,_,2,3,},
+  { _,_,0,_,_,_,1,2,},
+  { 0,_,1,_,_,_,2,3,},
+  { _,0,1,_,_,_,2,3,},
+  { 0,1,2,_,_,_,3,4,},
+  { _,_,_,0,_,_,1,2,},
+  { 0,_,_,1,_,_,2,3,},
+  { _,0,_,1,_,_,2,3,},
+  { 0,1,_,2,_,_,3,4,},
+  { _,_,0,1,_,_,2,3,},
+  { 0,_,1,2,_,_,3,4,},
+  { _,0,1,2,_,_,3,4,},
+  { 0,1,2,3,_,_,4,5,},
+  { _,_,_,_,0,_,1,2,},
+  { 0,_,_,_,1,_,2,3,},
+  { _,0,_,_,1,_,2,3,},
+  { 0,1,_,_,2,_,3,4,},
+  { _,_,0,_,1,_,2,3,},
+  { 0,_,1,_,2,_,3,4,},
+  { _,0,1,_,2,_,3,4,},
+  { 0,1,2,_,3,_,4,5,},
+  { _,_,_,0,1,_,2,3,},
+  { 0,_,_,1,2,_,3,4,},
+  { _,0,_,1,2,_,3,4,},
+  { 0,1,_,2,3,_,4,5,},
+  { _,_,0,1,2,_,3,4,},
+  { 0,_,1,2,3,_,4,5,},
+  { _,0,1,2,3,_,4,5,},
+  { 0,1,2,3,4,_,5,6,},
+  { _,_,_,_,_,0,1,2,},
+  { 0,_,_,_,_,1,2,3,},
+  { _,0,_,_,_,1,2,3,},
+  { 0,1,_,_,_,2,3,4,},
+  { _,_,0,_,_,1,2,3,},
+  { 0,_,1,_,_,2,3,4,},
+  { _,0,1,_,_,2,3,4,},
+  { 0,1,2,_,_,3,4,5,},
+  { _,_,_,0,_,1,2,3,},
+  { 0,_,_,1,_,2,3,4,},
+  { _,0,_,1,_,2,3,4,},
+  { 0,1,_,2,_,3,4,5,},
+  { _,_,0,1,_,2,3,4,},
+  { 0,_,1,2,_,3,4,5,},
+  { _,0,1,2,_,3,4,5,},
+  { 0,1,2,3,_,4,5,6,},
+  { _,_,_,_,0,1,2,3,},
+  { 0,_,_,_,1,2,3,4,},
+  { _,0,_,_,1,2,3,4,},
+  { 0,1,_,_,2,3,4,5,},
+  { _,_,0,_,1,2,3,4,},
+  { 0,_,1,_,2,3,4,5,},
+  { _,0,1,_,2,3,4,5,},
+  { 0,1,2,_,3,4,5,6,},
+  { _,_,_,0,1,2,3,4,},
+  { 0,_,_,1,2,3,4,5,},
+  { _,0,_,1,2,3,4,5,},
+  { 0,1,_,2,3,4,5,6,},
+  { _,_,0,1,2,3,4,5,},
+  { 0,_,1,2,3,4,5,6,},
+  { _,0,1,2,3,4,5,6,},
+  { 0,1,2,3,4,5,6,7,},
+};
+
+/*
+ * Encode-side lane table, indexed by an 8-bit mask m.
+ *
+ * permutec[m] lists the positions (0..7) of the set bits of m in
+ * ascending order, right-aligned within the row: the last popcount(m)
+ * entries hold the lane numbers and every entry before them is the
+ * placeholder "_" (defined above as 9).
+ *
+ * Like "permute", each row is 8 x uint32_t = 32 bytes and the table is
+ * 32-byte aligned.
+ */
+static uint32_t permutec[256][8] __attribute__((aligned(32))) = { // reverse binary bit order
+  { _,_,_,_,_,_,_,_,},
+  { _,_,_,_,_,_,_,0,},
+  { _,_,_,_,_,_,_,1,},
+  { _,_,_,_,_,_,0,1,},
+  { _,_,_,_,_,_,_,2,},
+  { _,_,_,_,_,_,0,2,},
+  { _,_,_,_,_,_,1,2,},
+  { _,_,_,_,_,0,1,2,},
+  { _,_,_,_,_,_,_,3,},
+  { _,_,_,_,_,_,0,3,},
+  { _,_,_,_,_,_,1,3,},
+  { _,_,_,_,_,0,1,3,},
+  { _,_,_,_,_,_,2,3,},
+  { _,_,_,_,_,0,2,3,},
+  { _,_,_,_,_,1,2,3,},
+  { _,_,_,_,0,1,2,3,},
+  { _,_,_,_,_,_,_,4,},
+  { _,_,_,_,_,_,0,4,},
+  { _,_,_,_,_,_,1,4,},
+  { _,_,_,_,_,0,1,4,},
+  { _,_,_,_,_,_,2,4,},
+  { _,_,_,_,_,0,2,4,},
+  { _,_,_,_,_,1,2,4,},
+  { _,_,_,_,0,1,2,4,},
+  { _,_,_,_,_,_,3,4,},
+  { _,_,_,_,_,0,3,4,},
+  { _,_,_,_,_,1,3,4,},
+  { _,_,_,_,0,1,3,4,},
+  { _,_,_,_,_,2,3,4,},
+  { _,_,_,_,0,2,3,4,},
+  { _,_,_,_,1,2,3,4,},
+  { _,_,_,0,1,2,3,4,},
+  { _,_,_,_,_,_,_,5,},
+  { _,_,_,_,_,_,0,5,},
+  { _,_,_,_,_,_,1,5,},
+  { _,_,_,_,_,0,1,5,},
+  { _,_,_,_,_,_,2,5,},
+  { _,_,_,_,_,0,2,5,},
+  { _,_,_,_,_,1,2,5,},
+  { _,_,_,_,0,1,2,5,},
+  { _,_,_,_,_,_,3,5,},
+  { _,_,_,_,_,0,3,5,},
+  { _,_,_,_,_,1,3,5,},
+  { _,_,_,_,0,1,3,5,},
+  { _,_,_,_,_,2,3,5,},
+  { _,_,_,_,0,2,3,5,},
+  { _,_,_,_,1,2,3,5,},
+  { _,_,_,0,1,2,3,5,},
+  { _,_,_,_,_,_,4,5,},
+  { _,_,_,_,_,0,4,5,},
+  { _,_,_,_,_,1,4,5,},
+  { _,_,_,_,0,1,4,5,},
+  { _,_,_,_,_,2,4,5,},
+  { _,_,_,_,0,2,4,5,},
+  { _,_,_,_,1,2,4,5,},
+  { _,_,_,0,1,2,4,5,},
+  { _,_,_,_,_,3,4,5,},
+  { _,_,_,_,0,3,4,5,},
+  { _,_,_,_,1,3,4,5,},
+  { _,_,_,0,1,3,4,5,},
+  { _,_,_,_,2,3,4,5,},
+  { _,_,_,0,2,3,4,5,},
+  { _,_,_,1,2,3,4,5,},
+  { _,_,0,1,2,3,4,5,},
+  { _,_,_,_,_,_,_,6,},
+  { _,_,_,_,_,_,0,6,},
+  { _,_,_,_,_,_,1,6,},
+  { _,_,_,_,_,0,1,6,},
+  { _,_,_,_,_,_,2,6,},
+  { _,_,_,_,_,0,2,6,},
+  { _,_,_,_,_,1,2,6,},
+  { _,_,_,_,0,1,2,6,},
+  { _,_,_,_,_,_,3,6,},
+  { _,_,_,_,_,0,3,6,},
+  { _,_,_,_,_,1,3,6,},
+  { _,_,_,_,0,1,3,6,},
+  { _,_,_,_,_,2,3,6,},
+  { _,_,_,_,0,2,3,6,},
+  { _,_,_,_,1,2,3,6,},
+  { _,_,_,0,1,2,3,6,},
+  { _,_,_,_,_,_,4,6,},
+  { _,_,_,_,_,0,4,6,},
+  { _,_,_,_,_,1,4,6,},
+  { _,_,_,_,0,1,4,6,},
+  { _,_,_,_,_,2,4,6,},
+  { _,_,_,_,0,2,4,6,},
+  { _,_,_,_,1,2,4,6,},
+  { _,_,_,0,1,2,4,6,},
+  { _,_,_,_,_,3,4,6,},
+  { _,_,_,_,0,3,4,6,},
+  { _,_,_,_,1,3,4,6,},
+  { _,_,_,0,1,3,4,6,},
+  { _,_,_,_,2,3,4,6,},
+  { _,_,_,0,2,3,4,6,},
+  { _,_,_,1,2,3,4,6,},
+  { _,_,0,1,2,3,4,6,},
+  { _,_,_,_,_,_,5,6,},
+  { _,_,_,_,_,0,5,6,},
+  { _,_,_,_,_,1,5,6,},
+  { _,_,_,_,0,1,5,6,},
+  { _,_,_,_,_,2,5,6,},
+  { _,_,_,_,0,2,5,6,},
+  { _,_,_,_,1,2,5,6,},
+  { _,_,_,0,1,2,5,6,},
+  { _,_,_,_,_,3,5,6,},
+  { _,_,_,_,0,3,5,6,},
+  { _,_,_,_,1,3,5,6,},
+  { _,_,_,0,1,3,5,6,},
+  { _,_,_,_,2,3,5,6,},
+  { _,_,_,0,2,3,5,6,},
+  { _,_,_,1,2,3,5,6,},
+  { _,_,0,1,2,3,5,6,},
+  { _,_,_,_,_,4,5,6,},
+  { _,_,_,_,0,4,5,6,},
+  { _,_,_,_,1,4,5,6,},
+  { _,_,_,0,1,4,5,6,},
+  { _,_,_,_,2,4,5,6,},
+  { _,_,_,0,2,4,5,6,},
+  { _,_,_,1,2,4,5,6,},
+  { _,_,0,1,2,4,5,6,},
+  { _,_,_,_,3,4,5,6,},
+  { _,_,_,0,3,4,5,6,},
+  { _,_,_,1,3,4,5,6,},
+  { _,_,0,1,3,4,5,6,},
+  { _,_,_,2,3,4,5,6,},
+  { _,_,0,2,3,4,5,6,},
+  { _,_,1,2,3,4,5,6,},
+  { _,0,1,2,3,4,5,6,},
+  { _,_,_,_,_,_,_,7,},
+  { _,_,_,_,_,_,0,7,},
+  { _,_,_,_,_,_,1,7,},
+  { _,_,_,_,_,0,1,7,},
+  { _,_,_,_,_,_,2,7,},
+  { _,_,_,_,_,0,2,7,},
+  { _,_,_,_,_,1,2,7,},
+  { _,_,_,_,0,1,2,7,},
+  { _,_,_,_,_,_,3,7,},
+  { _,_,_,_,_,0,3,7,},
+  { _,_,_,_,_,1,3,7,},
+  { _,_,_,_,0,1,3,7,},
+  { _,_,_,_,_,2,3,7,},
+  { _,_,_,_,0,2,3,7,},
+  { _,_,_,_,1,2,3,7,},
+  { _,_,_,0,1,2,3,7,},
+  { _,_,_,_,_,_,4,7,},
+  { _,_,_,_,_,0,4,7,},
+  { _,_,_,_,_,1,4,7,},
+  { _,_,_,_,0,1,4,7,},
+  { _,_,_,_,_,2,4,7,},
+  { _,_,_,_,0,2,4,7,},
+  { _,_,_,_,1,2,4,7,},
+  { _,_,_,0,1,2,4,7,},
+  { _,_,_,_,_,3,4,7,},
+  { _,_,_,_,0,3,4,7,},
+  { _,_,_,_,1,3,4,7,},
+  { _,_,_,0,1,3,4,7,},
+  { _,_,_,_,2,3,4,7,},
+  { _,_,_,0,2,3,4,7,},
+  { _,_,_,1,2,3,4,7,},
+  { _,_,0,1,2,3,4,7,},
+  { _,_,_,_,_,_,5,7,},
+  { _,_,_,_,_,0,5,7,},
+  { _,_,_,_,_,1,5,7,},
+  { _,_,_,_,0,1,5,7,},
+  { _,_,_,_,_,2,5,7,},
+  { _,_,_,_,0,2,5,7,},
+  { _,_,_,_,1,2,5,7,},
+  { _,_,_,0,1,2,5,7,},
+  { _,_,_,_,_,3,5,7,},
+  { _,_,_,_,0,3,5,7,},
+  { _,_,_,_,1,3,5,7,},
+  { _,_,_,0,1,3,5,7,},
+  { _,_,_,_,2,3,5,7,},
+  { _,_,_,0,2,3,5,7,},
+  { _,_,_,1,2,3,5,7,},
+  { _,_,0,1,2,3,5,7,},
+  { _,_,_,_,_,4,5,7,},
+  { _,_,_,_,0,4,5,7,},
+  { _,_,_,_,1,4,5,7,},
+  { _,_,_,0,1,4,5,7,},
+  { _,_,_,_,2,4,5,7,},
+  { _,_,_,0,2,4,5,7,},
+  { _,_,_,1,2,4,5,7,},
+  { _,_,0,1,2,4,5,7,},
+  { _,_,_,_,3,4,5,7,},
+  { _,_,_,0,3,4,5,7,},
+  { _,_,_,1,3,4,5,7,},
+  { _,_,0,1,3,4,5,7,},
+  { _,_,_,2,3,4,5,7,},
+  { _,_,0,2,3,4,5,7,},
+  { _,_,1,2,3,4,5,7,},
+  { _,0,1,2,3,4,5,7,},
+  { _,_,_,_,_,_,6,7,},
+  { _,_,_,_,_,0,6,7,},
+  { _,_,_,_,_,1,6,7,},
+  { _,_,_,_,0,1,6,7,},
+  { _,_,_,_,_,2,6,7,},
+  { _,_,_,_,0,2,6,7,},
+  { _,_,_,_,1,2,6,7,},
+  { _,_,_,0,1,2,6,7,},
+  { _,_,_,_,_,3,6,7,},
+  { _,_,_,_,0,3,6,7,},
+  { _,_,_,_,1,3,6,7,},
+  { _,_,_,0,1,3,6,7,},
+  { _,_,_,_,2,3,6,7,},
+  { _,_,_,0,2,3,6,7,},
+  { _,_,_,1,2,3,6,7,},
+  { _,_,0,1,2,3,6,7,},
+  { _,_,_,_,_,4,6,7,},
+  { _,_,_,_,0,4,6,7,},
+  { _,_,_,_,1,4,6,7,},
+  { _,_,_,0,1,4,6,7,},
+  { _,_,_,_,2,4,6,7,},
+  { _,_,_,0,2,4,6,7,},
+  { _,_,_,1,2,4,6,7,},
+  { _,_,0,1,2,4,6,7,},
+  { _,_,_,_,3,4,6,7,},
+  { _,_,_,0,3,4,6,7,},
+  { _,_,_,1,3,4,6,7,},
+  { _,_,0,1,3,4,6,7,},
+  { _,_,_,2,3,4,6,7,},
+  { _,_,0,2,3,4,6,7,},
+  { _,_,1,2,3,4,6,7,},
+  { _,0,1,2,3,4,6,7,},
+  { _,_,_,_,_,5,6,7,},
+  { _,_,_,_,0,5,6,7,},
+  { _,_,_,_,1,5,6,7,},
+  { _,_,_,0,1,5,6,7,},
+  { _,_,_,_,2,5,6,7,},
+  { _,_,_,0,2,5,6,7,},
+  { _,_,_,1,2,5,6,7,},
+  { _,_,0,1,2,5,6,7,},
+  { _,_,_,_,3,5,6,7,},
+  { _,_,_,0,3,5,6,7,},
+  { _,_,_,1,3,5,6,7,},
+  { _,_,0,1,3,5,6,7,},
+  { _,_,_,2,3,5,6,7,},
+  { _,_,0,2,3,5,6,7,},
+  { _,_,1,2,3,5,6,7,},
+  { _,0,1,2,3,5,6,7,},
+  { _,_,_,_,4,5,6,7,},
+  { _,_,_,0,4,5,6,7,},
+  { _,_,_,1,4,5,6,7,},
+  { _,_,0,1,4,5,6,7,},
+  { _,_,_,2,4,5,6,7,},
+  { _,_,0,2,4,5,6,7,},
+  { _,_,1,2,4,5,6,7,},
+  { _,0,1,2,4,5,6,7,},
+  { _,_,_,3,4,5,6,7,},
+  { _,_,0,3,4,5,6,7,},
+  { _,_,1,3,4,5,6,7,},
+  { _,0,1,3,4,5,6,7,},
+  { _,_,2,3,4,5,6,7,},
+  { _,0,2,3,4,5,6,7,},
+  { _,1,2,3,4,5,6,7,},
+  { 0,1,2,3,4,5,6,7,},
+};
--- a/ext/htslib/htscodecs/htscodecs/pooled_alloc.h
+++ b/ext/htslib/htscodecs/htscodecs/pooled_alloc.h
@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2009-2010, 2013 Genome Research Ltd.
+ * Author(s): James Bonfield
+ * 
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ * 
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *    Institute nor the names of its contributors may be used to endorse
+ *    or promote products derived from this software without specific
+ *    prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Defined static here as we only use in one file for now and don't
+// want to pollute the library name space (io_lib has the same named
+// functions).
+
+#ifndef _POOLED_ALLOC_H_
+#define _POOLED_ALLOC_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+/*
+ * Implements a pooled block allocator where all items are the same size,
+ * but we need many of them.
+ *
+ * Items are carved sequentially out of large pools.  Individual items are
+ * never returned to the system; all storage is released together by
+ * pool_destroy().
+ */
+typedef struct {
+    void   *pool;   /* storage for the items of this pool */
+    size_t  used;   /* bytes of "pool" already handed out */
+} pool_t;
+
+typedef struct {
+    size_t dsize;   /* item size, a multiple of sizeof(void *) */
+    size_t npools;  /* number of entries in "pools" */
+    pool_t *pools;  /* all pools; new items come from the last one */
+    void *free;     /* head of the free list, linked through the items */
+} pool_alloc_t;
+
+/* Nominal pool size in bytes; parenthesised so that it expands safely
+ * inside any expression. */
+#define PSIZE (1024*1024)
+
+/*
+ * Creates an allocator for items of "dsize" bytes.  The item size is
+ * rounded up to a multiple of sizeof(void *) so that an unused item can
+ * hold a free-list link.  No pool is allocated until the first
+ * pool_alloc() call.
+ *
+ * Returns the new allocator on success,
+ *         NULL if dsize is too large to round up or memory runs out.
+ */
+static pool_alloc_t *pool_create(size_t dsize) {
+    pool_alloc_t *p;
+
+    /* The rounding below must not wrap around to a tiny size */
+    if (dsize > SIZE_MAX - (sizeof(void *) - 1))
+        return NULL;
+
+    if (NULL == (p = (pool_alloc_t *)malloc(sizeof(*p))))
+        return NULL;
+
+    /* Minimum size is a pointer, for free list */
+    dsize = (dsize + sizeof(void *) - 1) & ~(sizeof(void *)-1);
+    if (dsize < sizeof(void *))
+        dsize = sizeof(void *);
+    p->dsize = dsize;
+
+    p->npools = 0;
+    p->pools = NULL;
+    p->free  = NULL;
+
+    return p;
+}
+
+/*
+ * Appends one empty pool to the allocator.
+ *
+ * Returns the new pool on success,
+ *         NULL on memory failure (the allocator is left usable).
+ */
+static pool_t *new_pool(pool_alloc_t *p) {
+    size_t n = PSIZE / p->dsize;
+    pool_t *pool;
+
+    /* An item larger than PSIZE still needs a pool able to hold it */
+    if (n == 0)
+        n = 1;
+
+    pool = realloc(p->pools, (p->npools + 1) * sizeof(*p->pools));
+    if (NULL == pool) return NULL;
+    p->pools = pool;
+    pool = &p->pools[p->npools];
+
+    pool->pool = malloc(n * p->dsize);
+    if (NULL == pool->pool) return NULL;
+
+    pool->used = 0;
+
+    /* Only count the pool once its storage exists */
+    p->npools++;
+
+    return pool;
+}
+
+/*
+ * Frees every pool and the allocator itself.  All items obtained from
+ * pool_alloc() become invalid.  Passing NULL is a no-op.
+ */
+static void pool_destroy(pool_alloc_t *p) {
+    size_t i;
+
+    if (NULL == p)
+        return;
+
+    for (i = 0; i < p->npools; i++) {
+        free(p->pools[i].pool);
+    }
+    free(p->pools);
+    free(p);
+}
+
+/*
+ * Returns storage for one item of the allocator's item size, or NULL on
+ * memory failure.  The storage is not initialised.
+ */
+static void *pool_alloc(pool_alloc_t *p) {
+    pool_t *pool;
+    void *ret;
+
+    /* Look on free list */
+    if (NULL != p->free) {
+        ret = p->free;
+        p->free = *((void **)p->free);
+        return ret;
+    }
+
+    /* Look for space in the last pool; "<=" so that an item ending
+     * exactly at the pool boundary is still used */
+    if (p->npools) {
+        pool = &p->pools[p->npools - 1];
+        if (pool->used + p->dsize <= PSIZE) {
+            ret = ((char *) pool->pool) + pool->used;
+            pool->used += p->dsize;
+            return ret;
+        }
+    }
+
+    /* Need a new pool */
+    pool = new_pool(p);
+    if (NULL == pool) return NULL;
+
+    pool->used = p->dsize;
+    return pool->pool;
+}
+
+// static void pool_free(pool_alloc_t *p, void *ptr) {
+//     *(void **)ptr = p->free;
+//     p->free = ptr;
+// }
+
+#endif /*_POOLED_ALLOC_H_*/
--- a/ext/htslib/htscodecs/htscodecs/rANS_byte.h
+++ b/ext/htslib/htscodecs/htscodecs/rANS_byte.h
@ -0,0 +1,569 @@
+/* rans_byte.h originally from https://github.com/rygorous/ryg_rans
+ *
+ * This is a public-domain implementation of several rANS variants. rANS is an
+ * entropy coder from the ANS family, as described in Jarek Duda's paper
+ * "Asymmetric numeral systems" (http://arxiv.org/abs/1311.2540).
+ */
+
+/*-------------------------------------------------------------------------- */
+/* rans_byte.h from https://github.com/rygorous/ryg_rans */
+
+// Simple byte-aligned rANS encoder/decoder - public domain - Fabian 'ryg' Giesen 2014
+//
+// Not intended to be "industrial strength"; just meant to illustrate the general
+// idea.
+
+#ifndef RANS_BYTE_HEADER
+#define RANS_BYTE_HEADER
+
+#include <stdio.h>
+#include <stdint.h>
+#include <assert.h>
+
+#include "utils.h"
+
+#ifdef assert           // <assert.h> is included above, so this is the branch normally taken
+#define RansAssert assert
+#else
+#define RansAssert(x)   // no assert macro available: the check expands to nothing
+#endif
+
+// READ ME FIRST:
+//
+// This is designed like a typical arithmetic coder API, but there are three
+// twists you absolutely should be aware of before you start hacking:
+//
+// 1. You need to encode data in *reverse* - last symbol first. rANS works
+//    like a stack: last in, first out.
+// 2. Likewise, the encoder outputs bytes *in reverse* - that is, you give
+//    it a pointer to the *end* of your buffer (exclusive), and it will
+//    slowly move towards the beginning as more bytes are emitted.
+// 3. Unlike basically any other entropy coder implementation you might
+//    have used, you can interleave data from multiple independent rANS
+//    encoders into the same bytestream without any extra signaling;
+//    you can also just write some bytes by yourself in the middle if
+//    you want to. This is in addition to the usual arithmetic encoder
+//    property of being able to switch models on the fly. Writing raw
+//    bytes can be useful when you have some data that you know is
+//    incompressible, and is cheaper than going through the rANS encode
+//    function. Using multiple rANS coders on the same byte stream wastes
+//    a few bytes compared to using just one, but execution of two
+//    independent encoders can happen in parallel on superscalar and
+//    Out-of-Order CPUs, so this can be *much* faster in tight decoding
+//    loops.
+//
+//    This is why all the rANS functions take the write pointer as an
+//    argument instead of just storing it in some context struct.
+
+// --------------------------------------------------------------------------
+
+// L ('l' in the paper) is the lower bound of our normalization interval.
+// Between this and our byte-aligned emission, we use 31 (not 32!) bits.
+// This is done intentionally because exact reciprocals for 31-bit uints
+// fit in 32-bit uints: this permits some optimizations during encoding.
+#define RANS_BYTE_L (1u << 23)  // lower bound of our normalization interval (0x800000)
+
+// State for a rANS encoder (the decoder functions below use the same type). Yep, that's all there is to it.
+typedef uint32_t RansState;
+
+// Resets an encoder state to RANS_BYTE_L, the bottom of the normalization
+// interval. Call once per state before encoding the first symbol.
+static inline void RansEncInit(RansState* r)
+{
+    r[0] = RANS_BYTE_L;
+}
+
+#if 0 /* Currently unused */
+// Renormalize the encoder. Internal function.
+static inline RansState RansEncRenorm(RansState x, uint8_t** pptr, uint32_t freq, uint32_t scale_bits)
+{
+    uint32_t x_max = ((RANS_BYTE_L >> scale_bits) << 8) * freq; // this turns into a shift.
+    if (x >= x_max) {
+        uint8_t* ptr = *pptr;
+        do {
+            *--ptr = (uint8_t) (x & 0xff);
+            x >>= 8;
+        } while (x >= x_max);
+        *pptr = ptr;
+    }
+    return x;
+}
+
+// Encodes a single symbol with range start "start" and frequency "freq".
+// All frequencies are assumed to sum to "1 << scale_bits", and the
+// resulting bytes get written to ptr (which is updated).
+//
+// NOTE: With rANS, you need to encode symbols in *reverse order*, i.e. from
+// end to beginning! Likewise, the output bytestream is written *backwards*:
+// ptr starts pointing at the end of the output buffer and keeps decrementing.
+static inline void RansEncPut(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits)
+{
+    // renormalize
+    RansState x = RansEncRenorm(*r, pptr, freq, scale_bits);
+
+    // x = C(s,x)
+    *r = ((x / freq) << scale_bits) + (x % freq) + start;
+}
+#endif /* Currently unused */
+
+// Flushes the rANS encoder: stores the final state as four bytes, least
+// significant byte at the lowest address, directly below *pptr, and moves
+// *pptr back so that it points at the first of them.
+static inline void RansEncFlush(RansState* r, uint8_t** pptr)
+{
+    const uint32_t state = *r;
+    uint8_t* out = *pptr - 4;
+    int i;
+
+    for (i = 0; i < 4; i++)
+        out[i] = (uint8_t) (state >> (8 * i));
+
+    *pptr = out;
+}
+
+// Initializes a rANS decoder by loading a state from the four bytes at
+// *pptr (least significant byte first) and stepping *pptr past them.
+// Unlike the encoder, the decoder works forwards as you'd expect.
+static inline void RansDecInit(RansState* r, uint8_t** pptr)
+{
+    uint8_t* src = *pptr;
+    uint32_t state = 0;
+    int i;
+
+    // Assemble from the most significant byte down so each step is a
+    // plain shift-and-or.
+    for (i = 3; i >= 0; i--)
+        state = (state << 8) | src[i];
+
+    *pptr = src + 4;
+    *r = state;
+}
+
+// Returns the current cumulative frequency, i.e. the low scale_bits bits of
+// the state (map it to a symbol yourself!). The state is not modified.
+static inline uint32_t RansDecGet(RansState* r, uint32_t scale_bits)
+{
+    const uint32_t total = 1u << scale_bits;
+    return *r & (total - 1);
+}
+
+// Advances in the bit stream by "popping" a single symbol with range start
+// "start" and frequency "freq". All frequencies are assumed to sum to
+// "1 << scale_bits". Renormalization bytes are read from *pptr, which is
+// advanced past every byte consumed.
+static inline void RansDecAdvance(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits)
+{
+    const uint32_t state = *r;
+    const uint32_t slot = state & ((1u << scale_bits) - 1);
+
+    // s, x = D(x)
+    uint32_t next = freq * (state >> scale_bits) + slot - start;
+
+    // renormalize: shift in input bytes until the state is back at or
+    // above RANS_BYTE_L
+    if (next < RANS_BYTE_L) {
+        uint8_t* in = *pptr;
+        while (next < RANS_BYTE_L)
+            next = (next << 8) | *in++;
+        *pptr = in;
+    }
+
+    *r = next;
+}
+
+// --------------------------------------------------------------------------
+
+// That's all you need for a full encoder; below here are some utility
+// functions with extra convenience or optimizations.
+
+// Encoder symbol description
+// This (admittedly odd) selection of parameters was chosen to make
+// RansEncPutSymbol as cheap as possible. All fields are filled in by
+// RansEncSymbolInit.
+typedef struct {
+    uint32_t x_max;     // (Exclusive) upper bound of pre-normalization interval; 0 when freq is 0
+    uint32_t rcp_freq;  // Fixed-point reciprocal frequency
+    uint32_t bias;      // Bias: start, or start + (1 << scale_bits) - 1 when freq < 2
+    uint16_t cmpl_freq; // Complement of frequency: (1 << scale_bits) - freq
+    uint16_t rcp_shift; // Reciprocal shift, with the extra 32 already added
+} RansEncSymbol;
+
+// Decoder symbols are straightforward.
+// 32-bit means more memory, but oddly faster on old gcc? Why?
+// 322MB/s vs 309MB/s for order-1.
+// NOTE(review): the 32-bit remark presumably refers to RansDecSymbol32 below - confirm.
+typedef struct {
+    uint16_t freq;      // Symbol frequency.
+    uint16_t start;     // Start of range.
+} RansDecSymbol;
+
+typedef struct {        // Same two fields as RansDecSymbol, held in 32 bits each.
+    uint32_t freq;      // Symbol frequency.
+    uint32_t start;     // Start of range.
+} RansDecSymbol32;
+
+// Initializes an encoder symbol to start "start" and frequency "freq",
+// precomputing everything RansEncPutSymbol needs.
+//
+// Say M := 1 << scale_bits.
+//
+// The original encoder does:
+//   x_new = (x/freq)*M + start + (x%freq)
+//
+// The fast encoder does (schematically):
+//   q     = mul_hi(x, rcp_freq) >> rcp_shift   (division)
+//   r     = x - q*freq                         (remainder)
+//   x_new = q*M + bias + r                     (new x)
+// and substituting r gives:
+//   x_new = bias + x + q*(M - freq)
+//        =: bias + x + q*cmpl_freq             (*)
+//
+// cmpl_freq can be precomputed; the remaining fields are chosen below so
+// that the fast encoder and the original encoder agree.
+static inline void RansEncSymbolInit(RansEncSymbol* s, uint32_t start, uint32_t freq, uint32_t scale_bits)
+{
+    RansAssert(scale_bits <= 16);
+    RansAssert(start <= (1u << scale_bits));
+    RansAssert(freq <= (1u << scale_bits) - start);
+
+    uint32_t reciprocal;
+    uint32_t shift_bits;
+    uint32_t bias;
+
+    if (freq >= 2) {
+        // Alverson, "Integer Division using reciprocals"
+        // log2_up = ceil(log2(freq)), found by counting up.
+        uint32_t log2_up;
+        for (log2_up = 0; freq > (1u << log2_up); log2_up++)
+            ;
+
+        reciprocal = (uint32_t) (((1ull << (log2_up + 31)) + freq-1) / freq);
+        shift_bits = log2_up - 1;
+
+        // With these values 'q' is the correct quotient, so bias=start.
+        bias = start;
+    } else {
+        // freq=0 symbols are never valid to encode, so their values do not
+        // matter.
+        //
+        // freq=1 is tricky: the reciprocal of 1 is 1, but the fixed-point
+        // reciprocal can only represent multipliers smaller than 1. The
+        // "next best thing" is rcp_freq=0xffffffff, rcp_shift=0, giving:
+        //   q = mul_hi(x, (1<<32) - 1) >> 0
+        //     = floor(x - x/(2^32))
+        //     = x - 1 if 1 <= x < 2^32
+        // (x=0 is never in a valid normalization interval, so x>0).
+        //
+        // The desired result is x_new = x*M + start. Plugging q=x-1 and
+        // cmpl_freq=M-1 into (*):
+        //     bias + x + (x - 1)*(M - 1)
+        //   = bias + 1 + (x - 1)*M
+        //   = x*M + (bias + 1 - M)
+        // so start = bias + 1 - M, or equivalently
+        //   bias = start + M - 1.
+        reciprocal = ~0u;
+        shift_bits = 0;
+        bias = start + (1 << scale_bits) - 1;
+    }
+
+    s->x_max     = ((RANS_BYTE_L >> scale_bits) << 8) * freq;
+    s->cmpl_freq = (uint16_t) ((1 << scale_bits) - freq);
+    s->rcp_freq  = reciprocal;
+    s->bias      = bias;
+
+    // Fold in 32 here to avoid the extra >>32 in RansEncPutSymbol.
+    s->rcp_shift = (uint16_t) (shift_bits + 32);
+}
+
+// Initialize a decoder symbol to start "start" and frequency "freq".
+// Both values are stored in 16-bit fields.
+static inline void RansDecSymbolInit(RansDecSymbol* s, uint32_t start, uint32_t freq)
+{
+    RansAssert(start <= (1 << 16));
+    RansAssert(freq <= (1 << 16) - start);
+
+    s->freq  = (uint16_t) freq;
+    s->start = (uint16_t) start;
+}
+
+// Encodes a given symbol. This is faster than straight RansEnc since we can do
+// multiplications instead of a divide.
+//
+// r is the encoder state (updated in place), *pptr the write pointer (moved
+// backwards by the number of bytes emitted) and sym the precomputed symbol.
+//
+// See RansEncSymbolInit for a description of how this works.
+static inline void RansEncPutSymbol(RansState* r, uint8_t** pptr, RansEncSymbol const* sym)
+{
+    RansAssert(sym->x_max != 0); // can't encode symbol with freq=0
+
+    // renormalize
+    uint32_t x = *r;
+    uint32_t x_max = sym->x_max;
+
+    // This is better for 40-qual illumina (3.7% quicker overall CRAM).
+    // The old method was better for low complexity data such as NovaSeq
+    // quals (2.6% quicker overall CRAM).
+    //
+    // Branch-free first step: the low byte is stored at ptr[-1]
+    // unconditionally (so that byte must be writable), and is only kept,
+    // by moving ptr and shifting x, when o is 1.
+    int o = x >= x_max;
+    uint8_t* ptr = *pptr;
+    ptr[-1] = x & 0xff;
+    ptr -= o;
+    x >>= o*8;
+
+    // Second byte, hinted as the uncommon case.
+    if (unlikely(x >= x_max)) {
+        *--ptr = (uint8_t) (x & 0xff);
+        x >>= 8;
+    }
+    *pptr = ptr;
+
+    //uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift);
+    //*r = q * sym->cmpl_freq + x + sym->bias;
+
+    // x = C(s,x)
+    // NOTE: written this way so we get a 32-bit "multiply high" when
+    // available. If you're on a 64-bit platform with cheap multiplies
+    // (e.g. x64), just bake the +32 into rcp_shift.
+    //uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> 32) >> sym->rcp_shift;
+
+    // The extra >>32 has already been added to RansEncSymbolInit
+    uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift);
+    *r = q * sym->cmpl_freq + x + sym->bias;
+}
+
+// A 4-way version of RansEncPutSymbol, renormalising 4 states
+// simultaneously with their results written to the same ptr buffer.
+// (This is perhaps a failing as it makes optimisation tricky.)
+//
+// States are renormalised in the order r0, r1, r2, r3, each one emitting
+// up to two bytes below the shared write pointer, and then each state is
+// updated with its own symbol.
+static inline void RansEncPutSymbol4(RansState *r0,
+                                     RansState *r1,
+                                     RansState *r2,
+                                     RansState *r3,
+                                     uint8_t** pptr,
+                                     RansEncSymbol const *sym0,
+                                     RansEncSymbol const *sym1,
+                                     RansEncSymbol const *sym2,
+                                     RansEncSymbol const *sym3)
+{
+    RansAssert(sym0->x_max != 0); // can't encode symbol with freq=0
+    RansAssert(sym1->x_max != 0); // can't encode symbol with freq=0
+    RansAssert(sym2->x_max != 0); // can't encode symbol with freq=0
+    RansAssert(sym3->x_max != 0); // can't encode symbol with freq=0
+
+    // renormalize
+    uint32_t x0, x1, x2, x3;
+    uint8_t* ptr = *pptr;
+
+    int o;
+    uint32_t m[4] = {
+        sym0->x_max,
+        sym1->x_max,
+        sym2->x_max,
+        sym3->x_max
+    };
+
+    // Each state below uses the same pattern as RansEncPutSymbol: the low
+    // byte is stored at ptr[-1] unconditionally (the assignment keeps only
+    // the low 8 bits) and retained only when o is 1; a second byte follows
+    // if the state is still too large.
+    x0 = *r0;
+    o = x0 >= m[0];
+    ptr[-1] = x0;
+    ptr -= o;
+    x0 >>= o*8;
+    if (x0 >= m[0]) {
+        *--ptr = x0;
+        x0 >>= 8;
+    }
+
+    x1 = *r1;
+    o = x1 >= m[1];
+    ptr[-1] = x1;
+    ptr -= o;
+    x1 >>= o*8;
+    if (x1 >= m[1]) {
+        *--ptr = x1;
+        x1 >>= 8;
+    }
+
+    x2 = *r2;
+    o = x2 >= m[2];
+    ptr[-1] = x2;
+    ptr -= o;
+    x2 >>= o*8;
+    if (x2 >= m[2]) {
+        *--ptr = x2;
+        x2 >>= 8;
+    }
+
+    x3 = *r3;
+    o = x3 >= m[3];
+    ptr[-1] = x3;
+    ptr -= o;
+    x3 >>= o*8;
+    if (x3 >= m[3]) {
+        *--ptr = x3;
+        x3 >>= 8;
+    }
+
+    *pptr = ptr;
+
+    // x = C(s,x)
+    // Same update as RansEncPutSymbol, done in two pairs.
+    uint32_t qa, qb;
+    qa = (uint32_t) (((uint64_t)x0 * sym0->rcp_freq) >> sym0->rcp_shift);
+    uint32_t X0 = qa * sym0->cmpl_freq;
+    qb = (uint32_t) (((uint64_t)x1 * sym1->rcp_freq) >> sym1->rcp_shift);
+    uint32_t X1 = qb * sym1->cmpl_freq;
+
+    *r0 = X0 + x0 + sym0->bias;
+    *r1 = X1 + x1 + sym1->bias;
+
+    qa = (uint32_t) (((uint64_t)x2 * sym2->rcp_freq) >> sym2->rcp_shift);
+    uint32_t X2 = qa * sym2->cmpl_freq;
+    qb = (uint32_t) (((uint64_t)x3 * sym3->rcp_freq) >> sym3->rcp_shift);
+    uint32_t X3 = qb * sym3->cmpl_freq;
+
+    *r2 = X2 + x2 + sym2->bias;
+    *r3 = X3 + x3 + sym3->bias;
+}
+
+// Symbol-taking form of RansDecAdvance: pops the symbol described by sym
+// (its start and freq) from the state and renormalizes from *pptr.
+static inline void RansDecAdvanceSymbol(RansState* r, uint8_t** pptr, RansDecSymbol const* sym, uint32_t scale_bits)
+{
+    const uint32_t range_start = sym->start;
+    const uint32_t range_freq  = sym->freq;
+
+    RansDecAdvance(r, pptr, range_start, range_freq, scale_bits);
+}
+
+// Pops a single symbol with range start "start" and frequency "freq" from
+// the state. All frequencies are assumed to sum to "1 << scale_bits".
+// Only the state is updated: no renormalization is done and no input is
+// read.
+static inline void RansDecAdvanceStep(RansState* r, uint32_t start, uint32_t freq, uint32_t scale_bits)
+{
+    const uint32_t state = *r;
+    const uint32_t quotient = state >> scale_bits;
+    const uint32_t slot = state & ((1u << scale_bits) - 1);
+
+    // s, x = D(x)
+    *r = freq * quotient + slot - start;
+}
+
+// Symbol-taking form of RansDecAdvanceStep: pops the symbol described by
+// sym from the state without renormalizing.
+static inline void RansDecAdvanceSymbolStep(RansState* r, RansDecSymbol const* sym, uint32_t scale_bits)
+{
+    const uint32_t range_start = sym->start;
+    const uint32_t range_freq  = sym->freq;
+
+    RansDecAdvanceStep(r, range_start, range_freq, scale_bits);
+}
+
+// Renormalize.
+#if defined(__x86_64) && !defined(__ILP32__)
+/*
+ * Assembly variants of the RansDecRenorm code.
+ * These are based on joint ideas from Rob Davies and from looking at
+ * the clang assembly output.
+ *
+ * RansDecRenorm brings one state back to at least 0x800000 (the value of
+ * RANS_BYTE_L) by shifting in up to two bytes from *pptr.  The first byte
+ * is handled without a branch: the byte at ptr is always loaded, and the
+ * shifted-in value and pointer increment are only applied when the state
+ * is below the bound.  The second byte uses ordinary C.
+ */
+static inline void RansDecRenorm(RansState* r, uint8_t** pptr) {
+    uint32_t  x   = *r;
+    uint8_t  *ptr = *pptr;
+
+    __asm__ ("movzbl (%0), %%eax\n\t"      /* eax = *ptr (loaded unconditionally) */
+             "mov    %1, %%edx\n\t"
+             "shl    $0x8,%%edx\n\t"
+             "or     %%eax,%%edx\n\t"      /* edx = (x << 8) | *ptr */
+             "cmp    $0x800000,%1\n\t"     /* carry set when x < 0x800000 */
+             "cmovb  %%edx,%1\n\t"         /* x = edx if below */
+             "adc    $0x0,%0\n\t"          /* ptr += carry */
+             : "=r" (ptr), "=r" (x)
+             : "0" (ptr), "1" (x)
+             : "eax", "edx"
+             );
+    if (x < 0x800000) x = (x << 8) | *ptr++;
+    *pptr = ptr;
+    *r = x;
+}
+
+/*
+ * A variant that normalises two rans states.
+ * The only minor tweak here is to adjust the reorder a few opcodes
+ * to reduce dependency delays.
+ *
+ * r1 is renormalised first, then r2, both reading from the same *pptr.
+ * For each state the first byte is applied with cmov/adc (no branch) and
+ * the second byte, if still needed, is read behind a conditional jump.
+ */
+static inline void RansDecRenorm2(RansState* r1, RansState* r2, uint8_t** pptr) {
+    uint32_t  x1   = *r1;
+    uint32_t  x2   = *r2;
+    uint8_t  *ptr = *pptr;
+
+    __asm__ ("movzbl (%0), %%eax\n\t"      /* x1, first byte: branch-free */
+             "mov    %1, %%edx\n\t"
+             "shl    $0x8, %%edx\n\t"
+             "or     %%eax, %%edx\n\t"
+             "cmp    $0x800000, %1\n\t"
+             "cmovb  %%edx, %1\n\t"
+             "adc    $0x0, %0\n\t"
+             "mov    %2, %%edx\n\t"        /* start preparing x2 << 8 early */
+             "shl    $0x8, %%edx\n\t"
+             "cmp    $0x800000, %1\n\t"    /* x1, second byte if still below */
+             "jae    1f\n\t"
+             "movzbl (%0), %%eax\n\t"
+             "shl    $0x8, %1\n\t"
+             "or     %%eax, %1\n\t"
+             "add    $0x1, %0\n\t"
+             "1:\n\t"
+             "movzbl (%0), %%eax\n\t"      /* x2, first byte: branch-free */
+             "or     %%eax, %%edx\n\t"
+             "cmp    $0x800000, %2\n\t"
+             "cmovb  %%edx, %2\n\t"
+             "adc    $0x0, %0\n\t"
+             "cmp    $0x800000, %2\n\t"    /* x2, second byte if still below */
+             "jae    2f\n\t"
+             "movzbl (%0), %%eax\n\t"
+             "shl    $0x8, %2\n\t"
+             "or     %%eax, %2\n\t"
+             "add    $0x1, %0\n\t"
+             "2:\n\t"
+             : "=r" (ptr), "=r" (x1), "=r" (x2)
+             : "0" (ptr), "1" (x1), "2" (x2)
+             : "eax", "edx"
+             );
+
+    *pptr = ptr;
+    *r1 = x1;
+    *r2 = x2;
+}
+
+#else /* __x86_64 */
+
+// Portable renormalization: brings the state back to at least RANS_BYTE_L
+// by shifting in up to two bytes from *pptr, advancing *pptr past each
+// byte used. No bounds check is made on the input.
+static inline void RansDecRenorm(RansState* r, uint8_t** pptr)
+{
+    // renormalize
+    uint32_t x = *r;
+
+#ifdef __clang__
+    // Generates cmov instructions on clang, but alas not gcc
+    // Note that *ptr is read here even when no renormalization is needed.
+    uint8_t* ptr = *pptr;
+    uint32_t y = (x << 8) | *ptr;
+    uint32_t cond = x < RANS_BYTE_L;
+    x    = cond ? y : x;
+    ptr += cond ? 1 : 0;
+    if (x < RANS_BYTE_L) x = (x<<8) | *ptr++;
+    *pptr = ptr;
+#else
+    // Early exit leaves both the state and *pptr untouched.
+    if (x >= RANS_BYTE_L) return;
+    uint8_t* ptr = *pptr;
+    x = (x << 8) | *ptr++;
+    if (x < RANS_BYTE_L) x = (x << 8) | *ptr++;
+    *pptr = ptr;
+#endif /* __clang__ */
+
+    *r = x;
+}
+
+// Portable two-state renormalization: renormalizes r1 and then r2, both
+// drawing their bytes from the shared input pointer *pptr.
+static inline void RansDecRenorm2(RansState* r1, RansState* r2, uint8_t** pptr)
+{
+    RansDecRenorm(r1, pptr);
+    RansDecRenorm(r2, pptr);
+}
+
+#endif /* __x86_64 */
+
+// Bounds-checked renormalization: shifts in at most two bytes from *pptr,
+// stopping as soon as the state reaches RANS_BYTE_L or the read position
+// reaches ptr_end. If nothing is read, neither *r nor *pptr is written.
+static inline void RansDecRenormSafe(RansState* r, uint8_t** pptr, uint8_t *ptr_end)
+{
+    uint32_t state = *r;
+    uint8_t* in = *pptr;
+    int pulled;
+
+    for (pulled = 0; pulled < 2; pulled++) {
+        if (state >= RANS_BYTE_L || in >= ptr_end)
+            break;
+        state = (state << 8) | *in++;
+    }
+
+    if (pulled == 0)
+        return;
+
+    *pptr = in;
+    *r = state;
+}
+
+// Initialize a 32-bit decoder symbol to start "start" and frequency "freq".
+//
+// RansDecSymbol32 has uint32_t fields, so the values are stored as given.
+// Narrowing them to uint16_t first would turn 1 << 16, which the asserts
+// below permit, into 0.
+static inline void RansDecSymbolInit32(RansDecSymbol32* s, uint32_t start, uint32_t freq)
+{
+    RansAssert(start <= (1 << 16));
+    RansAssert(freq <= (1 << 16) - start);
+    s->start = start;
+    s->freq = freq;
+}
+
+// RansDecAdvance for a 32-bit decoder symbol: pops the symbol described by
+// sym from the state and renormalizes from *pptr.
+static inline void RansDecAdvanceSymbol32(RansState* r, uint8_t** pptr, RansDecSymbol32 const* sym, uint32_t scale_bits)
+{
+    const uint32_t range_start = sym->start;
+    const uint32_t range_freq  = sym->freq;
+
+    RansDecAdvance(r, pptr, range_start, range_freq, scale_bits);
+}
+
+#endif // RANS_BYTE_HEADER
--- a/ext/htslib/htscodecs/htscodecs/rANS_static.c
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static.c
@ -0,0 +1,850 @@
+/*
+ * Copyright (c) 2014-2022 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+// Use 11 for order-1?
+#define TF_SHIFT 12             // bits of frequency precision; passed as scale_bits to RansEncSymbolInit
+#define TOTFREQ (1<<TF_SHIFT)   // 4096 with TF_SHIFT 12
+
+#include "rANS_byte.h"
+#include "utils.h"
+
+/*-------------------------------------------------------------------------- */
+/*
+ * Example wrapper to use the rans_byte.h functions included above.
+ *
+ * This demonstrates how to use, and unroll, an order-0 and order-1 frequency
+ * model.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <assert.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/time.h>
+#ifndef NO_THREADS
+#include <pthread.h>
+#endif
+
+#include "rANS_static.h"
+
+#define ABS(a) ((a)>0?(a):-(a)) // NOTE: evaluates its argument twice; avoid side effects in (a)
+
+/*-----------------------------------------------------------------------------
+ * Memory to memory compression functions.
+ *
+ * These are original versions without any manual loop unrolling. They
+ * are easier to understand, but can be up to 2x slower.
+ */
+
+/*
+ * Order-0 rANS compression of in[0..in_size-1] using four interleaved
+ * encoder states.
+ *
+ * Output layout, as written below:
+ *   byte  0     order (0)
+ *   bytes 1-4   number of bytes following this 9-byte header, little endian
+ *   bytes 5-8   in_size, little endian
+ *   then        the frequency table, terminated by a 0 byte
+ *   then        the rANS byte stream
+ *
+ * Returns a malloc()ed buffer and stores the number of bytes used in
+ * *out_size.  Returns NULL if the buffer cannot be allocated or hist8()
+ * returns a negative value.
+ */
+static
+unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size,
+                                unsigned int *out_size) {
+    unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9);
+    unsigned char *cp, *out_end;
+    RansEncSymbol syms[256];
+    RansState rans0;
+    RansState rans2;
+    RansState rans1;
+    RansState rans3;
+    uint8_t* ptr;
+    int F[256+MAGIC] = {0}, i, j, tab_size, rle, x, fsum = 0;
+    int m = 0, M = 0;
+    uint64_t tr;
+
+    if (!out_buf)
+        return NULL;
+
+    // The encoder writes backwards, so ptr starts at the end of the buffer.
+    ptr = out_end = out_buf + (uint32_t)(1.05*in_size) + 257*257*3 + 9;
+
+    // Compute statistics
+    // NOTE(review): hist8 is declared elsewhere; presumably it fills F with
+    // per-byte counts - confirm.
+    if (hist8(in, in_size, (uint32_t *)F) < 0) {
+        free(out_buf);
+        return NULL;
+    }
+    // Scale factor in 31-bit fixed point, roughly TOTFREQ/in_size.
+    tr = in_size ? ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size : 0;
+
+ normalise_harder:
+    // Normalise so T[i] == TOTFREQ
+    // M tracks the symbol with the largest frequency seen before scaling
+    // in this pass; it absorbs the rounding difference below.
+    for (fsum = m = M = j = 0; j < 256; j++) {
+        if (!F[j])
+            continue;
+
+        if (m < F[j])
+            m = F[j], M = j;
+
+        // A symbol that is present never gets frequency 0.
+        if ((F[j] = (F[j]*tr)>>31) == 0)
+            F[j] = 1;
+        fsum += F[j];
+    }
+
+    // With fsum one more than the actual sum, both adjustments below leave
+    // the frequencies summing to TOTFREQ-1.
+    fsum++;
+    if (fsum < TOTFREQ) {
+        F[M] += TOTFREQ-fsum;
+    } else if (fsum-TOTFREQ > F[M]/2) {
+        // Corner case to avoid excessive frequency reduction
+        tr = 2104533975; goto normalise_harder; // equiv to *0.98.
+    } else {
+        F[M] -= fsum-TOTFREQ;
+    }
+
+    //printf("F[%d]=%d\n", M, F[M]);
+    assert(F[M]>0);
+
+    // Encode statistics.
+    // The first 9 bytes are reserved for the header written at the end.
+    cp = out_buf+9;
+
+    // x accumulates the range start of each symbol; rle counts symbols
+    // whose index byte is omitted because they continue a run.
+    for (x = rle = j = 0; j < 256; j++) {
+        if (F[j]) {
+            // j
+            if (rle) {
+                rle--;
+            } else {
+                *cp++ = j;
+                // If the previous symbol was also present, follow the
+                // index with the count of consecutive present symbols
+                // after j.
+                if (!rle && j && F[j-1])  {
+                    for(rle=j+1; rle<256 && F[rle]; rle++)
+                        ;
+                    rle -= j+1;
+                    *cp++ = rle;
+                }
+                //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]);
+            }
+            
+            // F[j]
+            // One byte below 128, otherwise two bytes with the top bit of
+            // the first set.
+            if (F[j]<128) {
+                *cp++ = F[j];
+            } else {
+                *cp++ = 128 | (F[j]>>8);
+                *cp++ = F[j]&0xff;
+            }
+            RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT);
+            x += F[j];
+        }
+    }
+    *cp++ = 0;
+
+    //write(2, out_buf+4, cp-(out_buf+4));
+    // tab_size counts the 9 header bytes as well as the table.
+    tab_size = cp-out_buf;
+
+    RansEncInit(&rans0);
+    RansEncInit(&rans1);
+    RansEncInit(&rans2);
+    RansEncInit(&rans3);
+
+    // Encode the trailing in_size%4 bytes first (encoding runs from the
+    // end of the input towards the start).
+    switch (i=(in_size&3)) {
+    case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]);
+        // fall-through
+    case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]);
+        // fall-through
+    case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]);
+        // fall-through
+    case 0:
+        break;
+    }
+    // Main loop: four bytes per iteration, one per state, last byte first.
+    for (i=(in_size &~3); likely(i>0); i-=4) {
+        RansEncSymbol *s3 = &syms[in[i-1]];
+        RansEncSymbol *s2 = &syms[in[i-2]];
+        RansEncSymbol *s1 = &syms[in[i-3]];
+        RansEncSymbol *s0 = &syms[in[i-4]];
+
+        RansEncPutSymbol(&rans3, &ptr, s3);
+        RansEncPutSymbol(&rans2, &ptr, s2);
+        RansEncPutSymbol(&rans1, &ptr, s1);
+        RansEncPutSymbol(&rans0, &ptr, s0);
+    }
+
+    // Flushed in reverse so that rans0 ends up first in the stream.
+    RansEncFlush(&rans3, &ptr);
+    RansEncFlush(&rans2, &ptr);
+    RansEncFlush(&rans1, &ptr);
+    RansEncFlush(&rans0, &ptr);
+
+    // Finalise block size and return it
+    *out_size = (out_end - ptr) + tab_size;
+
+    cp = out_buf;
+
+    *cp++ = 0; // order
+    *cp++ = ((*out_size-9)>> 0) & 0xff;
+    *cp++ = ((*out_size-9)>> 8) & 0xff;
+    *cp++ = ((*out_size-9)>>16) & 0xff;
+    *cp++ = ((*out_size-9)>>24) & 0xff;
+
+    *cp++ = (in_size>> 0) & 0xff;
+    *cp++ = (in_size>> 8) & 0xff;
+    *cp++ = (in_size>>16) & 0xff;
+    *cp++ = (in_size>>24) & 0xff;
+
+    // Move the encoded bytes from the end of the buffer to sit directly
+    // after the frequency table.
+    memmove(out_buf + tab_size, ptr, out_end-ptr);
+
+    return out_buf;
+}
+
+typedef struct {                // NOTE(review): not referenced by rans_compress_O0 / rans_uncompress_O0
+    unsigned char R[TOTFREQ];   // one byte per slot of the TOTFREQ-sized frequency range
+} ari_decoder;
+
+/*
+ * Order-0 rANS decompression of the in_size bytes at in.
+ *
+ * The input starts with a 9-byte header (order byte, compressed size
+ * excluding the header, uncompressed size; both sizes little endian),
+ * followed by the frequency table and then the rANS byte stream.
+ *
+ * Returns a malloc()ed buffer of the uncompressed size and stores that
+ * size in *out_size.  Returns NULL if the header or frequency table is
+ * rejected, if there is not enough input, or if the allocation fails.
+ */
+static
+unsigned char *rans_uncompress_O0(unsigned char *in, unsigned int in_size,
+                                  unsigned int *out_size) {
+    /* Load in the static tables */
+    unsigned char *cp = in + 9;
+    unsigned char *cp_end = in + in_size;
+    const uint32_t mask = (1u << TF_SHIFT)-1;
+    int i, j, rle;
+    unsigned int x, y;
+    unsigned int out_sz, in_sz;
+    char *out_buf;
+    RansState R[4];
+    RansState m[4];
+    // Lookup tables indexed by slot (the low TF_SHIFT bits of a state):
+    // sfreq is the owning symbol's frequency, ssym the symbol itself and
+    // sbase the slot's offset from the start of that symbol's range.
+    uint16_t sfreq[TOTFREQ+32];
+    uint16_t ssym [TOTFREQ+32]; // faster, but only needs uint8_t
+    uint32_t sbase[TOTFREQ+16]; // faster, but only needs uint16_t
+
+    if (in_size < 26) // Need at least this many bytes just to start
+        return NULL;
+
+    // After this increment in[0..3] is the compressed size and in[4..7]
+    // the uncompressed size; cp already points past the header.
+    if (*in++ != 0) // Order-0 check
+        return NULL;
+    
+    in_sz  = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24);
+    out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24);
+    if (in_sz != in_size-9)
+        return NULL;
+
+    if (out_sz >= INT_MAX)
+        return NULL; // protect against some overflow cases
+
+    // For speeding up the fuzzer only.
+    // Small input can lead to large uncompressed data.
+    // We reject this as it just slows things up instead of testing more code
+    // paths (once we've verified a few times for large data).
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (out_sz > 100000)
+        return NULL;
+#endif
+
+    out_buf = malloc(out_sz);
+    if (!out_buf)
+        return NULL;
+
+    //fprintf(stderr, "out_sz=%d\n", out_sz);
+
+    // Precompute reverse lookup of frequency.
+    // j is the current symbol, x the running total of frequencies and rle
+    // the number of following symbols whose index byte is implied.
+    rle = x = y = 0;
+    j = *cp++;
+    do {
+        int F, C;
+        if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left
+        // Frequencies of 128 or more take two bytes, flagged by the top bit.
+        if ((F = *cp++) >= 128) {
+            F &= ~128;
+            F = ((F & 127) << 8) | *cp++;
+        }
+        C = x;
+
+        // Reject tables whose frequencies would overrun the lookup arrays.
+        if (x + F > TOTFREQ)
+            goto cleanup;
+
+        for (y = 0; y < F; y++) {
+            ssym [y + C] = j;
+            sfreq[y + C] = F;
+            sbase[y + C] = y;
+        }
+        x += F;
+
+        // Next symbol: start of a run (index followed by run length),
+        // continuation of a run, or an explicit index. Index 0 ends the
+        // table.
+        if (!rle && j+1 == *cp) {
+            j = *cp++;
+            rle = *cp++;
+        } else if (rle) {
+            rle--;
+            j++;
+            if (j > 255)
+                goto cleanup;
+        } else {
+            j = *cp++;
+        }
+    } while(j);
+
+    if (x < TOTFREQ-1 || x > TOTFREQ)
+        goto cleanup;
+    if (x != TOTFREQ) {
+        // Protection against accessing uninitialised memory in the case
+        // where SUM(freqs) == 4095 and not 4096.
+        ssym [x] = ssym [x-1];
+        sfreq[x] = sfreq[x-1];
+        sbase[x] = sbase[x-1]+1;
+    }
+
+    // 16 bytes of cp here. Also why cp - 16 in above loop.
+    if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left
+
+    // Load the four initial states; each must already be normalised.
+    RansDecInit(&R[0], &cp); if (R[0] < RANS_BYTE_L) goto cleanup;
+    RansDecInit(&R[1], &cp); if (R[1] < RANS_BYTE_L) goto cleanup;
+    RansDecInit(&R[2], &cp); if (R[2] < RANS_BYTE_L) goto cleanup;
+    RansDecInit(&R[3], &cp); if (R[3] < RANS_BYTE_L) goto cleanup;
+
+    int out_end = (out_sz&~3);
+    cp_end -= 8; // within 8 for simplicity of loop below
+    // 2 x likely() here harms gcc 7.5 by about 8% rate drop, but only in O2
+    for (i=0; likely(i < out_end); i+=4) {
+        //                              /curr code
+        // gcc7  O2 513/497   562/556++ 556/547 ok
+        // gcc7  O3 566/552   569/553   581/563+
+        // gcc10 O2 544/538   563/547   541/537-?
+        // gcc10 O3 531/519   546/530   575/546+
+        // gcc11 O2 512/490   588/540   540/535 mid
+        // gcc11 O3 482/471   553/541   549/535
+        // gcc12 O2 533/526   544/534   539/535
+        // gcc12 O3 548/533   502/497-- 553/527 ok
+        // clang10  555/542   564/549   560/541
+        // clang13  560/553   572/559   556/559
+        // For each state: take the slot, then pop the symbol owning it.
+        m[0] = R[0] & mask;
+        R[0] = sfreq[m[0]] * (R[0] >> TF_SHIFT) + sbase[m[0]];
+
+        m[1] = R[1] & mask;
+        R[1] = sfreq[m[1]] * (R[1] >> TF_SHIFT) + sbase[m[1]];
+
+        m[2] = R[2] & mask;
+        R[2] = sfreq[m[2]] * (R[2] >> TF_SHIFT) + sbase[m[2]];
+
+        m[3] = R[3] & mask;
+        R[3] = sfreq[m[3]] * (R[3] >> TF_SHIFT) + sbase[m[3]];
+
+        // likely() here harms gcc12 -O3
+        // More than 8 input bytes remain: use the unchecked renormalisers.
+        // Otherwise use the bounded ones, limited to the true end of input
+        // (cp_end was moved back by 8 above).
+        if (cp<cp_end) {
+            RansDecRenorm2(&R[0], &R[1], &cp);
+            RansDecRenorm2(&R[2], &R[3], &cp);
+        } else {
+            RansDecRenormSafe(&R[0], &cp, cp_end+8);
+            RansDecRenormSafe(&R[1], &cp, cp_end+8);
+            RansDecRenormSafe(&R[2], &cp, cp_end+8);
+            RansDecRenormSafe(&R[3], &cp, cp_end+8);
+        }
+
+        out_buf[i+0] = ssym[m[0]];
+        out_buf[i+1] = ssym[m[1]];
+        out_buf[i+2] = ssym[m[2]];
+        out_buf[i+3] = ssym[m[3]];
+    }
+
+
+    // The last out_sz%4 symbols are read straight from the current states,
+    // without advancing them.
+    switch(out_sz&3) {
+    case 3:
+        out_buf[out_end + 2] = ssym[R[2] & mask];
+        // fall-through
+    case 2:
+        out_buf[out_end + 1] = ssym[R[1] & mask];
+        // fall-through
+    case 1:
+        out_buf[out_end] = ssym[R[0] & mask];
+        // fall-through
+    default:
+        break;
+    }
+    
+    *out_size = out_sz;
+    return (unsigned char *)out_buf;
+
+ cleanup:
+    free(out_buf);
+    return NULL;
+}
+
+/*
+ * Order-1 rANS encoder: every byte is coded with a frequency table selected
+ * by the byte that precedes it.  The input is split into four quarters that
+ * are encoded by four interleaved rANS states.
+ *
+ * in        input bytes
+ * in_size   number of input bytes; inputs shorter than 4 bytes are passed
+ *           to rans_compress_O0() instead
+ * out_size  receives the total number of bytes produced, including the
+ *           9 byte header
+ *
+ * Output layout: 1 byte order (1), 4 bytes little-endian compressed size
+ * excluding the 9 byte header, 4 bytes little-endian in_size, the
+ * frequency tables, then the rANS byte stream.
+ *
+ * Returns a malloc()ed buffer, or NULL if an allocation or hist1_4() fails.
+ */
+static
+unsigned char *rans_compress_O1(unsigned char *in, unsigned int in_size,
+                                unsigned int *out_size) {
+    unsigned char *out_buf = NULL, *out_end, *cp;
+    unsigned int tab_size, rle_i, rle_j;
+
+
+    // Too short to split into four streams; use the order-0 coder.
+    if (in_size < 4)
+        return rans_compress_O0(in, in_size, out_size);
+
+    int (*F)[256];
+    RansEncSymbol (*syms)[256];
+
+    // A single scratch block holds the 256x256 encoder symbols followed by
+    // the 256x256 frequency counts.
+    uint8_t *mem = htscodecs_tls_alloc(256 * (sizeof(*syms) + sizeof(*F)));
+    if (!mem)
+        return NULL;
+    syms = (RansEncSymbol (*)[256])mem;
+    F = (int (*)[256])(mem + 256*sizeof(*syms));
+    memset(F, 0, 256*sizeof(*F));
+
+    // syms aliases mem, which was checked above, so this cannot trigger.
+    if (!syms) goto cleanup;
+    int T[256+MAGIC] = {0};
+    int i, j;
+
+    // Capacity: 1.05 * in_size plus room for the tables and the header.
+    out_buf = malloc(1.05*in_size + 257*257*3 + 9);
+    if (!out_buf) goto cleanup;
+
+    out_end = out_buf + (uint32_t)(1.05*in_size) + 257*257*3 + 9;
+    // Skip the 9 header bytes; they are filled in once the sizes are known.
+    cp = out_buf+9;
+
+    // NOTE(review): hist1_4() is defined elsewhere; from its use here it
+    // fills F[context][symbol] and the per-context totals T[] - confirm.
+    if (hist1_4(in, in_size, (uint32_t (*)[256])F, (uint32_t *)T) < 0) {
+        free(out_buf);
+        out_buf = NULL;
+        goto cleanup;
+    }
+
+    // The first byte of quarters 1-3 is encoded with context 0 (see the
+    // syms[0][...] puts after the main loop), so count them there.
+    F[0][in[1*(in_size>>2)]]++;
+    F[0][in[2*(in_size>>2)]]++;
+    F[0][in[3*(in_size>>2)]]++;
+    T[0]+=3;
+
+    
+    // Normalise so T[i] == TOTFREQ
+    for (rle_i = i = 0; i < 256; i++) {
+        int t2, m, M;
+        unsigned int x;
+
+        // Context i never occurs; nothing to store.
+        if (T[i] == 0)
+            continue;
+
+        //uint64_t p = (TOTFREQ * TOTFREQ) / t;
+        double p = ((double)TOTFREQ)/T[i];
+    normalise_harder:
+        // Scale each non-zero count by p (never letting it reach zero),
+        // remembering the largest pre-scaling count (index M) and the new
+        // sum (t2).
+        for (t2 = m = M = j = 0; j < 256; j++) {
+            if (!F[i][j])
+                continue;
+
+            if (m < F[i][j])
+                m = F[i][j], M = j;
+
+            //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0)
+            if ((F[i][j] *= p) == 0)
+                F[i][j] = 1;
+            t2 += F[i][j];
+        }
+
+        // With this extra 1 the adjustments below leave the row summing to
+        // TOTFREQ-1 rather than TOTFREQ.
+        t2++;
+        if (t2 < TOTFREQ) {
+            F[i][M] += TOTFREQ-t2;
+        } else if (t2-TOTFREQ >= F[i][M]/2) {
+            // Corner case to avoid excessive frequency reduction
+            p = .98; goto normalise_harder;
+        } else {
+            F[i][M] -= t2-TOTFREQ;
+        }
+
+        // Store frequency table
+        // i
+        // Context indices are run-length encoded: when the previous context
+        // is also present, the index is followed by the number of further
+        // consecutive contexts present, whose index bytes are then omitted.
+        if (rle_i) {
+            rle_i--;
+        } else {
+            *cp++ = i;
+            // FIXME: could use order-0 statistics to observe which alphabet
+            // symbols are present and base RLE on that ordering instead.
+            if (i && T[i-1]) {
+                for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++)
+                    ;
+                rle_i -= i+1;
+                *cp++ = rle_i;
+            }
+        }
+
+        int *F_i_ = F[i];
+        x = 0;
+        rle_j = 0;
+        for (j = 0; j < 256; j++) {
+            if (F_i_[j]) {
+                //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x);
+
+                // j
+                // Symbol indices use the same run-length scheme as the
+                // context indices above.
+                if (rle_j) {
+                    rle_j--;
+                } else {
+                    *cp++ = j;
+                    if (!rle_j && j && F_i_[j-1]) {
+                        for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++)
+                            ;
+                        rle_j -= j+1;
+                        *cp++ = rle_j;
+                    }
+                }
+
+                // F_i_[j]
+                // One byte for values below 128, otherwise two bytes with
+                // the top bit of the first byte set.
+                if (F_i_[j]<128) {
+                    *cp++ = F_i_[j];
+                } else {
+                    *cp++ = 128 | (F_i_[j]>>8);
+                    *cp++ = F_i_[j]&0xff;
+                }
+
+                // x is the cumulative frequency of the symbols before j.
+                RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT);
+                x += F_i_[j];
+            }
+        }
+        // End of the symbol list for context i.
+        *cp++ = 0;
+    }
+    // End of the context list.
+    *cp++ = 0;
+
+    //write(2, out_buf+4, cp-(out_buf+4));
+    // Header plus tables.
+    tab_size = cp - out_buf;
+    assert(tab_size < 257*257*3);
+    
+    RansState rans0, rans1, rans2, rans3;
+    RansEncInit(&rans0);
+    RansEncInit(&rans1);
+    RansEncInit(&rans2);
+    RansEncInit(&rans3);
+
+    // The rANS bytes are produced from the end of the buffer downwards and
+    // moved next to the tables at the end.
+    uint8_t* ptr = out_end;
+
+    // iN indexes the context byte for stream N, starting near the end of
+    // each quarter; lN is the byte that follows it, i.e. the symbol coded.
+    int isz4 = in_size>>2;
+    int i0 = 1*isz4-2;
+    int i1 = 2*isz4-2;
+    int i2 = 3*isz4-2;
+    int i3 = 4*isz4-2;
+
+    unsigned char l0 = in[i0+1];
+    unsigned char l1 = in[i1+1];
+    unsigned char l2 = in[i2+1];
+    unsigned char l3 = in[i3+1];
+
+    // Deal with the remainder
+    // (the in_size % 4 bytes past the fourth quarter, coded by stream 3).
+    l3 = in[in_size-1];
+    for (i3 = in_size-2; i3 > 4*isz4-2; i3--) {
+        unsigned char c3 = in[i3];
+        RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]);
+        l3 = c3;
+    }
+
+    // Main loop: walk all four quarters backwards in lock-step, coding each
+    // symbol l with its preceding byte c as context.
+    for (; likely(i0 >= 0); i0--, i1--, i2--, i3--) {
+        unsigned char c3 = in[i3];
+        unsigned char c2 = in[i2];
+        unsigned char c1 = in[i1];
+        unsigned char c0 = in[i0];
+
+        RansEncSymbol *s3 = &syms[c3][l3];
+        RansEncSymbol *s2 = &syms[c2][l2];
+        RansEncSymbol *s1 = &syms[c1][l1];
+        RansEncSymbol *s0 = &syms[c0][l0];
+
+        RansEncPutSymbol4(&rans3, &rans2, &rans1, &rans0, &ptr,
+                          s3, s2, s1, s0);
+
+        l3 = c3;
+        l2 = c2;
+        l1 = c1;
+        l0 = c0;
+    }
+
+    // The first byte of each quarter has no predecessor; use context 0.
+    RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]);
+    RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]);
+    RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]);
+    RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]);
+
+    RansEncFlush(&rans3, &ptr);
+    RansEncFlush(&rans2, &ptr);
+    RansEncFlush(&rans1, &ptr);
+    RansEncFlush(&rans0, &ptr);
+
+    // Payload bytes plus header and tables.
+    *out_size = (out_end - ptr) + tab_size;
+
+    cp = out_buf;
+    *cp++ = 1; // order
+
+    // Compressed size, excluding the 9 byte header (little-endian).
+    *cp++ = ((*out_size-9)>> 0) & 0xff;
+    *cp++ = ((*out_size-9)>> 8) & 0xff;
+    *cp++ = ((*out_size-9)>>16) & 0xff;
+    *cp++ = ((*out_size-9)>>24) & 0xff;
+
+    // Uncompressed size (little-endian).
+    *cp++ = (in_size>> 0) & 0xff;
+    *cp++ = (in_size>> 8) & 0xff;
+    *cp++ = (in_size>>16) & 0xff;
+    *cp++ = (in_size>>24) & 0xff;
+
+    // Close the gap between the tables and the rANS payload.
+    memmove(out_buf + tab_size, ptr, out_end-ptr);
+
+ cleanup:
+    htscodecs_tls_free(syms);
+
+    return out_buf;
+}
+
+/*
+ * Order-1 rANS decoder; the inverse of rans_compress_O1().
+ *
+ * in        compressed data: a 9 byte header (order byte, 4 byte
+ *           little-endian compressed size, 4 byte little-endian
+ *           uncompressed size), the frequency tables, then the rANS stream
+ * in_size   number of bytes in "in"
+ * out_size  receives the uncompressed size; only written on success
+ *
+ * Returns a malloc()ed buffer holding the decoded bytes, or NULL if the
+ * input fails validation or memory cannot be allocated.
+ */
+static
+unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size,
+                                  unsigned int *out_size) {
+    /* Load in the static tables */
+    unsigned char *cp = in + 9;
+    unsigned char *ptr_end = in + in_size;
+    int i, j = -999, rle_i, rle_j;
+    unsigned int x;
+    unsigned int out_sz, in_sz;
+    char *out_buf = NULL;
+
+    // Sanity checking
+    if (in_size < 27) // Need at least this many bytes to start
+        return NULL;
+
+    if (*in++ != 1) // Order-1 check
+        return NULL;
+
+    // "in" now points just past the order byte.
+    in_sz  = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24);
+    out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24);
+    if (in_sz != in_size-9)
+        return NULL;
+
+    if (out_sz >= INT_MAX)
+        return NULL; // protect against some overflow cases
+
+    // For speeding up the fuzzer only.
+    // Small input can lead to large uncompressed data.
+    // We reject this as it just slows things up instead of testing more code
+    // paths (once we've verified a few times for large data).
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (out_sz > 100000)
+        return NULL;
+#endif
+
+    // Allocate decoding lookup tables
+    // One zeroed block holds the 256 reverse-lookup tables (D) followed by
+    // the 256x256 decoder symbols (syms).
+    RansDecSymbol32 (*syms)[256];
+    uint8_t *mem = htscodecs_tls_calloc(256, sizeof(ari_decoder)
+                                        + sizeof(*syms));
+    if (!mem)
+        return NULL;
+    ari_decoder *const D = (ari_decoder *)mem;
+    syms = (RansDecSymbol32 (*)[256])(mem + 256*sizeof(ari_decoder));
+    // map[] renumbers contexts in order of first appearance; -1 = unseen.
+    int16_t map[256], map_i = 0;
+    
+    memset(map, -1, 256*sizeof(*map));
+
+    // D aliases mem, which was checked above, so this cannot trigger.
+    if (!D) goto cleanup;
+    /* These memsets prevent illegal memory access in syms due to
+       broken compressed data.  As D is calloc'd, all illegal transitions
+       will end up in either row or column 0 of syms. */
+    memset(&syms[0], 0, sizeof(syms[0]));
+    for (i = 0; i < 256; i++)
+        memset(&syms[i][0], 0, sizeof(syms[0][0]));
+
+    //fprintf(stderr, "out_sz=%d\n", out_sz);
+
+    //i = *cp++;
+    rle_i = 0;
+    i = *cp++;
+    do {
+        // Map arbitrary a,b,c to 0,1,2 to improve cache locality.
+        if (map[i] == -1)
+            map[i] = map_i++;
+        int m_i = map[i];
+
+        rle_j = x = 0;
+        j = *cp++;
+        do {
+            if (map[j] == -1)
+                map[j] = map_i++;
+
+            int F, C;
+            if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left
+            // Frequencies of 128 or more occupy two bytes, flagged by the
+            // top bit of the first byte.
+            if ((F = *cp++) >= 128) {
+                F &= ~128;
+                F = ((F & 127) << 8) | *cp++;
+            }
+            C = x;
+
+            //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C);
+
+            if (unlikely(!F))
+                F = TOTFREQ;
+
+            RansDecSymbolInit32(&syms[m_i][j], C, F);
+
+            /* Build reverse lookup table */
+            //if (!D[i].R) D[i].R = (unsigned char *)malloc(TOTFREQ);
+            if (x + F > TOTFREQ)
+                goto cleanup;
+
+            memset(&D[m_i].R[x], j, F);
+            x += F;
+
+            // Symbol indices are run-length encoded: an index equal to the
+            // previous one plus one is followed by a run length.
+            if (!rle_j && j+1 == *cp) {
+                j = *cp++;
+                rle_j = *cp++;
+            } else if (rle_j) {
+                rle_j--;
+                j++;
+                if (j > 255)
+                    goto cleanup;
+            } else {
+                j = *cp++;
+            }
+        } while(j);
+
+        if (x < TOTFREQ-1 || x > TOTFREQ)
+            goto cleanup;
+        // Index with m_i, as the memset above does: D is addressed by the
+        // remapped context, not by the raw symbol value i.
+        if (x < TOTFREQ) // historically we fill 4095, not 4096
+            D[m_i].R[x] = D[m_i].R[x-1];
+
+        // Context indices use the same run-length scheme.
+        if (!rle_i && i+1 == *cp) {
+            i = *cp++;
+            rle_i = *cp++;
+        } else if (rle_i) {
+            rle_i--;
+            i++;
+            if (i > 255)
+                goto cleanup;
+        } else {
+            i = *cp++;
+        }
+    } while (i);
+    // Contexts never seen in the tables fall back to row 0.
+    for (i = 0; i < 256; i++)
+        if (map[i] == -1)
+            map[i] = 0;
+
+    RansState rans0, rans1, rans2, rans3;
+    uint8_t *ptr = cp;
+    if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left
+    RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup;
+    RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup;
+    RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup;
+    RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup;
+
+    RansState R[4];
+    R[0] = rans0;
+    R[1] = rans1;
+    R[2] = rans2;
+    R[3] = rans3;
+
+    // Each state decodes one quarter of the output; l0..l3 hold the
+    // (remapped) context of each stream and start at context 0.
+    unsigned int isz4 = out_sz>>2;
+    uint32_t l0 = 0;
+    uint32_t l1 = 0;
+    uint32_t l2 = 0;
+    uint32_t l3 = 0;
+    
+    unsigned int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4};
+
+    /* Allocate output buffer */
+    out_buf = malloc(out_sz);
+    if (!out_buf) goto cleanup;
+
+    // Look up the first symbol of every stream.
+    uint8_t cc0 = D[map[l0]].R[R[0] & ((1u << TF_SHIFT)-1)];
+    uint8_t cc1 = D[map[l1]].R[R[1] & ((1u << TF_SHIFT)-1)];
+    uint8_t cc2 = D[map[l2]].R[R[2] & ((1u << TF_SHIFT)-1)];
+    uint8_t cc3 = D[map[l3]].R[R[3] & ((1u << TF_SHIFT)-1)];
+
+    // Keep an 8 byte margin so the unchecked renormalisation below is only
+    // used while at least that much input remains.
+    ptr_end -= 8;
+    for (; likely(i4[0] < isz4); i4[0]++, i4[1]++, i4[2]++, i4[3]++) {
+        // seq4-head2: file q40b
+        //          O3      O2
+        // gcc7     296/291 290/260
+        // gcc10    292/292 290/261
+        // gcc11    293/293 290/265
+        // gcc12    293/290 291/266
+        // clang10  293/290 296/272
+        // clang13  300/290 290/266
+        out_buf[i4[0]] = cc0;
+        out_buf[i4[1]] = cc1;
+        out_buf[i4[2]] = cc2;
+        out_buf[i4[3]] = cc3;
+
+        RansDecSymbol32 s[4] = {
+            syms[l0][cc0],
+            syms[l1][cc1],
+            syms[l2][cc2],
+            syms[l3][cc3],
+        };
+        RansDecAdvanceStep(&R[0], s[0].start, s[0].freq, TF_SHIFT);
+        RansDecAdvanceStep(&R[1], s[1].start, s[1].freq, TF_SHIFT);
+        RansDecAdvanceStep(&R[2], s[2].start, s[2].freq, TF_SHIFT);
+        RansDecAdvanceStep(&R[3], s[3].start, s[3].freq, TF_SHIFT);
+
+        // Likely here helps speed of high-entropy data by 10-11%,
+        // but harms low entropy-data speed by 3-4%.
+        if ((ptr < ptr_end)) {
+            RansDecRenorm2(&R[0], &R[1], &ptr);
+            RansDecRenorm2(&R[2], &R[3], &ptr);
+        } else {
+            // Near the end of the input: use the bounds-checked variant
+            // against the true end of the buffer.
+            RansDecRenormSafe(&R[0], &ptr, ptr_end+8);
+            RansDecRenormSafe(&R[1], &ptr, ptr_end+8);
+            RansDecRenormSafe(&R[2], &ptr, ptr_end+8);
+            RansDecRenormSafe(&R[3], &ptr, ptr_end+8);
+        }
+
+        // The symbols just output become the contexts for the next ones.
+        l0 = map[cc0];
+        l1 = map[cc1];
+        l2 = map[cc2];
+        l3 = map[cc3];
+
+        cc0 = D[l0].R[R[0] & ((1u << TF_SHIFT)-1)];
+        cc1 = D[l1].R[R[1] & ((1u << TF_SHIFT)-1)];
+        cc2 = D[l2].R[R[2] & ((1u << TF_SHIFT)-1)];
+        cc3 = D[l3].R[R[3] & ((1u << TF_SHIFT)-1)];
+    }
+
+    // Remainder
+    // (the out_sz % 4 trailing bytes, all decoded from stream 3).
+    for (; i4[3] < out_sz; i4[3]++) {
+        unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)];
+        out_buf[i4[3]] = c3;
+
+        uint32_t m = R[3] & ((1u << TF_SHIFT)-1);
+        R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start;
+        RansDecRenormSafe(&R[3], &ptr, ptr_end+8);
+        l3 = map[c3];
+    }
+    
+    *out_size = out_sz;
+
+ cleanup:
+    htscodecs_tls_free(D);
+
+    return (unsigned char *)out_buf;
+}
+
+/*-----------------------------------------------------------------------------
+ * Simple interface to the order-0 vs order-1 encoders and decoders.
+ */
+/*
+ * Public compression entry point.  A non-zero "order" selects the order-1
+ * coder, zero the order-0 coder.  Inputs larger than INT_MAX are refused:
+ * *out_size is set to 0 and NULL is returned.
+ */
+unsigned char *rans_compress(unsigned char *in, unsigned int in_size,
+                             unsigned int *out_size, int order) {
+    if (in_size > INT_MAX) {
+        *out_size = 0;
+        return NULL;
+    }
+
+    if (order != 0)
+        return rans_compress_O1(in, in_size, out_size);
+
+    return rans_compress_O0(in, in_size, out_size);
+}
+
+/*
+ * Public decompression entry point.  The first input byte selects the
+ * decoder: zero for order-0, anything else for order-1.
+ */
+unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size,
+                               unsigned int *out_size) {
+    /* Either decoder must be able to read a 9 byte header. */
+    if (in_size < 9)
+        return NULL;
+
+    if (in[0] == 0)
+        return rans_uncompress_O0(in, in_size, out_size);
+
+    return rans_uncompress_O1(in, in_size, out_size);
+}
--- a/ext/htslib/htscodecs/htscodecs/rANS_static.h
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static.h
@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2014-2019 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RANS_STATIC_H
+#define RANS_STATIC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Compress in_size bytes from "in".  A non-zero "order" selects the order-1
+ * coder, zero the order-0 coder.  The compressed length is stored in
+ * *out_size.  Returns the compressed buffer, or NULL on failure; inputs
+ * larger than INT_MAX are rejected with *out_size set to 0.
+ * NOTE(review): the buffer appears to be malloc()ed, so the caller would
+ * release it with free() - confirm.
+ */
+unsigned char *rans_compress(unsigned char *in, unsigned int in_size,
+                             unsigned int *out_size, int order);
+/*
+ * Decompress a buffer produced by rans_compress().  The first input byte
+ * selects the order-0 (zero) or order-1 (non-zero) decoder.  Returns the
+ * uncompressed data with its length in *out_size, or NULL on failure;
+ * inputs shorter than 9 bytes are always rejected.
+ */
+unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size,
+                               unsigned int *out_size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RANS_STATIC_H */
--- a/ext/htslib/htscodecs/htscodecs/rANS_static16_int.h
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static16_int.h
@ -0,0 +1,636 @@
+#ifndef RANS_INTERNAL_H
+#define RANS_INTERNAL_H
+
+#include "config.h"
+#include "varint.h"
+#include "utils.h"
+
+/*
+ * Copyright (c) 2017-2022 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Internal: common parts to all the rANSNx16pr implementations.
+
+// As per standard rANS_static but using optional RLE or bit-packing
+// techniques prior to entropy encoding.  This is a significant
+// reduction in some data sets.
+
+// top bits in order byte
+#define X_PACK   0x80    // Pack 2,4,8 or infinite symbols into a byte.
+#define X_RLE    0x40    // Run length encoding with runs & lits encoded separately
+#define X_CAT    0x20    // Nop; for tiny segments where rANS overhead is too big
+#define X_NOSZ   0x10    // Don't store the original size; used by STRIPE mode
+#define X_STRIPE 0x08    // For N-byte integer data; rotate & encode N streams.
+#define X_32     0x04    // 32-way unrolling instead of 4-way
+
+// Not part of the file format, but used to direct the encoder
+#define X_SIMD_AUTO 0x100 // automatically enable X_32 if we deem it worthy
+#define X_SW32_ENC  0x200 // forcibly use the software version of X_32
+#define X_SW32_DEC  0x400 // forcibly use the software version of X_32
+#define X_NO_AVX512 0x800 // turn off avx512, but permits AVX2
+
+#define TF_SHIFT 12
+#define TOTFREQ (1<<TF_SHIFT)
+
+
+// 9-11 is considerably faster in the O1 variant due to reduced table size.
+// We auto-tune between 10 and 12 though.  Anywhere from 9 to 14 are viable.
+#ifndef TF_SHIFT_O1
+#define TF_SHIFT_O1 12
+#endif
+#ifndef TF_SHIFT_O1_FAST
+#define TF_SHIFT_O1_FAST 10
+#endif
+#define TOTFREQ_O1 (1<<TF_SHIFT_O1)
+#define TOTFREQ_O1_FAST (1<<TF_SHIFT_O1_FAST)
+
+// Order-0 codec entry points, defined elsewhere.
+// NOTE(review): encode_freq1() in this header calls the compressor with
+// out == NULL and free()s the returned pointer, so a NULL "out" presumably
+// requests a newly allocated output buffer - confirm in the implementation.
+unsigned char *rans_compress_O0_4x16(unsigned char *in, unsigned int in_size,
+                                     unsigned char *out, unsigned int *out_size);
+unsigned char *rans_uncompress_O0_4x16(unsigned char *in, unsigned int in_size,
+                                       unsigned char *out, unsigned int out_sz);
+
+// Defined elsewhere.  encode_freq1() uses the return value as the frequency
+// shift for the order-1 tables and S[] as the per-context scaled totals.
+// NOTE(review): the roles of F0 and T are not visible from this header.
+int rans_compute_shift(uint32_t *F0, uint32_t (*F)[256], uint32_t *T,
+                       uint32_t *S);
+
+// Round v up to the nearest power of two.  Exact powers of two are returned
+// unchanged; 0 and values above 2^31 wrap around to 0.
+// Technique from http://graphics.stanford.edu/~seander/bithacks.html
+static inline uint32_t round2(uint32_t v) {
+    uint32_t r = v - 1;
+    int s;
+
+    // Copy the highest set bit of v-1 into every lower bit position...
+    for (s = 1; s < 32; s <<= 1)
+        r |= r >> s;
+
+    // ...so that adding one carries into the next power of two.
+    return r + 1;
+}
+
+// Rescale the 256 frequencies in F, whose current sum is "size", so that
+// they sum to "tot".  Entries that are non-zero on input are never scaled
+// down to zero.
+//
+// Returns 0 on success (including the trivial size == 0 case) and -1 if
+// the entry chosen to absorb the rounding error ends up at zero.
+static inline int normalise_freq(uint32_t *F, int size, uint32_t tot) {
+    int m, M, j, loop = 0;
+    uint64_t tr;
+    if (!size)
+        return 0;
+
+ again:
+    // Fixed point (31 fractional bits) approximation of tot/size.
+    tr = ((uint64_t)tot<<31)/size + (1<<30)/size;
+
+    // Scale every non-zero entry, remembering the index M of the largest
+    // pre-scaling value and accumulating the new sum into size.
+    for (size = m = M = j = 0; j < 256; j++) {
+        if (!F[j])
+            continue;
+
+        if (m < F[j])
+            m = F[j], M = j;
+
+        if ((F[j] = (F[j]*tr)>>31) == 0)
+            F[j] = 1;
+        size += F[j];
+//      if (F[j] == tot)
+//          F[j]--;
+    }
+
+    // adjust > 0: the sum fell short; adjust < 0: it overshot.
+    int adjust = tot - size;
+    if (adjust > 0) {
+        F[M] += adjust;
+    } else if (adjust < 0) {
+        // Take the excess from F[M] if it can absorb it.  On the first pass
+        // this is only done when it costs F[M] no more than half its value.
+        if (F[M] > -adjust && (loop == 1 || F[M]/2 >= -adjust)) {
+            F[M] += adjust;
+        } else {
+            // First failure: rescale once more from the new totals.
+            if (loop < 1) {
+                loop++;
+                goto again;
+            }
+            // Still too large: drop F[M] to 1 and take what remains from
+            // the other entries, never reducing any of them below 1.
+            adjust += F[M]-1;
+            F[M] = 1;
+            for (j = 0; adjust && j < 256; j++) {
+                if (F[j] < 2) continue;
+
+                int d = F[j] > -adjust;
+                int m = d ? adjust : 1-F[j];
+                F[j]   += m;
+                adjust -= m;
+            }
+        }
+    }
+
+    //printf("F[%d]=%d\n", M, F[M]);
+    return F[M]>0 ? 0 : -1;
+}
+
+// A specialised version of normalise_freq for when the current total
+// "size" has already been normalised to a power of two: scaling up to
+// "max_tot" is then a plain left shift of every frequency.  Nothing is
+// changed when size is 0, already equals max_tot, or exceeds it.
+static inline void normalise_freq_shift(uint32_t *F, uint32_t size,
+                                        uint32_t max_tot) {
+    int bits, sym;
+
+    if (!size || size == max_tot)
+        return;
+
+    // Count the doublings needed for size to reach max_tot.
+    for (bits = 0; size < max_tot; bits++)
+        size *= 2;
+
+    for (sym = 0; sym < 256; sym++)
+        F[sym] <<= bits;
+}
+
+// Serialise the set of symbols with a non-zero entry in F[0..255] (symbols
+// only; the frequency values themselves are not written).
+//
+// Symbols are emitted in ascending order.  A symbol that directly follows
+// another present symbol is trailed by a run-length byte giving the number
+// of further consecutive present symbols, which are then omitted.  A zero
+// byte terminates the list.
+//
+// Returns the number of bytes written to cp.
+static inline int encode_alphabet(uint8_t *cp, uint32_t *F) {
+    uint8_t *start = cp;
+    int sym, run = 0;
+
+    for (sym = 0; sym < 256; sym++) {
+        if (!F[sym])
+            continue;
+
+        if (run) {
+            // Covered by a run length emitted earlier.
+            run--;
+            continue;
+        }
+
+        *cp++ = sym;
+
+        // A run is only started when the previous symbol is present too.
+        if (sym == 0 || !F[sym-1])
+            continue;
+
+        int next = sym + 1;
+        while (next < 256 && F[next])
+            next++;
+        run = next - (sym + 1);
+        *cp++ = run;
+    }
+
+    *cp++ = 0;
+
+    return cp - start;
+}
+
+// Decode the symbol list written by encode_alphabet(), setting F[sym] = 1
+// for every symbol listed.  Entries for symbols not listed are left as
+// they were.
+//
+// Returns the number of bytes consumed.  0 is returned when the input is
+// empty, when a run steps beyond symbol 255, or when the bounds-checked
+// loop would have to read at or past cp_end.
+static inline int decode_alphabet(uint8_t *cp, uint8_t *cp_end, uint32_t *F) {
+    if (cp == cp_end)
+        return 0;
+
+    uint8_t *op = cp;
+    int rle = 0;
+    int j = *cp++;
+    // Fewer than two spare bytes: go straight to the bounds-checked loop.
+    if (cp+2 >= cp_end)
+        goto carefully;
+
+    // Fast loop: the loop condition guarantees that the (at most) two bytes
+    // read per iteration are inside the buffer.
+    do {
+        F[j] = 1;
+        if (!rle && j+1 == *cp) {
+            // Next symbol is adjacent: it is followed by a run length.
+            j = *cp++;
+            rle = *cp++;
+        } else if (rle) {
+            rle--;
+            j++;
+            if (j > 255)
+                return 0;
+        } else {
+            j = *cp++;
+        }
+    } while(j && cp+2 < cp_end);
+
+ carefully:
+    // Same logic as above, but checking every read against cp_end.
+    if (j) {
+        do {
+            F[j] = 1;
+            if(cp >= cp_end) return 0;
+            if (!rle && j+1 == *cp) {
+                if (cp+1 >= cp_end) return 0;
+                j = *cp++;
+                rle = *cp++;
+            } else if (rle) {
+                rle--;
+                j++;
+                if (j > 255)
+                    return 0;
+            } else {
+                if (cp >= cp_end) return 0;
+                j = *cp++;
+            }
+        } while(j && cp < cp_end);
+    }
+
+    return cp - op;
+}
+
+// Write an order-0 frequency table: the alphabet (see encode_alphabet)
+// followed by the frequency of each present symbol, written with
+// var_put_u32().  Returns the number of bytes written to cp.
+static inline int encode_freq(uint8_t *cp, uint32_t *F) {
+    uint8_t *const start = cp;
+    int sym;
+
+    // Which symbols are present...
+    cp += encode_alphabet(cp, F);
+
+    // ...then one value per present symbol, in ascending symbol order.
+    for (sym = 0; sym < 256; sym++) {
+        if (!F[sym])
+            continue;
+        cp += var_put_u32(cp, NULL, F[sym]);
+    }
+
+    return cp - start;
+}
+
+// Read an order-0 frequency table as written by encode_freq(): the
+// alphabet first, then one var_get_u32() value for every symbol flagged in
+// F.  The sum of the frequencies is stored in *fsum.
+//
+// Returns the number of bytes consumed, or 0 if the input is empty.
+static inline int decode_freq(uint8_t *cp, uint8_t *cp_end, uint32_t *F,
+                              uint32_t *fsum) {
+    uint8_t *const start = cp;
+    int sym, sum = 0;
+
+    if (cp == cp_end)
+        return 0;
+
+    cp += decode_alphabet(cp, cp_end, F);
+
+    for (sym = 0; sym < 256; sym++) {
+        if (!F[sym])
+            continue;
+        cp += var_get_u32(cp, cp_end, (unsigned int *)&F[sym]);
+        sum += F[sym];
+    }
+
+    *fsum = sum;
+    return cp - start;
+}
+
+
+// Encode one row of order-1 frequencies (F) relative to the order-0 table
+// (F0).  Only symbols present in F0 get an entry.  Non-zero frequencies are
+// written with var_put_u32().  A run of zero frequencies is written as a 0
+// byte followed by a byte holding the run length minus one.
+//
+// Returns the number of bytes written to cp.
+static inline int encode_freq_d(uint8_t *cp, uint32_t *F0, uint32_t *F) {
+    uint8_t *const start = cp;
+    int sym, zeros = 0;
+
+    for (sym = 0; sym < 256; sym++) {
+        if (!F0[sym])
+            continue;
+
+        if (F[sym] == 0) {
+            // Provisionally emit a zero; the run is collapsed once it ends.
+            zeros++;
+            *cp++ = 0;
+            continue;
+        }
+
+        if (zeros) {
+            // Keep the first zero byte and overwrite the rest of the run
+            // with its length minus one.
+            cp -= zeros-1;
+            *cp++ = zeros-1;
+            zeros = 0;
+        }
+        cp += var_put_u32(cp, NULL, F[sym]);
+    }
+
+    // A run that reaches the end of the alphabet is collapsed as well.
+    if (zeros) {
+        cp -= zeros-1;
+        *cp++ = zeros-1;
+    }
+
+    return cp - start;
+}
+
+// Normalise frequency total T[i] to match TOTFREQ_O1 and encode.
+// Also initialises the RansEncSymbol structs.
+//
+// in, in_size  data to gather order-1 statistics from
+// Nway         number of interleaved streams the data will be split into;
+//              the first byte of streams 1..Nway-1 is counted under
+//              context 0
+// syms         filled with the encoder symbols, indexed [context][symbol]
+// cp_p         in: where to write the table; out: first byte after it
+//
+// The first byte written holds the shift in its top four bits; bit 0 is
+// set when the remainder of the table has itself been compressed.
+//
+// Returns the desired TF_SHIFT; 10 or 12 bit, or -1 on error.
+static inline int encode_freq1(uint8_t *in, uint32_t in_size, int Nway,
+                               RansEncSymbol syms[256][256], uint8_t **cp_p) {
+    int i, j, z;
+    uint8_t *out = *cp_p, *cp = out;
+
+    // Compute O1 frequency statistics
+    uint32_t (*F)[256] = htscodecs_tls_calloc(256, (sizeof(*F)));
+    if (!F)
+        return -1;
+    uint32_t T[256+MAGIC] = {0};
+    int isz4 = in_size/Nway;
+    if (hist1_4(in, in_size, F, T) < 0)
+        goto err;
+    // Count the first byte of every stream but the first under context 0.
+    for (z = 1; z < Nway; z++)
+        F[0][in[z*isz4]]++;
+    T[0]+=Nway-1;
+
+    // Potential fix for the wrap-around bug in AVX2 O1 encoder with shift=12.
+    // This occurs when we have one single symbol, giving freq=4096.
+    // We fix it elsewhere for now by looking for the wrap-around.
+    // See "if (1)" statements in the AVX2 code, which is an alternative
+    // to the "if (0)" here.
+//    if (0) {
+//      int x = -1, y = -1;
+//      int n1, n2;
+//      for (x = 0; x < 256; x++) {
+//          n1 = n2 = -1;
+//          for (y = 0; y < 256; y++) {
+//              if (F[x][y])
+//                  n2 = n1, n1 = y;
+//          }
+//          if (n2!=-1 || n1 == -1)
+//              continue;
+//
+//          for (y = 0; y < 256; y++)
+//              if (!F[x][y])
+//                  break;
+//          assert(y<256);
+//          F[x][y]++;
+//          F[0][y]++; T[y]++; F0[y]=1;
+//          F[0][x]++; T[x]++; F0[x]=1;
+//      }
+//    }
+
+    // Encode the order-0 stats
+    // T doubles as the "symbol present" flags for encode_alphabet(); T[0]
+    // is set to 1 for the call and restored afterwards.
+    int tmp_T0 = T[0];
+    T[0] = 1;
+    *cp++ = 0; // marker for uncompressed (may change)
+    cp += encode_alphabet(cp, T);
+    T[0] = tmp_T0;
+
+    // Decide between 10-bit and 12-bit freqs.
+    // Fills out S[] to hold the new scaled maximum value.
+    uint32_t S[256] = {0};
+    int shift = rans_compute_shift(T, F, T, S);
+
+    // Normalise so T[i] == TOTFREQ_O1
+    for (i = 0; i < 256; i++) {
+        unsigned int x;
+
+        if (T[i] == 0)
+            continue;
+
+        // Cap the target total when the fast (smaller) shift was chosen.
+        uint32_t max_val = S[i];
+        if (shift == TF_SHIFT_O1_FAST && max_val > TOTFREQ_O1_FAST)
+            max_val = TOTFREQ_O1_FAST;
+
+        if (normalise_freq(F[i], T[i], max_val) < 0)
+            goto err;
+        T[i]=max_val;
+
+        // Encode our frequency array
+        cp += encode_freq_d(cp, T, F[i]);
+
+        // The stored values sum to max_val; the values used for coding are
+        // shifted up so that they sum to 1<<shift.
+        normalise_freq_shift(F[i], T[i], 1<<shift); T[i]=1<<shift;
+
+        // Initialise Rans Symbol struct too.
+        uint32_t *F_i_ = F[i];
+        for (x = j = 0; j < 256; j++) {
+            RansEncSymbolInit(&syms[i][j], x, F_i_[j], shift);
+            x += F_i_[j];
+        }
+    }
+
+    // First byte: shift in the top four bits, "compressed" flag in bit 0.
+    *out = shift<<4;
+    if (cp - out > 1000) {
+        uint8_t *op = out;
+        // try rans0 compression of header
+        unsigned int u_freq_sz = cp-(op+1);
+        unsigned int c_freq_sz;
+        unsigned char *c_freq = rans_compress_O0_4x16(op+1, u_freq_sz, NULL,
+                                                      &c_freq_sz);
+        // Only keep the compressed form when it is actually smaller once
+        // its own size fields are accounted for.
+        if (c_freq && c_freq_sz + 6 < cp-op) {
+            *op++ |= 1; // compressed
+            op += var_put_u32(op, NULL, u_freq_sz);
+            op += var_put_u32(op, NULL, c_freq_sz);
+            memcpy(op, c_freq, c_freq_sz);
+            cp = op+c_freq_sz;
+        }
+        free(c_freq);
+    }
+
+    *cp_p = cp;
+    htscodecs_tls_free(F);
+    return shift;
+
+ err:
+    htscodecs_tls_free(F);
+    return -1;
+}
+
+// Part of decode_freq1 below.  Reads one row of order-1 frequencies, using
+// the order-0 table F0 to determine which symbols have an entry in the
+// stream.  A zero frequency is followed by a byte giving how many further
+// flagged symbols are also zero.
+//
+// F[j] is written for every flagged symbol processed and, when "total" is
+// non-NULL, the sum of the frequencies is stored there.
+//
+// Returns the number of bytes consumed, or 0 when the input is empty or
+// ends where a zero-run length byte is expected.
+static inline int decode_freq_d(uint8_t *cp, uint8_t *cp_end, uint32_t *F0,
+                                uint32_t *F, uint32_t *total) {
+    uint8_t *const start = cp;
+    int sym, zrun = 0, sum = 0;
+
+    if (cp == cp_end)
+        return 0;
+
+    for (sym = 0; sym < 256 && cp < cp_end; sym++) {
+        uint32_t freq;
+
+        //if (F0[sym]) fprintf(stderr, "F0[%d]=%d\n", sym, F0[sym]);
+        if (F0[sym] == 0)
+            continue;
+
+        if (zrun != 0) {
+            // Still inside a run of zero frequencies.
+            zrun--;
+            freq = 0;
+        } else {
+            if (cp >= cp_end)
+                return 0;
+            cp += var_get_u32(cp, cp_end, &freq);
+            if (freq == 0) {
+                // A zero is followed by the count of additional zeros.
+                if (cp >= cp_end)
+                    return 0;
+                zrun = *cp++;
+            }
+        }
+
+        F[sym] = freq;
+        sum += freq;
+    }
+
+    if (total)
+        *total = sum;
+
+    return cp - start;
+}
+
+// Per-symbol decode entry as filled in by decode_freq1().
+typedef struct {
+    uint16_t f;  // symbol frequency
+    uint16_t b;  // cumulative frequency of the symbols before this one
+} fb_t;
+
+// Decode order-1 frequency table, filling out various lookup tables
+// in the process. (Which will depend on shift and which values have
+// been passed in.)
+//
+// Exactly one form of table is filled per call, chosen in this order:
+//   sfb + fb  (shift == TF_SHIFT_O1): sfb[ctx][slot] = symbol, and
+//             fb[ctx][sym] holds that symbol's frequency and base
+//   s3        (shift == TF_SHIFT_O1): s3[ctx][slot] packs
+//             freq << (shift+8) | offset-within-symbol << 8 | symbol
+//   s3F       (shift == TF_SHIFT_O1_FAST): same packing as s3
+//
+// Returns the number of bytes decoded, or 0 on error.
+static inline int decode_freq1(uint8_t *cp, uint8_t *cp_end, int shift,
+                               uint32_t s3 [256][TOTFREQ_O1],
+                               uint32_t s3F[256][TOTFREQ_O1_FAST],
+                               uint8_t *sfb[256], fb_t fb[256][256]) {
+    uint8_t *cp_start = cp;
+    int i, j, x;
+    // Order-0 alphabet: which symbols occur at all.
+    uint32_t F0[256] = {0};
+    int fsz = decode_alphabet(cp, cp_end, F0);
+    if (!fsz)
+        goto err;
+    cp += fsz;
+
+    if (cp >= cp_end)
+        goto err;
+
+    // silence false gcc warnings
+    if (fb) {fb [0][0].b= 0;}
+    if (s3) {s3 [0][0]  = 0;}
+    if (s3F){s3F[0][0]  = 0;}
+
+    // One frequency row per context present in the alphabet.
+    for (i = 0; i < 256; i++) {
+        if (F0[i] == 0)
+            continue;
+
+        uint32_t F[256] = {0}, T = 0;
+        fsz = decode_freq_d(cp, cp_end, F0, F, &T);
+        if (!fsz)
+            goto err;
+        cp += fsz;
+
+        if (!T) {
+            //fprintf(stderr, "No freq for F_%d\n", i);
+            continue;
+        }
+
+        // Scale the stored total T up to 1<<shift.
+        normalise_freq_shift(F, T, 1<<shift);
+
+        // Build symbols; fixme, do as part of decode, see the _d variant
+        for (j = x = 0; j < 256; j++) {
+            if (F[j]) {
+                // Reject rows that would overrun the 1<<shift slots.
+                if (F[j] > (1<<shift) - x)
+                    goto err;
+
+                if (sfb && shift == TF_SHIFT_O1) {
+                    memset(&sfb[i][x], j, F[j]);
+                    fb[i][j].f = F[j];
+                    fb[i][j].b = x;
+                } else if (s3 && shift == TF_SHIFT_O1) {
+                    int y;
+                    for (y = 0; y < F[j]; y++)
+                        s3[i][y+x] = (((uint32_t)F[j])<<(shift+8)) |(y<<8) |j;
+                } else if (s3F && shift == TF_SHIFT_O1_FAST) {
+                    int y;
+                    for (y = 0; y < F[j]; y++)
+                        s3F[i][y+x] = (((uint32_t)F[j])<<(shift+8)) |(y<<8) |j;
+                }
+
+                x += F[j];
+            }
+        }
+        // Every row must fill the table exactly.
+        if (x != (1<<shift))
+            goto err;
+    }
+
+    return cp - cp_start;
+
+ err:
+    return 0;
+}
+
+// Build the s3 symbol lookup table from the frequencies in F.
+// Each of the 1<<shift slots packs the symbol's frequency (above bit
+// shift+8), the slot's offset within the symbol (from bit 8) and the
+// symbol itself (low 8 bits); with shift 12 that is 12 bit freq, 12 bit
+// bias and 8 bit symbol.
+//
+// Returns 0 when the frequencies fill the table exactly, 1 otherwise.
+static inline int rans_F_to_s3(const uint32_t *F, int shift, uint32_t *s3) {
+    int sym, slot = 0;
+
+    for (sym = 0; sym < 256; sym++) {
+        uint32_t k;
+
+        // Skip absent symbols and any that would overrun the table.
+        if (!F[sym] || F[sym] > (1<<shift) - slot)
+            continue;
+
+        uint32_t packed = (((uint32_t)F[sym])<<(shift+8)) | sym;
+        for (k = 0; k < F[sym]; k++)
+            s3[slot++] = packed + (k<<8);
+    }
+
+    if (slot == (1<<shift))
+        return 0;
+    return 1;
+}
+
+#ifdef ROT32_SIMD
+#include <x86intrin.h>
+
+// Stand-in for _mm256_set_m128i, which older gcc releases do not provide.
+// Builds a 256-bit vector with L in the low 128 bits and H in the high
+// 128 bits.
+static inline __m256i _mm256_set_m128ix(__m128i H, __m128i L) {
+    __m256i lo = _mm256_castsi128_si256(L);
+
+    return _mm256_insertf128_si256(lo, H, 1);
+}
+
+// Interleave the 32x32 byte block t through successive 8, 16, 32 and 64 bit
+// unpack stages and store the result as 32 vectors of 32 bytes, one at each
+// out[iN[k]].  Every iN[k] is then advanced by 32.
+// NOTE(review): the unpack sequence looks like a 32x32 byte transpose
+// (row k of the result going to out + iN[k]) - confirm against the scalar
+// equivalent.
+static inline void rot32_simd(uint8_t t[32][32], uint8_t *out, int iN[32]) {
+    int z;
+
+    // Stage 1: interleave bytes of adjacent rows.
+    __m256i lh8[32];
+    for (z = 0; z < 32/2; z+=2) {
+        __m256i a, b, c, d;
+        a = _mm256_loadu_si256((__m256i *)&t[z*2+0]);
+        b = _mm256_loadu_si256((__m256i *)&t[z*2+1]);
+        c = _mm256_loadu_si256((__m256i *)&t[z*2+2]);
+        d = _mm256_loadu_si256((__m256i *)&t[z*2+3]);
+
+        lh8[z+0]  = _mm256_unpacklo_epi8(a, b);
+        lh8[z+16] = _mm256_unpackhi_epi8(a, b);
+        lh8[z+1]  = _mm256_unpacklo_epi8(c, d);
+        lh8[z+17] = _mm256_unpackhi_epi8(c, d);
+    }
+
+    // Stages 2 and 3: interleave 16-bit then 32-bit units.
+    __m256i lh32[32];
+    for (z = 0; z < 32/4; z+=2) {
+        __m256i a, b, c, d;
+        a = _mm256_unpacklo_epi16(lh8[z*4+0], lh8[z*4+1]);
+        b = _mm256_unpacklo_epi16(lh8[z*4+2], lh8[z*4+3]);
+        c = _mm256_unpackhi_epi16(lh8[z*4+0], lh8[z*4+1]);
+        d = _mm256_unpackhi_epi16(lh8[z*4+2], lh8[z*4+3]);
+
+        __m256i e, f, g, h;
+        e = _mm256_unpacklo_epi16(lh8[(z+1)*4+0], lh8[(z+1)*4+1]);
+        f = _mm256_unpacklo_epi16(lh8[(z+1)*4+2], lh8[(z+1)*4+3]);
+        g = _mm256_unpackhi_epi16(lh8[(z+1)*4+0], lh8[(z+1)*4+1]);
+        h = _mm256_unpackhi_epi16(lh8[(z+1)*4+2], lh8[(z+1)*4+3]);
+
+        lh32[z+0]  = _mm256_unpacklo_epi32(a,b);
+        lh32[z+8]  = _mm256_unpacklo_epi32(c,d);
+        lh32[z+16] = _mm256_unpackhi_epi32(a,b);
+        lh32[z+24] = _mm256_unpackhi_epi32(c,d);
+
+        lh32[z+1+0]  = _mm256_unpacklo_epi32(e,f);
+        lh32[z+1+8]  = _mm256_unpacklo_epi32(g,h);
+        lh32[z+1+16] = _mm256_unpackhi_epi32(e,f);
+        lh32[z+1+24] = _mm256_unpackhi_epi32(g,h);
+    }
+
+    // Final unpack 64 and store
+    int idx[] = {0, 8, 4, 12, 2, 10, 6, 14};
+    for (z = 0; z < 8; z++) {
+        int i = idx[z];
+
+        // Putting this here doesn't speed things up
+        __m256i a = _mm256_unpacklo_epi64(lh32[i*2+0], lh32[i*2+1]);
+        __m256i b = _mm256_unpacklo_epi64(lh32[i*2+2], lh32[i*2+3]);
+        __m256i c = _mm256_unpackhi_epi64(lh32[i*2+0], lh32[i*2+1]);
+        __m256i d = _mm256_unpackhi_epi64(lh32[i*2+2], lh32[i*2+3]);
+
+        // Recombine matching 128-bit halves: low halves of a/b and c/d feed
+        // outputs 0..15, high halves feed outputs 16..31.
+        __m256i p = _mm256_set_m128ix(_mm256_extracti128_si256(b,0),
+                                      _mm256_extracti128_si256(a,0));
+        __m256i q = _mm256_set_m128ix(_mm256_extracti128_si256(d,0),
+                                      _mm256_extracti128_si256(c,0));
+        __m256i r = _mm256_set_m128ix(_mm256_extracti128_si256(b,1),
+                                      _mm256_extracti128_si256(a,1));
+        __m256i s = _mm256_set_m128ix(_mm256_extracti128_si256(d,1),
+                                      _mm256_extracti128_si256(c,1));
+
+        _mm256_storeu_si256((__m256i *)(&out[iN[z*2+0]]),  p);
+        _mm256_storeu_si256((__m256i *)(&out[iN[z*2+1]]),  q);
+        _mm256_storeu_si256((__m256i *)(&out[iN[z*2+16]]), r);
+        _mm256_storeu_si256((__m256i *)(&out[iN[z*2+17]]), s);
+    }
+
+    // Store
+    // Advance every output offset past the 32 bytes just written.
+    for (z = 0; z < 32; z++)
+        iN[z] += 32;
+}
+#endif
+
+#endif // RANS_INTERNAL_H
--- a/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr.c
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr.c
@ -0,0 +1,758 @@
+/*
+ * Copyright (c) 2017-2023 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <limits.h>
+
+#include "rANS_word.h"
+#include "rANS_static4x16.h"
+#include "rANS_static16_int.h"
+#include "varint.h"
+#include "utils.h"
+
+#define TF_SHIFT 12            // log2 of the order-0 frequency total
+#define TOTFREQ (1<<TF_SHIFT)  // total that order-0 frequencies are scaled to
+
+// Order-1 precision: log2 of the frequency total used for each context.
+// 9-11 is considerably faster in the O1 variant due to reduced table size.
+// We auto-tune between 10 and 12 though.  Anywhere from 9 to 14 are viable.
+#ifndef TF_SHIFT_O1
+#define TF_SHIFT_O1 12
+#endif
+#ifndef TF_SHIFT_O1_FAST
+#define TF_SHIFT_O1_FAST 10
+#endif
+#define TOTFREQ_O1 (1<<TF_SHIFT_O1)
+#define TOTFREQ_O1_FAST (1<<TF_SHIFT_O1_FAST)
+
+// Number of interleaved rANS states used by every codec in this file.
+#define NX 32
+/*
+ * Order-0 encoder using NX (32) interleaved rANS states.
+ *
+ * in, in_size  Data to compress.
+ * out          Destination buffer, or NULL to have one malloc()ed here.
+ * out_size     For a caller-supplied "out": its capacity on entry.  On
+ *              success it is set to the number of bytes produced.
+ *
+ * Returns the output buffer, or NULL on failure (buffer smaller than
+ * rans_compress_bound_4x16(in_size,0)-20, malloc failure, or an error
+ * from normalise_freq).  A buffer allocated here is freed again when
+ * normalise_freq fails.
+ *
+ * Output layout: the frequency table from encode_freq() followed directly
+ * by the rANS byte stream.  The stream is built backwards from the end of
+ * the buffer and finally moved down to sit against the table.
+ */
+unsigned char *rans_compress_O0_32x16(unsigned char *in,
+                                      unsigned int in_size,
+                                      unsigned char *out,
+                                      unsigned int *out_size) {
+    unsigned char *cp, *out_end, *out_free = NULL;
+    RansEncSymbol syms[256];
+    RansState ransN[NX];
+    uint8_t* ptr;
+    uint32_t F[256+MAGIC] = {0};
+    int i, j, tab_size = 0, x, z;
+    // -20 for order/size/meta
+    unsigned int bound = rans_compress_bound_4x16(in_size,0)-20;
+
+    if (!out) {
+        *out_size = bound;
+        out = out_free = malloc(*out_size);
+    }
+    if (!out || bound > *out_size)
+        return NULL;
+
+    // If "out" isn't word aligned, tweak out_end/ptr to ensure it is.
+    // The bound was already padded to leave room for this.
+    if (((size_t)out)&1)
+        bound--;
+    ptr = out_end = out + bound;
+
+    // Nothing to encode: emit an empty block (tab_size is still 0).
+    if (in_size == 0)
+        goto empty;
+
+    // Compute statistics.  hist8e fills the histogram F; its return value
+    // is used only to choose the low-entropy code path when below 2.
+    double e = hist8e(in, in_size, F);
+    int low_ent = e < 2;
+
+    // Normalise so frequencies sum to a power of 2 (at most TOTFREQ)
+    uint32_t fsum = in_size;
+    uint32_t max_val = round2(fsum);
+    if (max_val > TOTFREQ)
+        max_val = TOTFREQ;
+
+    if (normalise_freq(F, fsum, max_val) < 0) {
+        free(out_free);
+        return NULL;
+    }
+    fsum=max_val;
+
+    // The frequency table goes at the very start of the output.
+    cp = out;
+    cp += encode_freq(cp, F);
+    tab_size = cp-out;
+    //write(2, out+4, cp-(out+4));
+
+    // Rescale from max_val to the full TOTFREQ used while encoding; the
+    // decoder applies normalise_freq_shift() after reading the table.
+    if (normalise_freq(F, fsum, TOTFREQ) < 0) {
+        free(out_free);
+        return NULL;
+    }
+
+    // Build an encoder symbol for every byte value that occurs; x is the
+    // running cumulative frequency, i.e. the start of each symbol's range.
+    for (x = j = 0; j < 256; j++) {
+        if (F[j]) {
+            RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT);
+            x += F[j];
+        }
+    }
+
+    // Reset all NX encoder states.
+    for (z = 0; z < NX; z++)
+      RansEncInit(&ransN[z]);
+
+    // Encoding runs backwards, so start with the in_size % NX bytes at the
+    // end of the input: tail byte number z is coded with state z.
+    z = i = in_size&(NX-1);
+    while (z-- > 0)
+      RansEncPutSymbol(&ransN[z], &ptr, &syms[in[in_size-(i-z)]]);
+
+    // Both branches below walk the remaining full rows of NX bytes from
+    // last to first, coding in[i-NX+z] with state z.
+    if (low_ent) {
+        // Low-entropy input: branched put-symbol, in the original loop form.
+        // NOTE(review): the author's figures below are presumably MB/s.
+        // gcc 446, clang 427
+        for (i=(in_size &~(NX-1)); likely(i>0); i-=NX) {
+            for (z = NX-1; z >= 0; z-=4) {
+                RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]];
+                RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]];
+                RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]];
+                RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]];
+                RansEncPutSymbol_branched(&ransN[z-0], &ptr, s0);
+                RansEncPutSymbol_branched(&ransN[z-1], &ptr, s1);
+                RansEncPutSymbol_branched(&ransN[z-2], &ptr, s2);
+                RansEncPutSymbol_branched(&ransN[z-3], &ptr, s3);
+                if (NX%8 == 0) {
+                    z -= 4;
+                    RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]];
+                    RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]];
+                    RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]];
+                    RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]];
+                    RansEncPutSymbol_branched(&ransN[z-0], &ptr, s0);
+                    RansEncPutSymbol_branched(&ransN[z-1], &ptr, s1);
+                    RansEncPutSymbol_branched(&ransN[z-2], &ptr, s2);
+                    RansEncPutSymbol_branched(&ransN[z-3], &ptr, s3);
+                }
+            }
+            if (z < -1) abort();
+        }
+    } else {
+        // Branchless version optimises poorly with gcc unless we have
+        // AVX2 capability, so have a custom rewrite of it.
+        uint16_t* ptr16 = (uint16_t *)ptr;
+        for (i=(in_size &~(NX-1)); likely(i>0); i-=NX) {
+            // Unrolled copy of below, because gcc doesn't optimise this
+            // well in the original form.
+            //
+            // Gcc11:   328 MB/s (this) vs 208 MB/s (orig)
+            // Clang10: 352 MB/s (this) vs 340 MB/s (orig)
+            //
+            // for (z = NX-1; z >= 0; z-=4) {
+            //  RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]];
+            //  RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]];
+            //  RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]];
+            //  RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]];
+            //  RansEncPutSymbol(&ransN[z-0], &ptr, s0);
+            //  RansEncPutSymbol(&ransN[z-1], &ptr, s1);
+            //  RansEncPutSymbol(&ransN[z-2], &ptr, s2);
+            //  RansEncPutSymbol(&ransN[z-3], &ptr, s3);
+            // }
+
+            for (z = NX-1; z >= 0; z-=4) {
+                // RansEncPutSymbol added in-situ
+                RansState *rp = &ransN[z]-3;
+                RansEncSymbol *sy[4];
+                uint8_t *C = &in[i-(NX-z)]-3;
+
+                sy[0] = &syms[C[3]];
+                sy[1] = &syms[C[2]];
+
+                int c0  = rp[3-0] > sy[0]->x_max;
+                int c1  = rp[3-1] > sy[1]->x_max;
+
+                // The low 16 bits are always stored, but ptr16 only steps
+                // back when the state exceeded x_max; otherwise the next
+                // store overwrites the same slot.
+#ifdef HTSCODECS_LITTLE_ENDIAN
+                ptr16[-1] = rp[3-0]; ptr16 -= c0;
+                ptr16[-1] = rp[3-1]; ptr16 -= c1;
+#else
+                ((uint8_t *)&ptr16[-1])[0] = rp[3-0];
+                ((uint8_t *)&ptr16[-1])[1] = rp[3-0]>>8;
+                ptr16 -= c0;
+                ((uint8_t *)&ptr16[-1])[0] = rp[3-1];
+                ((uint8_t *)&ptr16[-1])[1] = rp[3-1]>>8;
+                ptr16 -= c1;
+#endif
+
+                // Drop the 16 bits just emitted from states that overflowed.
+                rp[3-0] = c0 ? rp[3-0]>>16 : rp[3-0];
+                rp[3-1] = c1 ? rp[3-1]>>16 : rp[3-1];
+
+                sy[2] = &syms[C[1]];
+                sy[3] = &syms[C[0]];
+
+                int c2  = rp[3-2] > sy[2]->x_max;
+                int c3  = rp[3-3] > sy[3]->x_max;
+#ifdef HTSCODECS_LITTLE_ENDIAN
+                ptr16[-1] = rp[3-2]; ptr16 -= c2;
+                ptr16[-1] = rp[3-3]; ptr16 -= c3;
+#else
+                ((uint8_t *)&ptr16[-1])[0] = rp[3-2];
+                ((uint8_t *)&ptr16[-1])[1] = rp[3-2]>>8;
+                ptr16 -= c2;
+                ((uint8_t *)&ptr16[-1])[0] = rp[3-3];
+                ((uint8_t *)&ptr16[-1])[1] = rp[3-3]>>8;
+                ptr16 -= c3;
+#endif
+                rp[3-2] = c2 ? rp[3-2]>>16 : rp[3-2];
+                rp[3-3] = c3 ? rp[3-3]>>16 : rp[3-3];
+
+                // State update for the four symbols, using each symbol's
+                // precomputed rcp_freq/rcp_shift in place of a division.
+                int k;
+                for (k = 0; k < 4; k++) {
+                    uint64_t r64 = (uint64_t)rp[3-k];
+                    uint32_t q = (r64 * sy[k]->rcp_freq) >> sy[k]->rcp_shift;
+                    rp[3-k] += sy[k]->bias + q*sy[k]->cmpl_freq;
+                }
+            }
+            if (z < -1) abort();
+        }
+        ptr = (uint8_t *)ptr16;
+    }
+    for (z = NX-1; z >= 0; z--)
+        RansEncFlush(&ransN[z], &ptr);
+
+ empty:
+    // Finalise block size and return it
+    *out_size = (out_end - ptr) + tab_size;
+
+    // Move the stream down so that it directly follows the frequency table.
+    memmove(out + tab_size, ptr, out_end-ptr);
+
+    return out;
+}
+/*
+ * Order-0 decoder for NX (32) interleaved rANS states; it reads the layout
+ * produced by rans_compress_O0_32x16 (frequency table, then the NX initial
+ * states, then the renormalisation data).
+ *
+ * in, in_size  Compressed input.
+ * out          Destination of at least out_sz bytes, or NULL to have one
+ *              malloc()ed here.
+ * out_sz       Number of bytes to decode; must be below INT_MAX.
+ *
+ * Returns the output buffer, or NULL on error (input too short, bad
+ * frequency table, allocation failure, or an initial state below
+ * RANS_BYTE_L).  A buffer allocated here is freed before returning NULL.
+ */
+unsigned char *rans_uncompress_O0_32x16(unsigned char *in,
+                                        unsigned int in_size,
+                                        unsigned char *out,
+                                        unsigned int out_sz) {
+    if (in_size < 16) // 4-states at least
+        return NULL;
+
+    if (out_sz >= INT_MAX)
+        return NULL; // protect against some overflow cases
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (out_sz > 100000)
+        return NULL;
+#endif
+
+    /* Load in the static tables */
+    unsigned char *cp = in, *out_free = NULL;
+    unsigned char *cp_end = in + in_size;
+    int i;
+    uint32_t s3[TOTFREQ]; // For TF_SHIFT <= 12
+
+    if (!out)
+        out_free = out = malloc(out_sz);
+    if (!out)
+        return NULL;
+
+    // Precompute reverse lookup of frequency.
+    uint32_t F[256] = {0}, fsum;
+    int fsz = decode_freq(cp, cp_end, F, &fsum);
+    if (!fsz)
+        goto err;
+    cp += fsz;
+
+    // NOTE(review): presumably rescales F from the stored total to TOTFREQ.
+    normalise_freq_shift(F, fsum, TOTFREQ);
+
+    /* Build symbols; fixme, do as part of decode, see the _d variant.
+     * s3[] is indexed by the low TF_SHIFT bits of a state.  As unpacked by
+     * the loops below, each entry holds the frequency in its top bits, the
+     * term added after the multiply in bits 8..TF_SHIFT+7, and the decoded
+     * symbol in the low 8 bits. */
+    if (rans_F_to_s3(F, TF_SHIFT, s3))
+        goto err;
+
+    // The NX initial states come next; NX*4 bytes are required for them.
+    if (cp_end - cp < NX * 4)
+        goto err;
+
+    int z;
+    RansState R[NX];
+    for (z = 0; z < NX; z++) {
+        RansDecInit(&R[z], &cp);
+        if (R[z] < RANS_BYTE_L)
+            goto err;
+    }
+
+    // out_end is out_sz rounded down to a whole number of NX-byte rows.
+    int out_end = (out_sz&~(NX-1));
+    const uint32_t mask = (1u << TF_SHIFT)-1;
+    cp_end -= NX*2; // worst case for renorm bytes
+
+    // assume NX is divisible by 4
+    assert(NX%4==0);
+
+    // Unsafe loop with no ptr overflow checking within loop itself.
+    // The cp < cp_end test (with cp_end pulled in by NX*2 above) leaves
+    // enough input for one whole row of unchecked renormalisation.
+    for (i=0; likely(i < out_end && cp < cp_end); i+=NX) {
+        for (z = 0; z < NX; z+=4) {
+            uint32_t S[4];
+            S[0] = s3[R[z+0] & mask];
+            S[1] = s3[R[z+1] & mask];
+            S[2] = s3[R[z+2] & mask];
+            S[3] = s3[R[z+3] & mask];
+
+            R[z+0] = (S[0]>>(TF_SHIFT+8)) * (R[z+0] >> TF_SHIFT)
+                + ((S[0]>>8) & mask);
+            R[z+1] = (S[1]>>(TF_SHIFT+8)) * (R[z+1] >> TF_SHIFT)
+                + ((S[1]>>8) & mask);
+            R[z+2] = (S[2]>>(TF_SHIFT+8)) * (R[z+2] >> TF_SHIFT)
+                + ((S[2]>>8) & mask);
+            R[z+3] = (S[3]>>(TF_SHIFT+8)) * (R[z+3] >> TF_SHIFT)
+                + ((S[3]>>8) & mask);
+
+            // Storing to unsigned char keeps only the symbol byte of S.
+            out[i+z+0] = S[0];
+            out[i+z+1] = S[1];
+            out[i+z+2] = S[2];
+            out[i+z+3] = S[3];
+
+            RansDecRenorm(&R[z+0], &cp);
+            RansDecRenorm(&R[z+1], &cp);
+            RansDecRenorm(&R[z+2], &cp);
+            RansDecRenorm(&R[z+3], &cp);
+
+            // Manual unroll: handle a second group of four per iteration.
+            if (NX%8==0) {
+                z += 4;
+                S[0] = s3[R[z+0] & mask];
+                S[1] = s3[R[z+1] & mask];
+                S[2] = s3[R[z+2] & mask];
+                S[3] = s3[R[z+3] & mask];
+
+                R[z+0] = (S[0]>>(TF_SHIFT+8)) * (R[z+0] >> TF_SHIFT)
+                    + ((S[0]>>8) & mask);
+                R[z+1] = (S[1]>>(TF_SHIFT+8)) * (R[z+1] >> TF_SHIFT)
+                    + ((S[1]>>8) & mask);
+                R[z+2] = (S[2]>>(TF_SHIFT+8)) * (R[z+2] >> TF_SHIFT)
+                    + ((S[2]>>8) & mask);
+                R[z+3] = (S[3]>>(TF_SHIFT+8)) * (R[z+3] >> TF_SHIFT)
+                    + ((S[3]>>8) & mask);
+
+                out[i+z+0] = S[0];
+                out[i+z+1] = S[1];
+                out[i+z+2] = S[2];
+                out[i+z+3] = S[3];
+
+                RansDecRenorm(&R[z+0], &cp);
+                RansDecRenorm(&R[z+1], &cp);
+                RansDecRenorm(&R[z+2], &cp);
+                RansDecRenorm(&R[z+3], &cp);
+            }
+        }
+    }
+
+    // Safe loop: decodes any rows left once the input is nearly used up,
+    // renormalising with a bounds check against the real end of input
+    // (cp_end+NX*2 undoes the adjustment made above).
+    for (; i < out_end; i+=NX) {
+        for (z = 0; z < NX; z+=4) {
+            uint32_t S[4];
+            S[0] = s3[R[z+0] & mask];
+            S[1] = s3[R[z+1] & mask];
+            S[2] = s3[R[z+2] & mask];
+            S[3] = s3[R[z+3] & mask];
+
+            R[z+0] = (S[0]>>(TF_SHIFT+8)) * (R[z+0] >> TF_SHIFT)
+                + ((S[0]>>8) & mask);
+            R[z+1] = (S[1]>>(TF_SHIFT+8)) * (R[z+1] >> TF_SHIFT)
+                + ((S[1]>>8) & mask);
+            R[z+2] = (S[2]>>(TF_SHIFT+8)) * (R[z+2] >> TF_SHIFT)
+                + ((S[2]>>8) & mask);
+            R[z+3] = (S[3]>>(TF_SHIFT+8)) * (R[z+3] >> TF_SHIFT)
+                + ((S[3]>>8) & mask);
+
+            out[i+z+0] = S[0];
+            out[i+z+1] = S[1];
+            out[i+z+2] = S[2];
+            out[i+z+3] = S[3];
+
+            RansDecRenormSafe(&R[z+0], &cp, cp_end+NX*2);
+            RansDecRenormSafe(&R[z+1], &cp, cp_end+NX*2);
+            RansDecRenormSafe(&R[z+2], &cp, cp_end+NX*2);
+            RansDecRenormSafe(&R[z+3], &cp, cp_end+NX*2);
+        }
+    }
+
+    // Tail: the last out_sz % NX bytes are looked up directly from the
+    // first states; no state update or renormalisation is needed.
+    for (z = out_sz & (NX-1); z-- > 0; )
+        out[out_end + z] = s3[R[z] & mask];
+
+    //fprintf(stderr, "    0 Decoded %d bytes\n", (int)(cp-in)); //c-size
+
+    return out;
+
+ err:
+    free(out_free);
+    return NULL;
+}
+
+
+/*-----------------------------------------------------------------------------
+ * Order-1 encoder using NX (32) interleaved rANS states.
+ *
+ * The input is treated as NX consecutive segments of in_size/NX bytes, one
+ * per state; the last state additionally codes the in_size % NX left-over
+ * bytes.  Each byte is coded with the byte before it as context, and the
+ * first byte of every segment is coded with context 0.
+ *
+ * in, in_size  Data to compress.
+ * out          Destination buffer, or NULL to have one malloc()ed here.
+ * out_size     For a caller-supplied "out": its capacity on entry.  On
+ *              success it is set to the number of bytes produced.
+ *
+ * Returns the output buffer, or NULL when in_size < NX (order-0 should be
+ * used instead), when the buffer is smaller than
+ * rans_compress_bound_4x16(in_size,1)-20, on allocation failure, or when
+ * encode_freq1 returns a negative value.
+ */
+unsigned char *rans_compress_O1_32x16(unsigned char *in,
+                                      unsigned int in_size,
+                                      unsigned char *out,
+                                      unsigned int *out_size) {
+    unsigned char *cp, *out_end, *out_free = NULL;
+    unsigned int tab_size;
+    int bound = rans_compress_bound_4x16(in_size,1)-20, z;
+    RansState ransN[NX];
+
+    if (in_size < NX) // force O0 instead
+        return NULL;
+
+    if (!out) {
+        *out_size = bound;
+        out_free = out = malloc(*out_size);
+    }
+    if (!out || bound > *out_size)
+        return NULL;
+
+    // Shorten the usable space by one byte when "out" has an odd address.
+    // NOTE(review): presumably keeps the 16-bit stores below aligned.
+    if (((size_t)out)&1)
+        bound--;
+    out_end = out + bound;
+
+    // Encoder symbols, indexed syms[context][symbol].
+    RansEncSymbol (*syms)[256] = htscodecs_tls_alloc(256 * (sizeof(*syms)));
+    if (!syms) {
+        free(out_free);
+        return NULL;
+    }
+
+    // encode_freq1 advances cp past what it writes at the start of "out";
+    // only the sign of its result is checked here.
+    cp = out;
+    int shift = encode_freq1(in, in_size, 32, syms, &cp); 
+    if (shift < 0) {
+        free(out_free);
+        htscodecs_tls_free(syms);
+        return NULL;
+    }
+    tab_size = cp - out;
+
+    for (z = 0; z < NX; z++)
+      RansEncInit(&ransN[z]);
+
+    // The stream is written backwards, starting from the end of the buffer.
+    uint8_t* ptr = out_end;
+
+    // iN[z] indexes the second-to-last byte of segment z ...
+    int iN[NX], isz4 = in_size/NX, i;
+    for (z = 0; z < NX; z++)
+        iN[z] = (z+1)*isz4-2;
+
+    // ... and lN[z] holds its last byte, the first symbol to be coded.
+    unsigned char lN[NX];
+    for (z = 0; z < NX; z++)
+        lN[z] = in[iN[z]+1];
+
+    // Deal with the remainder: the last state walks back from the end of
+    // the input until it is in step with the other segments.
+    z = NX-1;
+    lN[z] = in[in_size-1];
+    for (iN[z] = in_size-2; iN[z] > NX*isz4-2; iN[z]--) {
+        unsigned char c = in[iN[z]];
+        RansEncPutSymbol(&ransN[z], &ptr, &syms[c][lN[z]]);
+        lN[z] = c;
+    }
+
+    // Switch from indices to pointers for the main loop.
+    unsigned char *i32[NX];
+    for (i = 0; i < NX; i++)
+        i32[i] = &in[iN[i]];
+
+    // Main loop: every state steps back one byte per pass until state 0
+    // has moved before the start of the input.
+    for (; likely(i32[0] >= in); ) {
+        uint16_t *ptr16 = (uint16_t *)ptr;
+        for (z = NX-1; z >= 0; z-=4) {
+            RansEncSymbol *sy[4];
+            int k;
+
+            // Pick the symbol for (context = current byte, symbol = the
+            // byte after it), then make the current byte the next symbol.
+            for (k = 0; k < 4; k++) {
+                sy[k] = &syms[*i32[z-k]][lN[z-k]];
+                lN[z-k] = *i32[z-k]--;
+            }
+
+            // RansEncPutSymbol added in-situ
+            for (k = 0; k < 4; k++) {
+                int c = ransN[z-k] > sy[k]->x_max;
+#ifdef HTSCODECS_LITTLE_ENDIAN
+                ptr16[-1] = ransN[z-k];
+#else
+                ((uint8_t *)&ptr16[-1])[0] = ransN[z-k];
+                ((uint8_t *)&ptr16[-1])[1] = ransN[z-k]>>8;
+#endif
+                ptr16 -= c;
+                //ransN[z-k] >>= c<<4;
+                ransN[z-k] = c ? ransN[z-k]>>16 : ransN[z-k];
+            }
+
+            // State update using the symbols' precomputed reciprocals.
+            for (k = 0; k < 4; k++) {
+                uint64_t r64 = ransN[z-k];
+                uint32_t q = (r64 * sy[k]->rcp_freq) >> sy[k]->rcp_shift;
+                ransN[z-k] += sy[k]->bias + q*sy[k]->cmpl_freq;
+            }
+        }
+        ptr = (uint8_t *)ptr16;
+    }
+
+    // The first byte of each segment has no predecessor: use context 0.
+    for (z = NX-1; z>=0; z--)
+        RansEncPutSymbol(&ransN[z], &ptr, &syms[0][lN[z]]);
+
+    for (z = NX-1; z>=0; z--)
+        RansEncFlush(&ransN[z], &ptr);
+
+    *out_size = (out_end - ptr) + tab_size;
+
+    // NOTE(review): cp is not read again after this assignment.
+    cp = out;
+    memmove(out + tab_size, ptr, out_end-ptr);
+
+    htscodecs_tls_free(syms);
+    return out;
+}
+
+/* Extra bytes added to every row of the sfb lookup table built by
+ * rans_uncompress_O1_32x16, giving a row stride of TOTFREQ_O1+MAGIC2 (or
+ * TOTFREQ_O1_FAST+MAGIC2).  NOTE(review): the reason for 179 is not stated
+ * here; 111 and 0 are alternatives the author left behind. */
+#define MAGIC2 179
+//#define MAGIC2 0
+/*
+ * Order-1 decoder for NX (32) interleaved rANS states.
+ *
+ * The first input byte carries the frequency precision in its top four
+ * bits: TF_SHIFT_O1 selects the sfb/fb tables, any other value is decoded
+ * with the TF_SHIFT_O1_FAST s3 table.  If bit 0 is set, the frequency
+ * table is itself compressed: two var-ints (uncompressed and compressed
+ * size) are followed by data for rans_uncompress_O0_4x16.
+ *
+ * The output is produced as NX segments of out_sz/NX bytes, one per state,
+ * each byte being decoded in the context of the previous byte of its
+ * segment (starting from context 0).  The last state then decodes the
+ * remaining out_sz % NX bytes.
+ *
+ * in, in_size  Compressed input.
+ * out          Destination of at least out_sz bytes, or NULL to have one
+ *              malloc()ed here.
+ * out_sz       Number of bytes to decode; must be below INT_MAX.
+ *
+ * Returns the output buffer, or NULL on error; a buffer allocated here is
+ * freed before returning NULL.
+ */
+unsigned char *rans_uncompress_O1_32x16(unsigned char *in,
+                                        unsigned int in_size,
+                                        unsigned char *out,
+                                        unsigned int out_sz) {
+    if (in_size < NX*4) // NX 4-byte states at least
+        return NULL;
+
+    if (out_sz >= INT_MAX)
+        return NULL; // protect against some overflow cases
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (out_sz > 100000)
+        return NULL;
+#endif
+
+    /* Load in the static tables */
+    unsigned char *cp = in, *cp_end = in+in_size, *out_free = NULL;
+    unsigned char *c_freq = NULL;
+    int i;
+
+    /*
+     * Somewhat complex memory layout.
+     * With shift==12 (TF_SHIFT_O1) we fill out and use both sfb and fb.
+     * With shift==10 (...O1_FAST)  we fill out and use s3 only.
+     *
+     * sfb+fb is larger, therefore we allocate this much memory.
+     */
+    uint8_t *sfb_ = htscodecs_tls_alloc(256*
+                                        ((TOTFREQ_O1+MAGIC2)*sizeof(*sfb_)
+                                         +256 * sizeof(fb_t)));
+    if (!sfb_)
+        return NULL;
+
+    // sfb and fb are consecutive: sfb[0..255] are the per-context rows and
+    // sfb[256], one row past the last, is where fb starts.
+    uint8_t *sfb[257];
+    if ((*cp >> 4) == TF_SHIFT_O1) {
+        for (i = 0; i <= 256; i++)
+            sfb[i]=  sfb_ + i*(TOTFREQ_O1+MAGIC2);
+    } else {
+        for (i = 0; i <= 256; i++)
+            sfb[i]=  sfb_ + i*(TOTFREQ_O1_FAST+MAGIC2);
+    }
+    fb_t (*fb)[256] = (fb_t (*)[256]) sfb[256];
+
+    // NOTE: s3 overlaps sfb/fb
+    uint32_t (*s3)[TOTFREQ_O1_FAST] = (uint32_t (*)[TOTFREQ_O1_FAST])sfb_;
+
+    if (!out)
+        out_free = out = malloc(out_sz);
+
+    if (!out)
+        goto err;
+
+    //fprintf(stderr, "out_sz=%d\n", out_sz);
+
+    // compressed header? If so uncompress it
+    unsigned char *tab_end = NULL;
+    unsigned char *c_freq_end = cp_end;
+    unsigned int shift = *cp >> 4;
+    if (*cp++ & 1) {
+        uint32_t u_freq_sz, c_freq_sz;
+        cp += var_get_u32(cp, cp_end, &u_freq_sz);
+        cp += var_get_u32(cp, cp_end, &c_freq_sz);
+        if (c_freq_sz > cp_end - cp)
+            goto err;
+        tab_end = cp + c_freq_sz;
+        if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL,u_freq_sz)))
+            goto err;
+        cp = c_freq;
+        c_freq_end = c_freq + u_freq_sz;
+    }
+
+    /* Decode order-0 symbol list; avoids needing in order-1 tables.
+     * NOTE(review): the result is added to cp without being checked;
+     * confirm how decode_freq1 reports a malformed table. */
+    cp += decode_freq1(cp, c_freq_end, shift, NULL, s3, sfb, fb);
+
+    // After a compressed table, continue in the input just beyond it.
+    if (tab_end)
+        cp = tab_end;
+    free(c_freq);
+    c_freq = NULL;
+
+    // The NX initial states come next; NX*4 bytes are required for them.
+    if (cp_end - cp < NX * 4)
+        goto err;
+
+    // ptr_end stops 2*NX bytes short of the input end; the unchecked
+    // renormalisation is only used while ptr is below it.
+    RansState R[NX];
+    uint8_t *ptr = cp, *ptr_end = in + in_size - 2*NX;
+    int z;
+    for (z = 0; z < NX; z++) {
+        RansDecInit(&R[z], &ptr);
+        if (R[z] < RANS_BYTE_L)
+            goto err;
+    }
+
+    // i4[z] is the write position of state z (the start of its segment);
+    // l[z] is its previous symbol, used as context and initially 0.
+    int isz4 = out_sz/NX;
+    int i4[NX], l[NX] = {0};
+    for (z = 0; z < NX; z++)
+        i4[z] = z*isz4;
+
+    // Input under a fifth of the output size counts as low entropy, for
+    // which the loops below always use the branched RansDecRenormSafe.
+    const int low_ent = in_size < 0.2 * out_sz;
+
+    // Around 15% faster to specialise for 10/12 than to have one
+    // loop with shift as a variable.
+    if (shift == TF_SHIFT_O1) {
+        // TF_SHIFT_O1 = 12
+        const uint32_t mask = ((1u << TF_SHIFT_O1)-1);
+        for (; likely(i4[0] < isz4);) {
+            for (z = 0; z < NX; z+=4) {
+                uint16_t m[4], c[4];
+
+                // Look up the symbol c from the context row and slot m.
+                c[0] = sfb[l[z+0]][m[0] = R[z+0] & mask];
+                c[1] = sfb[l[z+1]][m[1] = R[z+1] & mask];
+                c[2] = sfb[l[z+2]][m[2] = R[z+2] & mask];
+                c[3] = sfb[l[z+3]][m[3] = R[z+3] & mask];
+
+                // State update: R = f * (R >> shift) + m - b, with f and b
+                // taken from fb[context][symbol].
+                R[z+0] = fb[l[z+0]][c[0]].f * (R[z+0]>>TF_SHIFT_O1);
+                R[z+0] += m[0] - fb[l[z+0]][c[0]].b;
+
+                R[z+1] = fb[l[z+1]][c[1]].f * (R[z+1]>>TF_SHIFT_O1);
+                R[z+1] += m[1] - fb[l[z+1]][c[1]].b;
+
+                R[z+2] = fb[l[z+2]][c[2]].f * (R[z+2]>>TF_SHIFT_O1);
+                R[z+2] += m[2] - fb[l[z+2]][c[2]].b;
+
+                R[z+3] = fb[l[z+3]][c[3]].f * (R[z+3]>>TF_SHIFT_O1);
+                R[z+3] += m[3] - fb[l[z+3]][c[3]].b;
+
+                // Emit each symbol and make it that state's next context.
+                out[i4[z+0]++] = l[z+0] = c[0];
+                out[i4[z+1]++] = l[z+1] = c[1];
+                out[i4[z+2]++] = l[z+2] = c[2];
+                out[i4[z+3]++] = l[z+3] = c[3];
+
+                if (!low_ent && likely(ptr < ptr_end)) {
+                    RansDecRenorm(&R[z+0], &ptr);
+                    RansDecRenorm(&R[z+1], &ptr);
+                    RansDecRenorm(&R[z+2], &ptr);
+                    RansDecRenorm(&R[z+3], &ptr);
+                } else {
+                    RansDecRenormSafe(&R[z+0], &ptr, ptr_end+2*NX);
+                    RansDecRenormSafe(&R[z+1], &ptr, ptr_end+2*NX);
+                    RansDecRenormSafe(&R[z+2], &ptr, ptr_end+2*NX);
+                    RansDecRenormSafe(&R[z+3], &ptr, ptr_end+2*NX);
+                }
+            }
+        }
+
+        // Remainder: the last state alone decodes what is left of out_sz
+        for (; i4[NX-1] < out_sz; i4[NX-1]++) {
+            uint32_t m = R[NX-1] & ((1u<<TF_SHIFT_O1)-1);
+            unsigned char c = sfb[l[NX-1]][m];
+            out[i4[NX-1]] = c;
+            R[NX-1] = fb[l[NX-1]][c].f * (R[NX-1]>>TF_SHIFT_O1) +
+                m - fb[l[NX-1]][c].b;
+            RansDecRenormSafe(&R[NX-1], &ptr, ptr_end + 2*NX);
+            l[NX-1] = c;
+        }
+    } else {
+        // TF_SHIFT_O1_FAST = 10
+        const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1);
+        for (; likely(i4[0] < isz4);) {
+            for (z = 0; z < NX; z+=4) {
+                // Merged sfb and fb into single s3 lookup.
+                // The m[4] array completely vanishes in this method.
+                uint32_t S[4] = {
+                    s3[l[z+0]][R[z+0] & mask],
+                    s3[l[z+1]][R[z+1] & mask],
+                    s3[l[z+2]][R[z+2] & mask],
+                    s3[l[z+3]][R[z+3] & mask],
+                };
+
+                // The low byte of S is the symbol; it is stored and also
+                // becomes that state's next context.
+                l[z+0] = out[i4[z+0]++] = S[0];
+                l[z+1] = out[i4[z+1]++] = S[1];
+                l[z+2] = out[i4[z+2]++] = S[2];
+                l[z+3] = out[i4[z+3]++] = S[3];
+
+                // F: multiplier from the top bits of S; B: term added after.
+                uint32_t F[4] = {
+                    S[0]>>(TF_SHIFT_O1_FAST+8),
+                    S[1]>>(TF_SHIFT_O1_FAST+8),
+                    S[2]>>(TF_SHIFT_O1_FAST+8),
+                    S[3]>>(TF_SHIFT_O1_FAST+8),
+                };
+                uint32_t B[4] = {
+                    (S[0]>>8) & mask,
+                    (S[1]>>8) & mask,
+                    (S[2]>>8) & mask,
+                    (S[3]>>8) & mask,
+                };
+
+                R[z+0] = F[0] * (R[z+0]>>TF_SHIFT_O1_FAST) + B[0];
+                R[z+1] = F[1] * (R[z+1]>>TF_SHIFT_O1_FAST) + B[1];
+                R[z+2] = F[2] * (R[z+2]>>TF_SHIFT_O1_FAST) + B[2];
+                R[z+3] = F[3] * (R[z+3]>>TF_SHIFT_O1_FAST) + B[3];
+
+                if (!low_ent && (ptr < ptr_end)) {
+                    // branchless & asm
+                    RansDecRenorm(&R[z+0], &ptr);
+                    RansDecRenorm(&R[z+1], &ptr);
+                    RansDecRenorm(&R[z+2], &ptr);
+                    RansDecRenorm(&R[z+3], &ptr);
+                } else {
+                    // branched, but better when predictable
+                    RansDecRenormSafe(&R[z+0], &ptr, ptr_end+2*NX);
+                    RansDecRenormSafe(&R[z+1], &ptr, ptr_end+2*NX);
+                    RansDecRenormSafe(&R[z+2], &ptr, ptr_end+2*NX);
+                    RansDecRenormSafe(&R[z+3], &ptr, ptr_end+2*NX);
+                }
+            }
+        }
+
+        // Remainder: the last state alone decodes what is left of out_sz
+        for (; i4[NX-1] < out_sz; i4[NX-1]++) {
+            uint32_t S = s3[l[NX-1]][R[NX-1] & ((1u<<TF_SHIFT_O1_FAST)-1)];
+            out[i4[NX-1]] = l[NX-1] = S&0xff;
+            R[NX-1] = (S>>(TF_SHIFT_O1_FAST+8)) * (R[NX-1]>>TF_SHIFT_O1_FAST)
+                + ((S>>8) & ((1u<<TF_SHIFT_O1_FAST)-1));
+            RansDecRenormSafe(&R[NX-1], &ptr, ptr_end + 2*NX);
+        }
+    }
+    //fprintf(stderr, "    1 Decoded %d bytes\n", (int)(ptr-in)); //c-size
+
+    htscodecs_tls_free(sfb_);
+    return out;
+
+ err:
+    htscodecs_tls_free(sfb_);
+    free(out_free);
+    free(c_freq);
+
+    return NULL;
+}
--- a/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr.h
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr.h
@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2017-2019 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RANS_STATIC32x16PR_H
+#define RANS_STATIC32x16PR_H
+
+/*
+ * This header declares the standard scalar implementations of the 32-way
+ * unrolled rANS codec as well as the custom SIMD implementations for
+ * x86_64 and Arm AArch64 CPUs.
+ *
+ * The AVX2 and AVX512 source files need to be compiled separately as
+ * we have per-file -march= compiler options and we don't wish to
+ * accidentally get AVX instructions in the scalar variant.  The x86-64
+ * binary then contains all 3 variants at the same time and selected
+ * automatically at run time.
+ *
+ * The ARM Neon version is currently different, as we don't have any test
+ * machines without this capability. I think it's default on all 64-bit
+ * CPUs, so it's not something we're concerned with.  For simplicity
+ * therefore the ARM code is simply #included into the scalar file.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//----------------------------------------------------------------------
+/* Standard scalar versions (O0 = order-0, O1 = order-1).
+ *
+ * Encoders: "out" may be NULL to have the output buffer allocated;
+ * otherwise *out_size gives the capacity of "out".  On success *out_size
+ * is set to the compressed length.
+ * Decoders: "out" may be NULL to have an out_sz byte buffer allocated.
+ * All four return the output buffer, or NULL on failure.
+ */
+unsigned char *rans_compress_O0_32x16(unsigned char *in,
+                                      unsigned int in_size,
+                                      unsigned char *out,
+                                      unsigned int *out_size);
+
+unsigned char *rans_uncompress_O0_32x16(unsigned char *in,
+                                        unsigned int in_size,
+                                        unsigned char *out,
+                                        unsigned int out_sz);
+
+unsigned char *rans_compress_O1_32x16(unsigned char *in,
+                                      unsigned int in_size,
+                                      unsigned char *out,
+                                      unsigned int *out_size);
+
+unsigned char *rans_uncompress_O1_32x16(unsigned char *in,
+                                        unsigned int in_size,
+                                        unsigned char *out,
+                                        unsigned int out_sz);
+
+//----------------------------------------------------------------------
+/* Intel SSE4 implementation.  Declared here: the O0 encoder and the O0 and
+ * O1 decoders; no SSE4 O1 encoder is declared.  Only available when
+ * HAVE_SSE4_1, HAVE_SSSE3 and HAVE_POPCNT are all defined. */
+#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT)
+unsigned char *rans_compress_O0_32x16_sse4(unsigned char *in,
+                                           unsigned int in_size,
+                                           unsigned char *out,
+                                           unsigned int *out_size);
+
+unsigned char *rans_uncompress_O0_32x16_sse4(unsigned char *in,
+                                             unsigned int in_size,
+                                             unsigned char *out,
+                                             unsigned int out_sz);
+
+unsigned char *rans_uncompress_O1_32x16_sse4(unsigned char *in,
+                                             unsigned int in_size,
+                                             unsigned char *out,
+                                             unsigned int out_sz);
+#endif
+
+//----------------------------------------------------------------------
+/* Intel AVX2 implementation: O0 and O1 encoders and decoders with the same
+ * signatures as the scalar versions.  Declared only when HAVE_AVX2 is
+ * defined. */
+#ifdef HAVE_AVX2
+unsigned char *rans_compress_O0_32x16_avx2(unsigned char *in,
+                                           unsigned int in_size,
+                                           unsigned char *out,
+                                           unsigned int *out_size);
+
+unsigned char *rans_uncompress_O0_32x16_avx2(unsigned char *in,
+                                             unsigned int in_size,
+                                             unsigned char *out,
+                                             unsigned int out_sz);
+
+unsigned char *rans_compress_O1_32x16_avx2(unsigned char *in,
+                                           unsigned int in_size,
+                                           unsigned char *out,
+                                           unsigned int *out_size);
+
+unsigned char *rans_uncompress_O1_32x16_avx2(unsigned char *in,
+                                             unsigned int in_size,
+                                             unsigned char *out,
+                                             unsigned int out_sz);
+#endif // HAVE_AVX2
+
+//----------------------------------------------------------------------
+/* Intel AVX512 implementation: O0 and O1 encoders and decoders with the
+ * same signatures as the scalar versions.  Declared only when HAVE_AVX512
+ * is defined. */
+#ifdef HAVE_AVX512
+unsigned char *rans_compress_O0_32x16_avx512(unsigned char *in,
+                                             unsigned int in_size,
+                                             unsigned char *out,
+                                             unsigned int *out_size);
+
+unsigned char *rans_uncompress_O0_32x16_avx512(unsigned char *in,
+                                               unsigned int in_size,
+                                               unsigned char *out,
+                                               unsigned int out_sz);
+
+unsigned char *rans_compress_O1_32x16_avx512(unsigned char *in,
+                                             unsigned int in_size,
+                                             unsigned char *out,
+                                             unsigned int *out_size);
+
+unsigned char *rans_uncompress_O1_32x16_avx512(unsigned char *in,
+                                               unsigned int in_size,
+                                               unsigned char *out,
+                                               unsigned int out_sz);
+#endif // HAVE_AVX512
+
+//----------------------------------------------------------------------
+/* Arm Neon implementation: O0 and O1 encoders and decoders with the same
+ * signatures as the scalar versions.  Declared only when both __ARM_NEON
+ * and __aarch64__ are defined. */
+#if defined(__ARM_NEON) && defined(__aarch64__)
+unsigned char *rans_compress_O0_32x16_neon(unsigned char *in,
+                                           unsigned int in_size,
+                                           unsigned char *out,
+                                           unsigned int *out_size);
+
+unsigned char *rans_uncompress_O0_32x16_neon(unsigned char *in,
+                                             unsigned int in_size,
+                                             unsigned char *out,
+                                             unsigned int out_sz);
+
+unsigned char *rans_compress_O1_32x16_neon(unsigned char *in,
+                                           unsigned int in_size,
+                                           unsigned char *out,
+                                           unsigned int *out_size);
+
+unsigned char *rans_uncompress_O1_32x16_neon(unsigned char *in,
+                                             unsigned int in_size,
+                                             unsigned char *out,
+                                             unsigned int out_sz);
+#endif // ARM_NEON
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RANS_STATIC32x16PR_H */
--- a/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr_avx2.c
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr_avx2.c
--- a/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr_avx512.c
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr_avx512.c
--- a/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr_neon.c
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr_neon.c
--- a/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr_sse4.c
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static32x16pr_sse4.c
--- a/ext/htslib/htscodecs/htscodecs/rANS_static4x16.h
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static4x16.h
@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017-2019 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RANS_STATIC4x16_H
+#define RANS_STATIC4x16_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+unsigned int rans_compress_bound_4x16(unsigned int size, int order);
+unsigned char *rans_compress_to_4x16(unsigned char *in,  unsigned int in_size,
+                                     unsigned char *out, unsigned int *out_size,
+                                     int order);
+unsigned char *rans_compress_4x16(unsigned char *in, unsigned int in_size,
+                                  unsigned int *out_size, int order);
+unsigned char *rans_uncompress_to_4x16(unsigned char *in,  unsigned int in_size,
+                                       unsigned char *out, unsigned int *out_size);
+unsigned char *rans_uncompress_4x16(unsigned char *in, unsigned int in_size,
+                                    unsigned int *out_size);
+
+// CPU detection control.  Used for testing and benchmarking.
+// These bitfields control what methods are permitted to be used.
+#define RANS_CPU_ENC_SSE4     (1<<0)
+#define RANS_CPU_ENC_AVX2     (2<<0)
+#define RANS_CPU_ENC_AVX512   (4<<0)
+#define RANS_CPU_ENC_NEON     (8<<0)
+
+#define RANS_CPU_DEC_SSE4     (1<<8)
+#define RANS_CPU_DEC_AVX2     (2<<8)
+#define RANS_CPU_DEC_AVX512   (4<<8)
+#define RANS_CPU_DEC_NEON     (8<<8)
+
+void rans_set_cpu(int opts);
+
+// "Order" byte options. ORed into the order byte.
+// The bottom bits are the order itself, currently
+// supporting order-0 and order-1 but with expansion room
+// up to order-3 (unlikely).
+
+//--
+// The values below are stored in the file format
+
+// Pack 2,4,8 or infinite symbols into a byte.
+#define RANS_ORDER_PACK   0x80
+
+// Run length encoding with runs & lits encoded separately
+#define RANS_ORDER_RLE    0x40
+
+// Nop; for tiny segments where rANS overhead is too big
+#define RANS_ORDER_CAT    0x20
+
+// Don't store the original size; used by STRIPE mode
+#define RANS_ORDER_NOSZ   0x10
+
+// For N-byte integer data; rotate & encode N streams.
+#define RANS_ORDER_STRIPE 0x08
+
+// 32-way unrolling instead of 4-way
+#define RANS_ORDER_X32    0x04
+
+//--
+// order values below are not directly part of the file format, but control
+// the behaviour of the encoder.
+
+// Bit 8-15 of order hold the stripe size (N).
+// Note: N is stored separately after the order byte
+
+// Used to disable order-0 in the STRIPE sub-methods.
+#define RANS_ORDER_STRIPE_NO0 (1<<16)
+
+// Used to request automatic selection between 4-way and 32-way
+#define RANS_ORDER_SIMD_AUTO  (1<<17)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RANS_STATIC4x16_H */
--- a/ext/htslib/htscodecs/htscodecs/rANS_static4x16pr.c
+++ b/ext/htslib/htscodecs/htscodecs/rANS_static4x16pr.c
--- a/ext/htslib/htscodecs/htscodecs/rANS_word.h
+++ b/ext/htslib/htscodecs/htscodecs/rANS_word.h
@ -0,0 +1,478 @@
+/* rans_byte.h originally from https://github.com/rygorous/ryg_rans
+ *
+ * This is a public-domain implementation of several rANS variants. rANS is an
+ * entropy coder from the ANS family, as described in Jarek Duda's paper
+ * "Asymmetric numeral systems" (http://arxiv.org/abs/1311.2540).
+ */
+
+/*-------------------------------------------------------------------------- */
+/* rans_byte.h from https://github.com/rygorous/ryg_rans */
+
+// Simple byte-aligned rANS encoder/decoder - public domain - Fabian 'ryg' Giesen 2014
+//
+// Not intended to be "industrial strength"; just meant to illustrate the general
+// idea.
+
+#ifndef RANS_WORD_HEADER
+#define RANS_WORD_HEADER
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include "htscodecs_endian.h"
+
+#ifdef assert
+#define RansAssert assert
+#else
+#define RansAssert(x)
+#endif
+
+// READ ME FIRST:
+//
+// This is designed like a typical arithmetic coder API, but there's three
+// twists you absolutely should be aware of before you start hacking:
+//
+// 1. You need to encode data in *reverse* - last symbol first. rANS works
+//    like a stack: last in, first out.
+// 2. Likewise, the encoder outputs bytes *in reverse* - that is, you give
+//    it a pointer to the *end* of your buffer (exclusive), and it will
+//    slowly move towards the beginning as more bytes are emitted.
+// 3. Unlike basically any other entropy coder implementation you might
+//    have used, you can interleave data from multiple independent rANS
+//    encoders into the same bytestream without any extra signaling;
+//    you can also just write some bytes by yourself in the middle if
+//    you want to. This is in addition to the usual arithmetic encoder
+//    property of being able to switch models on the fly. Writing raw
+//    bytes can be useful when you have some data that you know is
+//    incompressible, and is cheaper than going through the rANS encode
+//    function. Using multiple rANS coders on the same byte stream wastes
+//    a few bytes compared to using just one, but execution of two
+//    independent encoders can happen in parallel on superscalar and
+//    Out-of-Order CPUs, so this can be *much* faster in tight decoding
+//    loops.
+//
+//    This is why all the rANS functions take the write pointer as an
+//    argument instead of just storing it in some context struct.
+
+// --------------------------------------------------------------------------
+
+// L ('l' in the paper) is the lower bound of our normalization interval.
+// Between this and our 16-bit word emission, we use 31 (not 32!) bits.
+// This is done intentionally because exact reciprocals for 31-bit uints
+// fit in 32-bit uints: this permits some optimizations during encoding.
+#define RANS_BYTE_L (1u << 15)  // lower bound of our normalization interval
+
+// State for a rANS encoder. Yep, that's all there is to it.
+typedef uint32_t RansState;
+
+// Initialize a rANS encoder.
+// Sets the state *r to RANS_BYTE_L, the lower bound of the normalization
+// interval.
+static inline void RansEncInit(RansState* r)
+{
+    *r = RANS_BYTE_L;
+}
+
+// Renormalize the encoder. Internal function.
+//
+// If the state x is too large to absorb a symbol of frequency "freq"
+// (out of 1 << scale_bits), the low 16 bits of x are emitted immediately
+// below *pptr, *pptr is moved down by two bytes and x is shifted right
+// by 16.  Returns the (possibly reduced) state.
+//
+// The word is stored least-significant byte first using byte stores.  This
+// matches RansEncPutSymbol and the decoder's renormalization regardless of
+// host byte order, and places no alignment requirement on *pptr.
+static inline RansState RansEncRenorm(RansState x, uint8_t** pptr, uint32_t freq, uint32_t scale_bits)
+{
+    // Inclusive upper bound of the pre-normalization interval.
+    uint32_t x_max = ((RANS_BYTE_L >> scale_bits) << 16) * freq-1; // this turns into a shift.
+    if (x > x_max) {
+        uint8_t* ptr = *pptr;
+        ptr -= 2;
+        ptr[0] = (uint8_t) (x & 0xff);
+        ptr[1] = (uint8_t) ((x >> 8) & 0xff);
+        x >>= 16;
+        *pptr = ptr;
+    }
+    return x;
+}
+
+// Encode one symbol whose cumulative-frequency range begins at "start" and
+// has width "freq".  The frequencies of all symbols are assumed to sum to
+// "1 << scale_bits".  Any data produced by renormalization is written just
+// below *pptr, which is updated.
+//
+// NOTE: rANS behaves like a stack, so symbols must be encoded in *reverse
+// order*, i.e. from the end of the input to the beginning.  Likewise, the
+// output bytestream is written *backwards*: *pptr starts at the end of the
+// output buffer and keeps decrementing.
+static inline void RansEncPut(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits)
+{
+    // Make room in the state first.
+    const RansState state = RansEncRenorm(*r, pptr, freq, scale_bits);
+
+    // x = C(s,x)
+    const uint32_t quot = state / freq;
+    const uint32_t rem  = state % freq;
+    *r = (quot << scale_bits) + rem + start;
+}
+
+// Flush the rANS encoder: store the final 32-bit state, least-significant
+// byte first, in the four bytes just below *pptr and move *pptr down to
+// the first of them.
+static inline void RansEncFlush(RansState* r, uint8_t** pptr)
+{
+    const uint32_t state = *r;
+    uint8_t* dst = *pptr - 4;
+    int i;
+
+    for (i = 0; i < 4; i++)
+        dst[i] = (uint8_t) (state >> (8 * i));
+
+    *pptr = dst;
+}
+
+// Initializes a rANS decoder by loading the 32-bit state (stored
+// least-significant byte first) from *pptr and advancing *pptr past it.
+// Unlike the encoder, the decoder works forwards as you'd expect.
+static inline void RansDecInit(RansState* r, uint8_t** pptr)
+{
+    const uint8_t* src = *pptr;
+    uint32_t state = 0;
+    int i;
+
+    // Assemble from the most significant byte down.
+    for (i = 3; i >= 0; i--)
+        state = (state << 8) | src[i];
+
+    *pptr += 4;
+    *r = state;
+}
+
+// Returns the current cumulative frequency (map it to a symbol yourself!)
+// This is the low "scale_bits" bits of the state; the state itself is not
+// modified.
+static inline uint32_t RansDecGet(RansState* r, uint32_t scale_bits)
+{
+    return *r & ((1u << scale_bits) - 1);
+}
+
+// Advances in the bit stream by "popping" a single symbol with range start
+// "start" and frequency "freq". All frequencies are assumed to sum to
+// "1 << scale_bits".  Any renormalization data needed is read from *pptr,
+// which is advanced past it.
+//
+// The encoders in this file emit renormalization data as a single 16-bit
+// word, least-significant byte first, so the decoder has to consume it the
+// same way: one two-byte read whenever the state drops below RANS_BYTE_L.
+static inline void RansDecAdvance(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits)
+{
+    uint32_t mask = (1u << scale_bits) - 1;
+
+    // s, x = D(x)
+    uint32_t x = *r;
+    x = freq * (x >> scale_bits) + (x & mask) - start;
+
+    // renormalize
+    if (x < RANS_BYTE_L) {
+        uint8_t* ptr = *pptr;
+        x = (x << 16) | ptr[0] | ((uint32_t)ptr[1] << 8);
+        *pptr = ptr + 2;
+    }
+
+    *r = x;
+}
+
+// --------------------------------------------------------------------------
+
+// That's all you need for a full encoder; below here are some utility
+// functions with extra convenience or optimizations.
+
+// Encoder symbol description
+// This (admittedly odd) selection of parameters was chosen to make
+// RansEncPutSymbol as cheap as possible.
+typedef struct {
+    uint32_t x_max;     // Inclusive upper bound of pre-normalization interval
+                        // (the encoder renormalizes when x > x_max)
+    uint32_t rcp_freq;  // Fixed-point reciprocal frequency
+    uint32_t bias;      // Bias
+
+    // NB: This pair are read as a 32-bit value by the SIMD o1 encoder.
+    uint16_t cmpl_freq; // Complement of frequency: (1 << scale_bits) - freq
+    uint16_t rcp_shift; // Reciprocal shift, including the extra 32 added at
+                        // the end of RansEncSymbolInit
+} RansEncSymbol;
+
+// As above, but with cmpl_freq and rcp_shift combined into
+// a single value.  This could be done with a cast, but it avoids
+// a type punning error.  We could use a union, but anonymous unions
+// are C11 only (still, that's 10 years old!).  For now we just cheat
+// instead.
+//
+// The first three fields are declared exactly as in RansEncSymbol; the
+// final 32-bit field takes the place of the two 16-bit fields there.
+typedef struct {
+    uint32_t x_max;     // Inclusive upper bound of pre-normalization interval
+    uint32_t rcp_freq;  // Fixed-point reciprocal frequency
+    uint32_t bias;      // Bias
+
+    uint32_t cmpl_freq; // cmpl_freq+rcp_shift
+} RansEncSymbol_simd;
+
+// Decoder symbols are straightforward.
+// Both values are held in 16 bits; see RansDecSymbolInit for the range
+// checks applied when they are set.
+typedef struct {
+    uint16_t start;     // Start of range.
+    uint16_t freq;      // Symbol frequency.
+} RansDecSymbol;
+
+// Initializes an encoder symbol to start "start" and frequency "freq"
+// (out of a total of 1 << scale_bits), precomputing the values that let
+// RansEncPutSymbol replace its division by a multiply and a shift.
+static inline void RansEncSymbolInit(RansEncSymbol* s, uint32_t start, uint32_t freq, uint32_t scale_bits)
+{
+    RansAssert(scale_bits <= 16);
+    RansAssert(start <= (1u << scale_bits));
+    RansAssert(freq <= (1u << scale_bits) - start);
+
+    // Say M := 1 << scale_bits.
+    //
+    // The original encoder does:
+    //   x_new = (x/freq)*M + start + (x%freq)
+    //
+    // The fast encoder does (schematically):
+    //   q     = mul_hi(x, rcp_freq) >> rcp_shift   (division)
+    //   r     = x - q*freq                         (remainder)
+    //   x_new = q*M + bias + r                     (new x)
+    // plugging in r into x_new yields:
+    //   x_new = bias + x + q*(M - freq)
+    //        =: bias + x + q*cmpl_freq             (*)
+    //
+    // and we can just precompute cmpl_freq. Now we just need to
+    // set up our parameters such that the original encoder and
+    // the fast encoder agree.
+    
+    // Inclusive bound: RansEncPutSymbol renormalizes when x > x_max.
+    // For freq=0 the unsigned subtraction wraps to 0xffffffff, so such a
+    // symbol never triggers renormalization.
+    s->x_max = ((RANS_BYTE_L >> scale_bits) << 16) * freq -1;
+    s->cmpl_freq = (uint16_t) ((1 << scale_bits) - freq);
+    if (freq < 2) {
+        // freq=0 symbols are never valid to encode, so it doesn't matter what
+        // we set our values to.
+        //
+        // freq=1 is tricky, since the reciprocal of 1 is 1; unfortunately,
+        // our fixed-point reciprocal approximation can only multiply by values
+        // smaller than 1.
+        //
+        // So we use the "next best thing": rcp_freq=0xffffffff, rcp_shift=0.
+        // This gives:
+        //   q = mul_hi(x, rcp_freq) >> rcp_shift
+        //     = mul_hi(x, (1<<32) - 1)) >> 0
+        //     = floor(x - x/(2^32))
+        //     = x - 1 if 1 <= x < 2^32
+        // and we know that x>0 (x=0 is never in a valid normalization interval).
+        //
+        // So we now need to choose the other parameters such that
+        //   x_new = x*M + start
+        // plug it in:
+        //     x*M + start                   (desired result)
+        //   = bias + x + q*cmpl_freq        (*)
+        //   = bias + x + (x - 1)*(M - 1)    (plug in q=x-1, cmpl_freq)
+        //   = bias + 1 + (x - 1)*M
+        //   = x*M + (bias + 1 - M)
+        //
+        // so we have start = bias + 1 - M, or equivalently
+        //   bias = start + M - 1.
+        s->rcp_freq = ~0u;
+        s->rcp_shift = 0;
+        s->bias = start + (1 << scale_bits) - 1;
+    } else {
+        // Alverson, "Integer Division using reciprocals"
+        // shift=ceil(log2(freq))
+        uint32_t shift = 0;
+        while (freq > (1u << shift))
+            shift++;
+
+        s->rcp_freq = (uint32_t) (((1ull << (shift + 31)) + freq-1) / freq);
+        s->rcp_shift = shift - 1;
+
+        // With these values, 'q' is the correct quotient, so we
+        // have bias=start.
+        s->bias = start;
+    }
+
+    s->rcp_shift += 32; // Avoid the extra >>32 in RansEncPutSymbol
+}
+
+// Initialize a decoder symbol to start "start" and frequency "freq"
+// Both values are narrowed to 16 bits for storage; the assertions check
+// only that start and start+freq do not exceed 1 << 16.
+static inline void RansDecSymbolInit(RansDecSymbol* s, uint32_t start, uint32_t freq)
+{
+    RansAssert(start <= (1 << 16));
+    RansAssert(freq <= (1 << 16) - start);
+    s->start = (uint16_t) start;
+    s->freq = (uint16_t) freq;
+}
+
+// Encodes a given symbol. This is faster than straight RansEnc since we can do
+// multiplications instead of a divide.
+//
+// See RansEncSymbolInit for a description of how this works.
+//
+// *r is the encoder state (updated); *pptr is the output position, which
+// moves down by two bytes whenever a 16-bit word is emitted.
+static inline void RansEncPutSymbol(RansState* r, uint8_t** pptr, RansEncSymbol const* sym)
+{
+    //RansAssert(sym->x_max != 0); // can't encode symbol with freq=0
+
+    // renormalize
+    uint32_t x = *r;
+    uint32_t x_max = sym->x_max;
+
+#ifdef HTSCODECS_LITTLE_ENDIAN
+    // Branchless renorm.
+    //
+    // This works best on high entropy data where branch prediction
+    // is poor.
+    //
+    // Note the bit-packing and RLE modes are more likely to be used on
+    // low entropy data, making this assertion generally true.  See
+    // RansEncPutSymbol_branched for a low-entropy optimised function.
+
+    // NB: "(x > x_max)*2" turns back into branched code with gcc.
+    // c is 2 when a word must be emitted and 0 otherwise.
+    int c = (x > x_max); c*=2;
+    // The low 16 bits are stored unconditionally at *pptr-2; they only
+    // become part of the output when c is 2 and *pptr moves down over them.
+    // NOTE(review): this appears to need two writable bytes below *pptr on
+    // every call -- confirm callers leave that slack.
+    memcpy(*pptr-2, &x, 2);
+    x >>= c*8;
+    *pptr = *pptr - c;
+#else
+    // Portable path: emit the low 16 bits least-significant byte first.
+    if (x > x_max) {
+        uint8_t* ptr = *pptr;
+        ptr -= 2;
+        ptr[0] = x & 0xff;
+        ptr[1] = (x >> 8) & 0xff;
+        x >>= 16;
+        *pptr = ptr;
+    }
+#endif
+
+    // x = C(s,x)
+    // NOTE: written this way so we get a 32-bit "multiply high" when
+    // available. If you're on a 64-bit platform with cheap multiplies
+    // (e.g. x64), just bake the +32 into rcp_shift.
+    //uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> 32) >> sym->rcp_shift;
+
+    // Slow method, but robust
+//    *r = ((x / sym->freq) << sym->scale_bits) + (x % sym->freq) + sym->start;
+//    return;
+
+    // The extra >>32 has already been added to RansEncSymbolInit
+    uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift);
+    *r = x + sym->bias + q * sym->cmpl_freq;
+
+//    assert(((x / sym->freq) << sym->scale_bits) + (x % sym->freq) + sym->start == *r);
+}
+
+// Branching counterpart of RansEncPutSymbol: the 16-bit renormalization
+// word is written only when it is actually needed.  The resulting state and
+// emitted bytes follow the same rules as RansEncPutSymbol; see
+// RansEncSymbolInit for how the precomputed symbol fields are derived.
+static inline void RansEncPutSymbol_branched(RansState* r, uint8_t** pptr, RansEncSymbol const* sym)
+{
+    uint32_t state = *r;
+
+    // Renormalize: push the low 16 bits, least-significant byte first.
+    if (state > sym->x_max) {
+        uint8_t* dst = *pptr - 2;
+#ifdef HTSCODECS_LITTLE_ENDIAN
+        memcpy(dst, &state, 2);
+#else
+        dst[0] = state & 0xff;
+        dst[1] = (state >> 8) & 0xff;
+#endif
+        state >>= 16;
+        *pptr = dst;
+    }
+
+    // x = C(s,x), using the precomputed reciprocal in place of a divide.
+    // rcp_shift already includes the extra 32 (see RansEncSymbolInit).
+    uint32_t quot = (uint32_t) (((uint64_t)state * sym->rcp_freq) >> sym->rcp_shift);
+    *r = state + sym->bias + quot * sym->cmpl_freq;
+}
+
+// Equivalent to RansDecAdvance that takes a symbol.
+// The symbol's start and freq fields are passed straight through, together
+// with r, pptr and scale_bits.
+static inline void RansDecAdvanceSymbol(RansState* r, uint8_t** pptr, RansDecSymbol const* sym, uint32_t scale_bits)
+{
+    RansDecAdvance(r, pptr, sym->start, sym->freq, scale_bits);
+}
+
+// Pops one symbol, described by its range start "start" and frequency
+// "freq", from the decoder state.  All frequencies are assumed to sum to
+// "1 << scale_bits".  Only *r is updated: no input is consumed and no
+// renormalization is performed here.
+static inline void RansDecAdvanceStep(RansState* r, uint32_t start, uint32_t freq, uint32_t scale_bits)
+{
+    const uint32_t state = *r;
+    const uint32_t slot  = state & ((1u << scale_bits) - 1);
+    const uint32_t upper = state >> scale_bits;
+
+    // s, x = D(x)
+    *r = freq * upper + slot - start;
+}
+
+// Equivalent to RansDecAdvanceStep that takes a symbol.
+// The symbol's start and freq fields are passed straight through; as with
+// RansDecAdvanceStep, no renormalization happens.
+static inline void RansDecAdvanceSymbolStep(RansState* r, RansDecSymbol const* sym, uint32_t scale_bits)
+{
+    RansDecAdvanceStep(r, sym->start, sym->freq, scale_bits);
+}
+
+// Renormalize.
+
+#if defined(__x86_64) && !defined(__ILP32__)
+
+/*
+ * Assembly variants of the RansDecRenorm code.
+ * These are based on joint ideas from Rob Davies and from looking at
+ * the clang assembly output.
+ *
+ * If the state is below 0x8000 (the value of RANS_BYTE_L), it is shifted
+ * left by 16, the 16-bit word at *pptr is ORed in and *pptr advances by 2;
+ * otherwise both are left unchanged.  The word is loaded unconditionally,
+ * so two bytes at *pptr are read even when no renormalization takes place.
+ */
+static inline void RansDecRenorm(RansState* r, uint8_t** pptr) {
+    // NOTE(review): the figures below look like benchmark results for two
+    // test inputs -- units and meaning are not stated here; confirm.
+    //       q4        q40
+    // clang 730/608   717/467
+    // gcc8  733/588   737/458
+    uint32_t  x   = *r;
+    uint8_t  *ptr = *pptr;
+    __asm__ ("movzwl (%0),  %%eax\n\t"   // eax = 16-bit word at ptr
+             "mov    %1,    %%edx\n\t"   // edx = x
+             "shl    $0x10, %%edx\n\t"   // edx = x << 16
+             "or     %%eax, %%edx\n\t"   // edx = (x << 16) | word
+             "xor    %%eax, %%eax\n\t"   // clear eax
+             "cmp    $0x8000,%1\n\t"     // compare x with 0x8000
+             "cmovb  %%edx, %1\n\t"      // if below: x = edx
+             "lea    2(%0), %%rax\n\t"   // rax = ptr + 2
+             "cmovb  %%rax, %0\n\t"      // if below: ptr = ptr + 2
+             : "=r" (ptr), "=r" (x)
+             : "0"  (ptr), "1"  (x)
+             : "eax", "edx"
+             );
+    *pptr = (uint8_t *)ptr;
+    *r = x;
+}
+
+#else /* __x86_64 */
+
+// Renormalize the decoder state: when *r is below RANS_BYTE_L, shift it
+// left by 16, OR in the 16-bit word (least-significant byte first) at *pptr
+// and advance *pptr by 2.  Otherwise *r and *pptr are left unchanged.
+static inline void RansDecRenorm(RansState* r, uint8_t** pptr)
+{
+    // renormalize, branchless
+    uint32_t x = *r;
+    // cmp is 2 when a word is needed and 0 otherwise; it doubles as the
+    // pointer increment below.
+    int cmp = (x < RANS_BYTE_L)*2;
+    // The two bytes are read unconditionally, even when they are not used.
+    uint32_t y = (*pptr)[0] + ((*pptr)[1]<<8);
+    uint32_t x2 = (x << 16) | y;
+    x = cmp ? x2 : x;
+    (*pptr) += cmp;
+    *r = x;
+
+//    // renormalize, branched.  Faster on low-complexity data, but generally
+//    // that is best compressed with PACK and/or RLE which turns it back
+//    // into high complexity data.
+//    uint32_t x = *r;
+//    uint32_t y = (*pptr)[0] | ((*pptr)[1]<<8);
+//
+//    if (x < RANS_BYTE_L)
+//      (*pptr)+=2;
+//    if (x < RANS_BYTE_L)
+//      x = (x << 16) | y;
+//
+//    *r = x;
+}
+#endif /* __x86_64 */
+
+// Bounds-checked renormalization.  Nothing is read unless the state is
+// below RANS_BYTE_L and two whole bytes remain before ptr_end; in every
+// other case *r and *pptr are left untouched.
+//
+// Note the data may not be word aligned here.
+// This function is only used sparingly, for the last few bytes in the buffer,
+// so speed isn't critical.
+static inline void RansDecRenormSafe(RansState* r, uint8_t** pptr, uint8_t *ptr_end)
+{
+    const uint32_t state = *r;
+
+    if (state >= RANS_BYTE_L)
+        return;
+
+    const uint8_t *src = *pptr;
+    if (src + 1 >= ptr_end)
+        return;
+
+    // 16-bit word, least-significant byte first.
+    const uint32_t word = src[0] | ((uint32_t)src[1] << 8);
+
+    *r = (state << 16) | word;
+    *pptr += 2;
+}
+
+#endif // RANS_WORD_HEADER
--- a/ext/htslib/htscodecs/htscodecs/rle.c
+++ b/ext/htslib/htscodecs/htscodecs/rle.c
@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2019-2021 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "varint.h"
+#include "rle.h"
+
+#define MAGIC 8
+
+//-----------------------------------------------------------------------------
+// Auto compute rle_syms / rle_nsyms
+//
+// Scores every byte value over data[0..data_len-1]: +1 each time the byte
+// equals its predecessor, -1 each time it does not (the first byte always
+// counts as -1).  Scores are added into saved[], so the caller supplies the
+// starting values.  Byte values whose final score is positive are written
+// to rle_syms in ascending order and their count to *rle_nsyms.
+static void rle_find_syms(uint8_t *data, uint64_t data_len,
+                          int64_t *saved, // dim >= 256 
+                          uint8_t *rle_syms, int *rle_nsyms) {
+    int prev = -1;
+    uint64_t pos = 0;
+
+    if (data_len <= 256) {
+        // 163/391
+        for (; pos < data_len; pos++) {
+            int sym = data[pos];
+            if (sym == prev) {
+                saved[sym]++;
+            } else {
+                saved[sym]--;
+                prev = sym;
+            }
+        }
+    } else {
+        // 186/450
+        // Interleaved buffers to avoid cache collisions
+        int64_t lane1[256+MAGIC] = {0};
+        int64_t lane2[256+MAGIC] = {0};
+        int64_t lane3[256+MAGIC] = {0};
+        const uint64_t quad_end = data_len & ~(uint64_t)3;
+
+        // Four bytes per iteration, one accumulator array per position.
+        for (; pos < quad_end; pos += 4) {
+            const uint8_t *q = &data[pos];
+            saved[q[0]] += (q[0] == prev) ? 1 : -1;
+            lane1[q[1]] += (q[1] == q[0]) ? 1 : -1;
+            lane2[q[2]] += (q[2] == q[1]) ? 1 : -1;
+            lane3[q[3]] += (q[3] == q[2]) ? 1 : -1;
+            prev = q[3];
+        }
+
+        // Up to three leftover bytes.
+        for (; pos < data_len; pos++) {
+            saved[data[pos]] += (data[pos] == prev) ? 1 : -1;
+            prev = data[pos];
+        }
+
+        // Fold the per-position accumulators into saved[].
+        for (int s = 0; s < 256; s++)
+            saved[s] += lane1[s] + lane2[s] + lane3[s];
+    }
+
+    // Map back to a list
+    int count = 0;
+    for (int s = 0; s < 256; s++) {
+        if (saved[s] > 0)
+            rle_syms[count++] = (uint8_t) s;
+    }
+    *rle_nsyms = count;
+}
+
+/*
+ * Run-length encodes data[0..data_len-1] into a literal stream (out) and a
+ * run-length stream (run).
+ *
+ * If *rle_nsyms is non-zero, rle_syms[0..*rle_nsyms-1] lists the symbols
+ * whose runs are encoded.  If it is zero, the symbols are chosen by
+ * rle_find_syms and written back to rle_syms / *rle_nsyms.
+ *
+ * Each run of a chosen symbol produces one literal in out[] plus one
+ * var_put_u32 value in run[] holding the number of additional repeats
+ * (0 for a lone occurrence).  All other bytes are copied to out[] as-is.
+ *
+ * If out is NULL a buffer is allocated here and returned; nothing in this
+ * function frees it.
+ * NOTE(review): run[] and a caller-supplied out[] are written without
+ * bounds checks -- callers presumably size them for the worst case; confirm.
+ *
+ * Returns out, with *out_len and *run_len set to the bytes used in each
+ * stream, or NULL if the output buffer could not be allocated.
+ */
+uint8_t *hts_rle_encode(uint8_t *data, uint64_t data_len,
+                        uint8_t *run,  uint64_t *run_len,
+                        uint8_t *rle_syms, int *rle_nsyms,
+                        uint8_t *out, uint64_t *out_len) {
+    uint64_t i, j, k;
+
+    if (!out) {
+        // Guard the doubling against overflow, and never request zero
+        // bytes: malloc(0) may return NULL, which would make empty input
+        // indistinguishable from an allocation failure.
+        if (data_len > SIZE_MAX / 2)
+            return NULL;
+        if (!(out = malloc(data_len ? data_len*2 : 1)))
+            return NULL;
+    }
+
+    // Two pass:  Firstly compute which symbols are worth using RLE on.
+    int64_t saved[256+MAGIC] = {0};
+
+    if (*rle_nsyms) {
+        // Signed index: a negative count selects no symbols rather than
+        // being converted to a huge unsigned bound.
+        int s;
+        for (s = 0; s < *rle_nsyms; s++)
+            saved[rle_syms[s]] = 1;
+    } else {
+        // Writes back to rle_syms and rle_nsyms
+        rle_find_syms(data, data_len, saved, rle_syms, rle_nsyms);
+    }
+
+    // 2nd pass: perform RLE itself to out[] and run[] arrays.
+    for (i = j = k = 0; i < data_len; i++) {
+        out[k++] = data[i];
+        if (saved[data[i]] > 0) {
+            // Keep the run start in 64 bits so offsets past INT_MAX are
+            // not narrowed.
+            uint64_t run_start = i;
+            uint8_t last = data[i];
+            while (i < data_len && data[i] == last)
+                i++;
+            i--;    // the outer loop's i++ steps onto the next symbol
+
+            j += var_put_u32(&run[j], NULL, (uint32_t)(i - run_start));
+        }
+    }
+    
+    *run_len = j;
+    *out_len = k;
+    return out;
+}
+
+// Expands a literal stream (lit) and run-length stream (run) into out[].
+//
+// rle_syms[0..rle_nsyms-1] lists the symbols that carry a run length.  Each
+// such literal is followed in run[] by a var_get_u32 value giving the number
+// of additional repeats; every other literal is copied once.
+//
+// On input *out_len holds the allocated size of out[].
+// On output it holds the used size of out[].
+//
+// Returns out on success, or NULL if the decoded data does not fit in
+// out[].
+uint8_t *hts_rle_decode(uint8_t *lit, uint64_t lit_len,
+                        uint8_t *run, uint64_t run_len,
+                        uint8_t *rle_syms, int rle_nsyms,
+                        uint8_t *out, uint64_t *out_len) {
+    int s;
+    uint8_t *run_end = run + run_len;
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (*out_len > 100000)
+        return NULL;
+#endif
+
+    // Signed index: a negative rle_nsyms selects no symbols rather than
+    // being converted to a huge unsigned bound.
+    int saved[256] = {0};
+    for (s = 0; s < rle_nsyms; s++)
+        saved[rle_syms[s]] = 1;
+
+    uint8_t *lit_end = lit + lit_len;
+    uint8_t *out_end = out + *out_len;
+    uint8_t *outp = out;
+
+    while (lit < lit_end) {
+        if (outp >= out_end)
+            goto err;
+
+        uint8_t b = *lit;
+        if (saved[b]) {
+            uint32_t rlen = 0;
+            run += var_get_u32(run, run_end, &rlen);
+            if (rlen) {
+                // outp < out_end here, so the difference is positive.
+                // Compare lengths instead of forming outp + rlen, which
+                // could point far outside out[] (undefined behaviour).
+                if (rlen >= (uint64_t)(out_end - outp))
+                    goto err;
+                // Widen before adding one so the count cannot wrap to 0.
+                size_t n = (size_t)rlen + 1;
+                memset(outp, b, n);
+                outp += n;
+            } else {
+                *outp++ = b;
+            }
+        } else {
+            *outp++ = b;
+        }
+        lit++;
+    }
+
+    *out_len = outp-out;
+    return out;
+
+ err:
+    return NULL;
+}
+
+// Deprecated interface; to be removed at the next ABI break.
+// Forwards every argument unchanged to hts_rle_encode and returns its result.
+uint8_t *rle_encode(uint8_t *data, uint64_t data_len,
+                    uint8_t *run,  uint64_t *run_len,
+                    uint8_t *rle_syms, int *rle_nsyms,
+                    uint8_t *out, uint64_t *out_len) {
+    return hts_rle_encode(data, data_len, run, run_len,
+                          rle_syms, rle_nsyms, out, out_len);
+}
+
+// Deprecated interface; to be removed at the next ABI break.
+// Forwards every argument unchanged to hts_rle_decode and returns its result.
+uint8_t *rle_decode(uint8_t *lit, uint64_t lit_len,
+                    uint8_t *run, uint64_t run_len,
+                    uint8_t *rle_syms, int rle_nsyms,
+                    uint8_t *out, uint64_t *out_len) {
+    return hts_rle_decode(lit, lit_len, run, run_len,
+                          rle_syms, rle_nsyms, out, out_len);
+}
--- a/ext/htslib/htscodecs/htscodecs/rle.h
+++ b/ext/htslib/htscodecs/htscodecs/rle.h
@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2019 Genome Research Ltd.
+ * Author(s): James Bonfield
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+ *       Institute nor the names of its contributors may be used to endorse
+ *       or promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTS_RLE_H
+#define HTS_RLE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Performs run length encoding of a byte stream, turning it into a
+ * list of lengths and a list of literals.
+ *
+ * The method used is a bit different to traditional run length
+ * encoding.  It always outputs run-lengths for symbols in the
+ * 'rle_syms' list (even if that length is +0 more), and never outputs
+ * lengths for symbols not in that list.
+ *
+ * "run" should be preallocated to be large enough;
+ * e.g. at least data_len bytes long as a worst case.
+ * "rle_syms" should be allocated to be at least 256 bytes.
+ *
+ * If *rle_nsyms is zero this function will survey the input data
+ * first to choose symbols automatically, writing back to rle_syms and
+ * rle_nsyms.
+ *
+ * The "out" buffer may be passed in as NULL in which case it is
+ * allocated and returned (and is up to the caller to free).
+ * Otherwise if specified as non-NULL it will be written to, but
+ * it is up to the caller to ensure the buffer size is large enough.
+ * A worst case scenario is 2*data_len.
+ *
+ * Returns the literal buffer on success with new length in out_len,
+ *         also fills out run buffer and run_len,  and potentially
+ *         updates rle_syms / rle_nsyms too.
+ * Returns NULL on failure.
+ */
+uint8_t *hts_rle_encode(uint8_t *data, uint64_t data_len,
+                        uint8_t *run,  uint64_t *run_len,
+                        uint8_t *rle_syms, int *rle_nsyms,
+                        uint8_t *out, uint64_t *out_len);
+
+/*
+ * Expands a run-length encoded data stream from a pair of literal and
+ * run-length buffers.
+ *
+ * On input *out_len holds the length of the supplied out
+ * buffer.  On exit, it holds the used portion of this buffer.
+ *
+ * Returns uncompressed data (out) on success,
+ *         NULL on failure.
+ */
+uint8_t *hts_rle_decode(uint8_t *lit, uint64_t lit_len,
+                        uint8_t *run, uint64_t run_len,
+                        uint8_t *rle_syms, int rle_nsyms,
+                        uint8_t *out, uint64_t *out_len);
+
+// TODO: Add rle scanning func to compute rle_syms.
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTS_RLE_H */
--- a/ext/htslib/htscodecs/htscodecs/tokenise_name3.c
+++ b/ext/htslib/htscodecs/htscodecs/tokenise_name3.c
--- a/Show More
+++ b/Show More