Fixing zlib and libpng build on Linux

2018-05-21 17:42:46 +02:00 · 2018-05-21 17:42:46 +02:00 · b3c49cbbe6
commit b3c49cbbe6
parent 6cc4e99632
23 changed files with 5704 additions and 5 deletions
--- a/xs/CMakeLists.txt
+++ b/xs/CMakeLists.txt
@ -351,13 +351,18 @@ if(RASTERIZER_USE_SYSTEM_LIBPNG)
    target_compile_definitions(rasterizer PRIVATE ${PNG_DEFINITIONS})
 else()
    add_subdirectory( ${LIBDIR}/png/zlib)
-    add_subdirectory( ${LIBDIR}/png/libpng )

-    target_include_directories( png_static PRIVATE
+    set(ZLIB_INCLUDE_DIR
        ${LIBDIR}/png/zlib
        ${CMAKE_CURRENT_BINARY_DIR}/src/png/zlib
        )

+    add_subdirectory( ${LIBDIR}/png/libpng )
+
+    target_include_directories(rasterizer PRIVATE
+        ${LIBDIR}/png/libpng
+        ${CMAKE_CURRENT_BINARY_DIR}/src/png/libpng
+        )
    target_link_libraries(rasterizer PRIVATE png_static zlibstatic)
 endif()

--- a/xs/src/png/libpng/arm/arm_init.c
+++ b/xs/src/png/libpng/arm/arm_init.c
@ -0,0 +1,135 @@
+
+/* arm_init.c - NEON optimised filter functions
+ *
+ * Copyright (c) 2014,2016 Glenn Randers-Pehrson
+ * Written by Mans Rullgard, 2011.
+ * Last changed in libpng 1.6.22 [May 26, 2016]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
+ * called.
+ */
+#define _POSIX_SOURCE 1
+
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+#if PNG_ARM_NEON_OPT > 0
+#ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */
+/* WARNING: it is strongly recommended that you do not build libpng with
+ * run-time checks for CPU features if at all possible.  In the case of the ARM
+ * NEON instructions there is no processor-specific way of detecting the
+ * presence of the required support, therefore run-time detection is extremely
+ * OS specific.
+ *
+ * You may set the macro PNG_ARM_NEON_FILE to the file name of file containing
+ * a fragment of C source code which defines the png_have_neon function.  There
+ * are a number of implementations in contrib/arm-neon, but the only one that
+ * has partial support is contrib/arm-neon/linux.c - a generic Linux
+ * implementation which reads /proc/cpufino.
+ */
+#ifndef PNG_ARM_NEON_FILE
+#  ifdef __linux__
+#     define PNG_ARM_NEON_FILE "contrib/arm-neon/linux.c"
+#  endif
+#endif
+
+#ifdef PNG_ARM_NEON_FILE
+
+#include <signal.h> /* for sig_atomic_t */
+static int png_have_neon(png_structp png_ptr);
+#include PNG_ARM_NEON_FILE
+
+#else  /* PNG_ARM_NEON_FILE */
+#  error "PNG_ARM_NEON_FILE undefined: no support for run-time ARM NEON checks"
+#endif /* PNG_ARM_NEON_FILE */
+#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */
+
+#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
+#  error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
+#endif
+
+void
+png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
+{
+   /* The switch statement is compiled in for ARM_NEON_API, the call to
+    * png_have_neon is compiled in for ARM_NEON_CHECK.  If both are defined
+    * the check is only performed if the API has not set the NEON option on
+    * or off explicitly.  In this case the check controls what happens.
+    *
+    * If the CHECK is not compiled in and the option is UNSET the behavior prior
+    * to 1.6.7 was to use the NEON code - this was a bug caused by having the
+    * wrong order of the 'ON' and 'default' cases.  UNSET now defaults to OFF,
+    * as documented in png.h
+    */
+   png_debug(1, "in png_init_filter_functions_neon");
+#ifdef PNG_ARM_NEON_API_SUPPORTED
+   switch ((pp->options >> PNG_ARM_NEON) & 3)
+   {
+      case PNG_OPTION_UNSET:
+         /* Allow the run-time check to execute if it has been enabled -
+          * thus both API and CHECK can be turned on.  If it isn't supported
+          * this case will fall through to the 'default' below, which just
+          * returns.
+          */
+#endif /* PNG_ARM_NEON_API_SUPPORTED */
+#ifdef PNG_ARM_NEON_CHECK_SUPPORTED
+         {
+            static volatile sig_atomic_t no_neon = -1; /* not checked */
+
+            if (no_neon < 0)
+               no_neon = !png_have_neon(pp);
+
+            if (no_neon)
+               return;
+         }
+#ifdef PNG_ARM_NEON_API_SUPPORTED
+         break;
+#endif
+#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */
+
+#ifdef PNG_ARM_NEON_API_SUPPORTED
+      default: /* OFF or INVALID */
+         return;
+
+      case PNG_OPTION_ON:
+         /* Option turned on */
+         break;
+   }
+#endif
+
+   /* IMPORTANT: any new external functions used here must be declared using
+    * PNG_INTERNAL_FUNCTION in ../pngpriv.h.  This is required so that the
+    * 'prefix' option to configure works:
+    *
+    *    ./configure --with-libpng-prefix=foobar_
+    *
+    * Verify you have got this right by running the above command, doing a build
+    * and examining pngprefix.h; it must contain a #define for every external
+    * function you add.  (Notice that this happens automatically for the
+    * initialization function.)
+    */
+   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
+
+   if (bpp == 3)
+   {
+      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
+      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+         png_read_filter_row_paeth3_neon;
+   }
+
+   else if (bpp == 4)
+   {
+      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
+      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+          png_read_filter_row_paeth4_neon;
+   }
+}
+#endif /* PNG_ARM_NEON_OPT > 0 */
+#endif /* READ */
--- a/xs/src/png/libpng/arm/filter_neon.S
+++ b/xs/src/png/libpng/arm/filter_neon.S
@ -0,0 +1,253 @@
+
+/* filter_neon.S - NEON optimised filter functions
+ *
+ * Copyright (c) 2014,2017 Glenn Randers-Pehrson
+ * Written by Mans Rullgard, 2011.
+ * Last changed in libpng 1.6.31 [July 27, 2017]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+/* This is required to get the symbol renames, which are #defines, and the
+ * definitions (or not) of PNG_ARM_NEON_OPT and PNG_ARM_NEON_IMPLEMENTATION.
+ */
+#define PNG_VERSION_INFO_ONLY
+#include "../pngpriv.h"
+
+#if (defined(__linux__) || defined(__FreeBSD__)) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
+#endif
+
+#ifdef PNG_READ_SUPPORTED
+
+/* Assembler NEON support - only works for 32-bit ARM (i.e. it does not work for
+ * ARM64).  The code in arm/filter_neon_intrinsics.c supports ARM64, however it
+ * only works if -mfpu=neon is specified on the GCC command line.  See pngpriv.h
+ * for the logic which sets PNG_USE_ARM_NEON_ASM:
+ */
+#if PNG_ARM_NEON_IMPLEMENTATION == 2 /* hand-coded assembler */
+
+#if PNG_ARM_NEON_OPT > 0
+
+#ifdef __ELF__
+#   define ELF
+#else
+#   define ELF @
+#endif
+
+        .arch armv7-a
+        .fpu  neon
+
+.macro  func    name, export=0
+    .macro endfunc
+ELF     .size   \name, . - \name
+        .endfunc
+        .purgem endfunc
+    .endm
+        .text
+
+        /* Explicitly specifying alignment here because some versions of
+         * GAS don't align code correctly.  This is harmless in correctly
+         * written versions of GAS.
+         */
+        .align 2
+
+    .if \export
+        .global \name
+    .endif
+ELF     .type   \name, STT_FUNC
+        .func   \name
+\name:
+.endm
+
+func    png_read_filter_row_sub4_neon, export=1
+        ldr             r3,  [r0, #4]           @ rowbytes
+        vmov.i8         d3,  #0
+1:
+        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
+        vadd.u8         d0,  d3,  d4
+        vadd.u8         d1,  d0,  d5
+        vadd.u8         d2,  d1,  d6
+        vadd.u8         d3,  d2,  d7
+        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
+        subs            r3,  r3,  #16
+        bgt             1b
+
+        bx              lr
+endfunc
+
+func    png_read_filter_row_sub3_neon, export=1
+        ldr             r3,  [r0, #4]           @ rowbytes
+        vmov.i8         d3,  #0
+        mov             r0,  r1
+        mov             r2,  #3
+        mov             r12, #12
+        vld1.8          {q11},    [r0], r12
+1:
+        vext.8          d5,  d22, d23, #3
+        vadd.u8         d0,  d3,  d22
+        vext.8          d6,  d22, d23, #6
+        vadd.u8         d1,  d0,  d5
+        vext.8          d7,  d23, d23, #1
+        vld1.8          {q11},    [r0], r12
+        vst1.32         {d0[0]},  [r1,:32], r2
+        vadd.u8         d2,  d1,  d6
+        vst1.32         {d1[0]},  [r1], r2
+        vadd.u8         d3,  d2,  d7
+        vst1.32         {d2[0]},  [r1], r2
+        vst1.32         {d3[0]},  [r1], r2
+        subs            r3,  r3,  #12
+        bgt             1b
+
+        bx              lr
+endfunc
+
+func    png_read_filter_row_up_neon, export=1
+        ldr             r3,  [r0, #4]           @ rowbytes
+1:
+        vld1.8          {q0}, [r1,:128]
+        vld1.8          {q1}, [r2,:128]!
+        vadd.u8         q0,  q0,  q1
+        vst1.8          {q0}, [r1,:128]!
+        subs            r3,  r3,  #16
+        bgt             1b
+
+        bx              lr
+endfunc
+
+func    png_read_filter_row_avg4_neon, export=1
+        ldr             r12, [r0, #4]           @ rowbytes
+        vmov.i8         d3,  #0
+1:
+        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
+        vld4.32         {d16[],d17[],d18[],d19[]},[r2,:128]!
+        vhadd.u8        d0,  d3,  d16
+        vadd.u8         d0,  d0,  d4
+        vhadd.u8        d1,  d0,  d17
+        vadd.u8         d1,  d1,  d5
+        vhadd.u8        d2,  d1,  d18
+        vadd.u8         d2,  d2,  d6
+        vhadd.u8        d3,  d2,  d19
+        vadd.u8         d3,  d3,  d7
+        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
+        subs            r12, r12, #16
+        bgt             1b
+
+        bx              lr
+endfunc
+
+func    png_read_filter_row_avg3_neon, export=1
+        push            {r4,lr}
+        ldr             r12, [r0, #4]           @ rowbytes
+        vmov.i8         d3,  #0
+        mov             r0,  r1
+        mov             r4,  #3
+        mov             lr,  #12
+        vld1.8          {q11},    [r0], lr
+1:
+        vld1.8          {q10},    [r2], lr
+        vext.8          d5,  d22, d23, #3
+        vhadd.u8        d0,  d3,  d20
+        vext.8          d17, d20, d21, #3
+        vadd.u8         d0,  d0,  d22
+        vext.8          d6,  d22, d23, #6
+        vhadd.u8        d1,  d0,  d17
+        vext.8          d18, d20, d21, #6
+        vadd.u8         d1,  d1,  d5
+        vext.8          d7,  d23, d23, #1
+        vld1.8          {q11},    [r0], lr
+        vst1.32         {d0[0]},  [r1,:32], r4
+        vhadd.u8        d2,  d1,  d18
+        vst1.32         {d1[0]},  [r1], r4
+        vext.8          d19, d21, d21, #1
+        vadd.u8         d2,  d2,  d6
+        vhadd.u8        d3,  d2,  d19
+        vst1.32         {d2[0]},  [r1], r4
+        vadd.u8         d3,  d3,  d7
+        vst1.32         {d3[0]},  [r1], r4
+        subs            r12, r12, #12
+        bgt             1b
+
+        pop             {r4,pc}
+endfunc
+
+.macro  paeth           rx,  ra,  rb,  rc
+        vaddl.u8        q12, \ra, \rb           @ a + b
+        vaddl.u8        q15, \rc, \rc           @ 2*c
+        vabdl.u8        q13, \rb, \rc           @ pa
+        vabdl.u8        q14, \ra, \rc           @ pb
+        vabd.u16        q15, q12, q15           @ pc
+        vcle.u16        q12, q13, q14           @ pa <= pb
+        vcle.u16        q13, q13, q15           @ pa <= pc
+        vcle.u16        q14, q14, q15           @ pb <= pc
+        vand            q12, q12, q13           @ pa <= pb && pa <= pc
+        vmovn.u16       d28, q14
+        vmovn.u16       \rx, q12
+        vbsl            d28, \rb, \rc
+        vbsl            \rx, \ra, d28
+.endm
+
+func    png_read_filter_row_paeth4_neon, export=1
+        ldr             r12, [r0, #4]           @ rowbytes
+        vmov.i8         d3,  #0
+        vmov.i8         d20, #0
+1:
+        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
+        vld4.32         {d16[],d17[],d18[],d19[]},[r2,:128]!
+        paeth           d0,  d3,  d16, d20
+        vadd.u8         d0,  d0,  d4
+        paeth           d1,  d0,  d17, d16
+        vadd.u8         d1,  d1,  d5
+        paeth           d2,  d1,  d18, d17
+        vadd.u8         d2,  d2,  d6
+        paeth           d3,  d2,  d19, d18
+        vmov            d20, d19
+        vadd.u8         d3,  d3,  d7
+        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
+        subs            r12, r12, #16
+        bgt             1b
+
+        bx              lr
+endfunc
+
+func    png_read_filter_row_paeth3_neon, export=1
+        push            {r4,lr}
+        ldr             r12, [r0, #4]           @ rowbytes
+        vmov.i8         d3,  #0
+        vmov.i8         d4,  #0
+        mov             r0,  r1
+        mov             r4,  #3
+        mov             lr,  #12
+        vld1.8          {q11},    [r0], lr
+1:
+        vld1.8          {q10},    [r2], lr
+        paeth           d0,  d3,  d20, d4
+        vext.8          d5,  d22, d23, #3
+        vadd.u8         d0,  d0,  d22
+        vext.8          d17, d20, d21, #3
+        paeth           d1,  d0,  d17, d20
+        vst1.32         {d0[0]},  [r1,:32], r4
+        vext.8          d6,  d22, d23, #6
+        vadd.u8         d1,  d1,  d5
+        vext.8          d18, d20, d21, #6
+        paeth           d2,  d1,  d18, d17
+        vext.8          d7,  d23, d23, #1
+        vld1.8          {q11},    [r0], lr
+        vst1.32         {d1[0]},  [r1], r4
+        vadd.u8         d2,  d2,  d6
+        vext.8          d19, d21, d21, #1
+        paeth           d3,  d2,  d19, d18
+        vst1.32         {d2[0]},  [r1], r4
+        vmov            d4,  d19
+        vadd.u8         d3,  d3,  d7
+        vst1.32         {d3[0]},  [r1], r4
+        subs            r12, r12, #12
+        bgt             1b
+
+        pop             {r4,pc}
+endfunc
+#endif /* PNG_ARM_NEON_OPT > 0 */
+#endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 (assembler) */
+#endif /* READ */
--- a/xs/src/png/libpng/arm/filter_neon_intrinsics.c
+++ b/xs/src/png/libpng/arm/filter_neon_intrinsics.c
@ -0,0 +1,387 @@
+
+/* filter_neon_intrinsics.c - NEON optimised filter functions
+ *
+ * Copyright (c) 2014,2016 Glenn Randers-Pehrson
+ * Written by James Yu <james.yu at linaro.org>, October 2013.
+ * Based on filter_neon.S, written by Mans Rullgard, 2011.
+ *
+ * Last changed in libpng 1.6.22 [May 26, 2016]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+/* This code requires -mfpu=neon on the command line: */
+#if PNG_ARM_NEON_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
+
+#include <arm_neon.h>
+
+/* libpng row pointers are not necessarily aligned to any particular boundary,
+ * however this code will only work with appropriate alignment.  arm/arm_init.c
+ * checks for this (and will not compile unless it is done). This code uses
+ * variants of png_aligncast to avoid compiler warnings.
+ */
+#define png_ptr(type,pointer) png_aligncast(type *,pointer)
+#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer)
+
+/* The following relies on a variable 'temp_pointer' being declared with type
+ * 'type'.  This is written this way just to hide the GCC strict aliasing
+ * warning; note that the code is safe because there never is an alias between
+ * the input and output pointers.
+ */
+#define png_ldr(type,pointer)\
+   (temp_pointer = png_ptr(type,pointer), *temp_pointer)
+
+#if PNG_ARM_NEON_OPT > 0
+
+void
+png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+   png_const_bytep pp = prev_row;
+
+   png_debug(1, "in png_read_filter_row_up_neon");
+
+   for (; rp < rp_stop; rp += 16, pp += 16)
+   {
+      uint8x16_t qrp, qpp;
+
+      qrp = vld1q_u8(rp);
+      qpp = vld1q_u8(pp);
+      qrp = vaddq_u8(qrp, qpp);
+      vst1q_u8(rp, qrp);
+   }
+}
+
+void
+png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+
+   uint8x16_t vtmp = vld1q_u8(rp);
+   uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp);
+   uint8x8x2_t vrp = *vrpt;
+
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   png_debug(1, "in png_read_filter_row_sub3_neon");
+
+   for (; rp < rp_stop;)
+   {
+      uint8x8_t vtmp1, vtmp2;
+      uint32x2_t *temp_pointer;
+
+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+      vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
+      vtmp2 = vext_u8(vrp.val[0], vrp.val[1], 6);
+      vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);
+
+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+      vdest.val[2] = vadd_u8(vdest.val[1], vtmp2);
+      vdest.val[3] = vadd_u8(vdest.val[2], vtmp1);
+
+      vtmp = vld1q_u8(rp + 12);
+      vrpt = png_ptr(uint8x8x2_t, &vtmp);
+      vrp = *vrpt;
+
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+      rp += 3;
+   }
+
+   PNG_UNUSED(prev_row)
+}
+
+void
+png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   png_debug(1, "in png_read_filter_row_sub4_neon");
+
+   for (; rp < rp_stop; rp += 16)
+   {
+      uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp));
+      uint8x8x4_t *vrpt = png_ptr(uint8x8x4_t,&vtmp);
+      uint8x8x4_t vrp = *vrpt;
+      uint32x2x4_t *temp_pointer;
+
+      vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
+      vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]);
+      vdest.val[2] = vadd_u8(vdest.val[1], vrp.val[2]);
+      vdest.val[3] = vadd_u8(vdest.val[2], vrp.val[3]);
+      vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0);
+   }
+
+   PNG_UNUSED(prev_row)
+}
+
+void
+png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_const_bytep pp = prev_row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+
+   uint8x16_t vtmp;
+   uint8x8x2_t *vrpt;
+   uint8x8x2_t vrp;
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   vtmp = vld1q_u8(rp);
+   vrpt = png_ptr(uint8x8x2_t,&vtmp);
+   vrp = *vrpt;
+
+   png_debug(1, "in png_read_filter_row_avg3_neon");
+
+   for (; rp < rp_stop; pp += 12)
+   {
+      uint8x8_t vtmp1, vtmp2, vtmp3;
+
+      uint8x8x2_t *vppt;
+      uint8x8x2_t vpp;
+
+      uint32x2_t *temp_pointer;
+
+      vtmp = vld1q_u8(pp);
+      vppt = png_ptr(uint8x8x2_t,&vtmp);
+      vpp = *vppt;
+
+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+      vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+
+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
+      vtmp3 = vext_u8(vrp.val[0], vrp.val[1], 6);
+      vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
+
+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 6);
+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+
+      vtmp = vld1q_u8(rp + 12);
+      vrpt = png_ptr(uint8x8x2_t,&vtmp);
+      vrp = *vrpt;
+
+      vdest.val[2] = vhadd_u8(vdest.val[1], vtmp2);
+      vdest.val[2] = vadd_u8(vdest.val[2], vtmp3);
+
+      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
+
+      vdest.val[3] = vhadd_u8(vdest.val[2], vtmp2);
+      vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
+
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+      rp += 3;
+   }
+}
+
+void
+png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+   png_const_bytep pp = prev_row;
+
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   png_debug(1, "in png_read_filter_row_avg4_neon");
+
+   for (; rp < rp_stop; rp += 16, pp += 16)
+   {
+      uint32x2x4_t vtmp;
+      uint8x8x4_t *vrpt, *vppt;
+      uint8x8x4_t vrp, vpp;
+      uint32x2x4_t *temp_pointer;
+
+      vtmp = vld4_u32(png_ptr(uint32_t,rp));
+      vrpt = png_ptr(uint8x8x4_t,&vtmp);
+      vrp = *vrpt;
+      vtmp = vld4_u32(png_ptrc(uint32_t,pp));
+      vppt = png_ptr(uint8x8x4_t,&vtmp);
+      vpp = *vppt;
+
+      vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+      vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]);
+      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
+      vdest.val[2] = vhadd_u8(vdest.val[1], vpp.val[2]);
+      vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
+      vdest.val[3] = vhadd_u8(vdest.val[2], vpp.val[3]);
+      vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
+
+      vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0);
+   }
+}
+
+static uint8x8_t
+paeth(uint8x8_t a, uint8x8_t b, uint8x8_t c)
+{
+   uint8x8_t d, e;
+   uint16x8_t p1, pa, pb, pc;
+
+   p1 = vaddl_u8(a, b); /* a + b */
+   pc = vaddl_u8(c, c); /* c * 2 */
+   pa = vabdl_u8(b, c); /* pa */
+   pb = vabdl_u8(a, c); /* pb */
+   pc = vabdq_u16(p1, pc); /* pc */
+
+   p1 = vcleq_u16(pa, pb); /* pa <= pb */
+   pa = vcleq_u16(pa, pc); /* pa <= pc */
+   pb = vcleq_u16(pb, pc); /* pb <= pc */
+
+   p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */
+
+   d = vmovn_u16(pb);
+   e = vmovn_u16(p1);
+
+   d = vbsl_u8(d, b, c);
+   e = vbsl_u8(e, a, d);
+
+   return e;
+}
+
+void
+png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_const_bytep pp = prev_row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+
+   uint8x16_t vtmp;
+   uint8x8x2_t *vrpt;
+   uint8x8x2_t vrp;
+   uint8x8_t vlast = vdup_n_u8(0);
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   vtmp = vld1q_u8(rp);
+   vrpt = png_ptr(uint8x8x2_t,&vtmp);
+   vrp = *vrpt;
+
+   png_debug(1, "in png_read_filter_row_paeth3_neon");
+
+   for (; rp < rp_stop; pp += 12)
+   {
+      uint8x8x2_t *vppt;
+      uint8x8x2_t vpp;
+      uint8x8_t vtmp1, vtmp2, vtmp3;
+      uint32x2_t *temp_pointer;
+
+      vtmp = vld1q_u8(pp);
+      vppt = png_ptr(uint8x8x2_t,&vtmp);
+      vpp = *vppt;
+
+      vdest.val[0] = paeth(vdest.val[3], vpp.val[0], vlast);
+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+
+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
+      vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]);
+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
+
+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 6);
+      vtmp3 = vext_u8(vpp.val[0], vpp.val[1], 6);
+      vdest.val[2] = paeth(vdest.val[1], vtmp3, vtmp2);
+      vdest.val[2] = vadd_u8(vdest.val[2], vtmp1);
+
+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
+
+      vtmp = vld1q_u8(rp + 12);
+      vrpt = png_ptr(uint8x8x2_t,&vtmp);
+      vrp = *vrpt;
+
+      vdest.val[3] = paeth(vdest.val[2], vtmp2, vtmp3);
+      vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
+
+      vlast = vtmp2;
+
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+      rp += 3;
+   }
+}
+
+void
+png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+   png_const_bytep pp = prev_row;
+
+   uint8x8_t vlast = vdup_n_u8(0);
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   png_debug(1, "in png_read_filter_row_paeth4_neon");
+
+   for (; rp < rp_stop; rp += 16, pp += 16)
+   {
+      uint32x2x4_t vtmp;
+      uint8x8x4_t *vrpt, *vppt;
+      uint8x8x4_t vrp, vpp;
+      uint32x2x4_t *temp_pointer;
+
+      vtmp = vld4_u32(png_ptr(uint32_t,rp));
+      vrpt = png_ptr(uint8x8x4_t,&vtmp);
+      vrp = *vrpt;
+      vtmp = vld4_u32(png_ptrc(uint32_t,pp));
+      vppt = png_ptr(uint8x8x4_t,&vtmp);
+      vpp = *vppt;
+
+      vdest.val[0] = paeth(vdest.val[3], vpp.val[0], vlast);
+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+      vdest.val[1] = paeth(vdest.val[0], vpp.val[1], vpp.val[0]);
+      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
+      vdest.val[2] = paeth(vdest.val[1], vpp.val[2], vpp.val[1]);
+      vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
+      vdest.val[3] = paeth(vdest.val[2], vpp.val[3], vpp.val[2]);
+      vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
+
+      vlast = vpp.val[3];
+
+      vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0);
+   }
+}
+
+#endif /* PNG_ARM_NEON_OPT > 0 */
+#endif /* PNG_ARM_NEON_IMPLEMENTATION == 1 (intrinsics) */
+#endif /* READ */
--- a/xs/src/png/libpng/intel/filter_sse2_intrinsics.c
+++ b/xs/src/png/libpng/intel/filter_sse2_intrinsics.c
@ -0,0 +1,406 @@
+
+/* filter_sse2_intrinsics.c - SSE2 optimized filter functions
+ *
+ * Copyright (c) 2016-2017 Glenn Randers-Pehrson
+ * Written by Mike Klein and Matt Sarett
+ * Derived from arm/filter_neon_intrinsics.c
+ *
+ * Last changed in libpng 1.6.31 [July 27, 2017]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+#if PNG_INTEL_SSE_IMPLEMENTATION > 0
+
+#include <immintrin.h>
+
+/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
+ * They're positioned like this:
+ *    prev:  c b
+ *    row:   a d
+ * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
+ * whichever of a, b, or c is closest to p=a+b-c.
+ */
+
+static __m128i load4(const void* p) {
+   return _mm_cvtsi32_si128(*(const int*)p);
+}
+
+static void store4(void* p, __m128i v) {
+   *(int*)p = _mm_cvtsi128_si32(v);
+}
+
+static __m128i load3(const void* p) {
+   /* We'll load 2 bytes, then 1 byte,
+    * then mask them together, and finally load into SSE.
+    */
+   const png_uint_16* p01 = (png_const_uint_16p)p;
+   const png_byte*    p2  = (const png_byte*)(p01+1);
+
+   png_uint_32 v012 = (png_uint_32)(*p01)
+                    | (png_uint_32)(*p2) << 16;
+   return load4(&v012);
+}
+
+static void store3(void* p, __m128i v) {
+   /* We'll pull from SSE as a 32-bit int, then write
+    * its bottom two bytes, then its third byte.
+    */
+   png_uint_32 v012;
+   png_uint_16* p01;
+   png_byte*    p2;
+
+   store4(&v012, v);
+
+   p01 = (png_uint_16p)p;
+   p2  = (png_byte*)(p01+1);
+   *p01 = (png_uint_16)v012;
+   *p2  = (png_byte)(v012 >> 16);
+}
+
+void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   /* The Sub filter predicts each pixel as the previous pixel, a.
+    * There is no pixel to the left of the first pixel.  It's encoded directly.
+    * That works with our main loop if we just say that left pixel was zero.
+    */
+   png_size_t rb;
+
+   __m128i a, d = _mm_setzero_si128();
+
+   png_debug(1, "in png_read_filter_row_sub3_sse2");
+
+   rb = row_info->rowbytes;
+   while (rb >= 4) {
+      a = d; d = load4(row);
+      d = _mm_add_epi8(d, a);
+      store3(row, d);
+
+      row += 3;
+      rb  -= 3;
+   }
+   if (rb > 0) {
+      a = d; d = load3(row);
+      d = _mm_add_epi8(d, a);
+      store3(row, d);
+
+      row += 3;
+      rb  -= 3;
+   }
+   PNG_UNUSED(prev)
+}
+
+void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   /* The Sub filter predicts each pixel as the previous pixel, a.
+    * There is no pixel to the left of the first pixel.  It's encoded directly.
+    * That works with our main loop if we just say that left pixel was zero.
+    */
+   png_size_t rb;
+
+   __m128i a, d = _mm_setzero_si128();
+
+   png_debug(1, "in png_read_filter_row_sub4_sse2");
+
+   rb = row_info->rowbytes+4;
+   while (rb > 4) {
+      a = d; d = load4(row);
+      d = _mm_add_epi8(d, a);
+      store4(row, d);
+
+      row += 4;
+      rb  -= 4;
+   }
+   PNG_UNUSED(prev)
+}
+
+void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   /* The Avg filter predicts each pixel as the (truncated) average of a and b.
+    * There's no pixel to the left of the first pixel.  Luckily, it's
+    * predicted to be half of the pixel above it.  So again, this works
+    * perfectly with our loop if we make sure a starts at zero.
+    */
+
+   png_size_t rb;
+
+   const __m128i zero = _mm_setzero_si128();
+
+   __m128i    b;
+   __m128i a, d = zero;
+
+   png_debug(1, "in png_read_filter_row_avg3_sse2");
+   rb = row_info->rowbytes;
+   while (rb >= 4) {
+      __m128i avg;
+             b = load4(prev);
+      a = d; d = load4(row );
+
+      /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
+      avg = _mm_avg_epu8(a,b);
+      /* ...but we can fix it up by subtracting off 1 if it rounded up. */
+      avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
+                                            _mm_set1_epi8(1)));
+      d = _mm_add_epi8(d, avg);
+      store3(row, d);
+
+      prev += 3;
+      row  += 3;
+      rb   -= 3;
+   }
+   if (rb > 0) {
+      __m128i avg;
+             b = load3(prev);
+      a = d; d = load3(row );
+
+      /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
+      avg = _mm_avg_epu8(a,b);
+      /* ...but we can fix it up by subtracting off 1 if it rounded up. */
+      avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
+                                            _mm_set1_epi8(1)));
+
+      d = _mm_add_epi8(d, avg);
+      store3(row, d);
+
+      prev += 3;
+      row  += 3;
+      rb   -= 3;
+   }
+}
+
+void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   /* The Avg filter predicts each pixel as the (truncated) average of a and b.
+    * There's no pixel to the left of the first pixel.  Luckily, it's
+    * predicted to be half of the pixel above it.  So again, this works
+    * perfectly with our loop if we make sure a starts at zero.
+    */
+   png_size_t rb;
+   const __m128i zero = _mm_setzero_si128();
+   __m128i    b;
+   __m128i a, d = zero;
+
+   png_debug(1, "in png_read_filter_row_avg4_sse2");
+
+   rb = row_info->rowbytes+4;
+   while (rb > 4) {
+      __m128i avg;
+             b = load4(prev);
+      a = d; d = load4(row );
+
+      /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
+      avg = _mm_avg_epu8(a,b);
+      /* ...but we can fix it up by subtracting off 1 if it rounded up. */
+      avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
+                                            _mm_set1_epi8(1)));
+
+      d = _mm_add_epi8(d, avg);
+      store4(row, d);
+
+      prev += 4;
+      row  += 4;
+      rb   -= 4;
+   }
+}
+
+/* Returns |x| for 16-bit lanes. */
+static __m128i abs_i16(__m128i x) {
+#if PNG_INTEL_SSE_IMPLEMENTATION >= 2
+   return _mm_abs_epi16(x);
+#else
+   /* Read this all as, return x<0 ? -x : x.
+   * To negate two's complement, you flip all the bits then add 1.
+    */
+   __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
+
+   /* Flip negative lanes. */
+   x = _mm_xor_si128(x, is_negative);
+
+   /* +1 to negative lanes, else +0. */
+   x = _mm_sub_epi16(x, is_negative);
+   return x;
+#endif
+}
+
+/* Bytewise c ? t : e. */
+static __m128i if_then_else(__m128i c, __m128i t, __m128i e) {
+#if PNG_INTEL_SSE_IMPLEMENTATION >= 3
+   return _mm_blendv_epi8(e,t,c);
+#else
+   return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
+#endif
+}
+
+void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   /* Paeth tries to predict pixel d using the pixel to the left of it, a,
+    * and two pixels from the previous row, b and c:
+    *   prev: c b
+    *   row:  a d
+    * The Paeth function predicts d to be whichever of a, b, or c is nearest to
+    * p=a+b-c.
+    *
+    * The first pixel has no left context, and so uses an Up filter, p = b.
+    * This works naturally with our main loop's p = a+b-c if we force a and c
+    * to zero.
+    * Here we zero b and d, which become c and a respectively at the start of
+    * the loop.
+    */
+   png_size_t rb;
+   const __m128i zero = _mm_setzero_si128();
+   __m128i c, b = zero,
+           a, d = zero;
+
+   png_debug(1, "in png_read_filter_row_paeth3_sse2");
+
+   rb = row_info->rowbytes;
+   while (rb >= 4) {
+      /* It's easiest to do this math (particularly, deal with pc) with 16-bit
+       * intermediates.
+       */
+      __m128i pa,pb,pc,smallest,nearest;
+      c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
+      a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
+
+      /* (p-a) == (a+b-c - a) == (b-c) */
+   
+      pa = _mm_sub_epi16(b,c);
+
+      /* (p-b) == (a+b-c - b) == (a-c) */
+      pb = _mm_sub_epi16(a,c);
+
+      /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
+      pc = _mm_add_epi16(pa,pb);
+
+      pa = abs_i16(pa);  /* |p-a| */
+      pb = abs_i16(pb);  /* |p-b| */
+      pc = abs_i16(pc);  /* |p-c| */
+
+      smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
+
+      /* Paeth breaks ties favoring a over b over c. */
+      nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
+                 if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
+                                                             c));
+
+      /* Note `_epi8`: we need addition to wrap modulo 255. */
+      d = _mm_add_epi8(d, nearest);
+      store3(row, _mm_packus_epi16(d,d));
+
+      prev += 3;
+      row  += 3;
+      rb   -= 3;
+   }
+   if (rb > 0) {
+      /* It's easiest to do this math (particularly, deal with pc) with 16-bit
+       * intermediates.
+       */
+      __m128i pa,pb,pc,smallest,nearest;
+      c = b; b = _mm_unpacklo_epi8(load3(prev), zero);
+      a = d; d = _mm_unpacklo_epi8(load3(row ), zero);
+
+      /* (p-a) == (a+b-c - a) == (b-c) */
+      pa = _mm_sub_epi16(b,c);
+
+      /* (p-b) == (a+b-c - b) == (a-c) */
+      pb = _mm_sub_epi16(a,c);
+
+      /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
+      pc = _mm_add_epi16(pa,pb);
+
+      pa = abs_i16(pa);  /* |p-a| */
+      pb = abs_i16(pb);  /* |p-b| */
+      pc = abs_i16(pc);  /* |p-c| */
+
+      smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
+
+      /* Paeth breaks ties favoring a over b over c. */
+      nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
+                         if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
+                                                                     c));
+
+      /* Note `_epi8`: we need addition to wrap modulo 255. */
+      d = _mm_add_epi8(d, nearest);
+      store3(row, _mm_packus_epi16(d,d));
+
+      prev += 3;
+      row  += 3;
+      rb   -= 3;
+   }
+}
+
+void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   /* Paeth tries to predict pixel d using the pixel to the left of it, a,
+    * and two pixels from the previous row, b and c:
+    *   prev: c b
+    *   row:  a d
+    * The Paeth function predicts d to be whichever of a, b, or c is nearest to
+    * p=a+b-c.
+    *
+    * The first pixel has no left context, and so uses an Up filter, p = b.
+    * This works naturally with our main loop's p = a+b-c if we force a and c
+    * to zero.
+    * Here we zero b and d, which become c and a respectively at the start of
+    * the loop.
+    */
+   png_size_t rb;
+   const __m128i zero = _mm_setzero_si128();
+   __m128i pa,pb,pc,smallest,nearest;
+   __m128i c, b = zero,
+           a, d = zero;
+
+   png_debug(1, "in png_read_filter_row_paeth4_sse2");
+
+   rb = row_info->rowbytes+4;
+   while (rb > 4) {
+      /* It's easiest to do this math (particularly, deal with pc) with 16-bit
+       * intermediates.
+       */
+      c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
+      a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
+
+      /* (p-a) == (a+b-c - a) == (b-c) */
+      pa = _mm_sub_epi16(b,c);
+
+      /* (p-b) == (a+b-c - b) == (a-c) */
+      pb = _mm_sub_epi16(a,c);
+
+      /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
+      pc = _mm_add_epi16(pa,pb);
+
+      pa = abs_i16(pa);  /* |p-a| */
+      pb = abs_i16(pb);  /* |p-b| */
+      pc = abs_i16(pc);  /* |p-c| */
+
+      smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
+
+      /* Paeth breaks ties favoring a over b over c. */
+      nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
+                         if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
+                                                                     c));
+
+      /* Note `_epi8`: we need addition to wrap modulo 255. */
+      d = _mm_add_epi8(d, nearest);
+      store4(row, _mm_packus_epi16(d,d));
+
+      prev += 4;
+      row  += 4;
+      rb   -= 4;
+   }
+}
+
+#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
+#endif /* READ */
--- a/xs/src/png/libpng/intel/intel_init.c
+++ b/xs/src/png/libpng/intel/intel_init.c
@ -0,0 +1,53 @@
+
+/* intel_init.c - SSE2 optimized filter functions
+ *
+ * Copyright (c) 2016-2017 Glenn Randers-Pehrson
+ * Written by Mike Klein and Matt Sarett, Google, Inc.
+ * Derived from arm/arm_init.c
+ *
+ * Last changed in libpng 1.6.29 [March 16, 2017]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+#if PNG_INTEL_SSE_IMPLEMENTATION > 0
+
+void
+png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
+{
+   /* The techniques used to implement each of these filters in SSE operate on
+    * one pixel at a time.
+    * So they generally speed up 3bpp images about 3x, 4bpp images about 4x.
+    * They can scale up to 6 and 8 bpp images and down to 2 bpp images,
+    * but they'd not likely have any benefit for 1bpp images.
+    * Most of these can be implemented using only MMX and 64-bit registers,
+    * but they end up a bit slower than using the equally-ubiquitous SSE2.
+   */
+   png_debug(1, "in png_init_filter_functions_sse2");
+   if (bpp == 3)
+   {
+      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
+      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2;
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+         png_read_filter_row_paeth3_sse2;
+   }
+   else if (bpp == 4)
+   {
+      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2;
+      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2;
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+          png_read_filter_row_paeth4_sse2;
+   }
+
+   /* No need optimize PNG_FILTER_VALUE_UP.  The compiler should
+    * autovectorize.
+    */
+}
+
+#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
+#endif /* PNG_READ_SUPPORTED */
--- a/xs/src/png/libpng/mips/filter_msa_intrinsics.c
+++ b/xs/src/png/libpng/mips/filter_msa_intrinsics.c
@ -0,0 +1,807 @@
+
+/* filter_msa_intrinsics.c - MSA optimised filter functions
+ *
+ * Copyright (c) 2016 Glenn Randers-Pehrson
+ * Written by Mandar Sahastrabuddhe, August 2016.
+ * Last changed in libpng 1.6.25 [September 1, 2016]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+/* This code requires -mfpu=msa on the command line: */
+#if PNG_MIPS_MSA_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
+
+#include <msa.h>
+
+/* libpng row pointers are not necessarily aligned to any particular boundary,
+ * however this code will only work with appropriate alignment. mips/mips_init.c
+ * checks for this (and will not compile unless it is done). This code uses
+ * variants of png_aligncast to avoid compiler warnings.
+ */
+#define png_ptr(type,pointer) png_aligncast(type *,pointer)
+#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer)
+
+/* The following relies on a variable 'temp_pointer' being declared with type
+ * 'type'.  This is written this way just to hide the GCC strict aliasing
+ * warning; note that the code is safe because there never is an alias between
+ * the input and output pointers.
+ */
+#define png_ldr(type,pointer)\
+   (temp_pointer = png_ptr(type,pointer), *temp_pointer)
+
+#if PNG_MIPS_MSA_OPT > 0
+
+#ifdef CLANG_BUILD
+   #define MSA_SRLI_B(a, b)   __msa_srli_b((v16i8) a, b)
+
+   #define LW(psrc)                              \
+   ( {                                           \
+       uint8_t *psrc_lw_m = (uint8_t *) (psrc);  \
+       uint32_t val_m;                           \
+                                                 \
+       asm volatile (                            \
+           "lw  %[val_m],  %[psrc_lw_m]  \n\t"   \
+                                                 \
+           : [val_m] "=r" (val_m)                \
+           : [psrc_lw_m] "m" (*psrc_lw_m)        \
+       );                                        \
+                                                 \
+       val_m;                                    \
+   } )
+
+   #define SH(val, pdst)                         \
+   {                                             \
+       uint8_t *pdst_sh_m = (uint8_t *) (pdst);  \
+       uint16_t val_m = (val);                   \
+                                                 \
+       asm volatile (                            \
+           "sh  %[val_m],  %[pdst_sh_m]  \n\t"   \
+                                                 \
+           : [pdst_sh_m] "=m" (*pdst_sh_m)       \
+           : [val_m] "r" (val_m)                 \
+       );                                        \
+   }
+
+   #define SW(val, pdst)                         \
+   {                                             \
+       uint8_t *pdst_sw_m = (uint8_t *) (pdst);  \
+       uint32_t val_m = (val);                   \
+                                                 \
+       asm volatile (                            \
+           "sw  %[val_m],  %[pdst_sw_m]  \n\t"   \
+                                                 \
+           : [pdst_sw_m] "=m" (*pdst_sw_m)       \
+           : [val_m] "r" (val_m)                 \
+       );                                        \
+   }
+
+       #if (__mips == 64)
+        #define SD(val, pdst)                         \
+        {                                             \
+            uint8_t *pdst_sd_m = (uint8_t *) (pdst);  \
+            uint64_t val_m = (val);                   \
+                                                      \
+            asm volatile (                            \
+                "sd  %[val_m],  %[pdst_sd_m]  \n\t"   \
+                                                      \
+                : [pdst_sd_m] "=m" (*pdst_sd_m)       \
+                : [val_m] "r" (val_m)                 \
+            );                                        \
+        }
+    #else
+        #define SD(val, pdst)                                          \
+        {                                                              \
+            uint8_t *pdst_sd_m = (uint8_t *) (pdst);                   \
+            uint32_t val0_m, val1_m;                                   \
+                                                                       \
+            val0_m = (uint32_t) ((val) & 0x00000000FFFFFFFF);          \
+            val1_m = (uint32_t) (((val) >> 32) & 0x00000000FFFFFFFF);  \
+                                                                       \
+            SW(val0_m, pdst_sd_m);                                     \
+            SW(val1_m, pdst_sd_m + 4);                                 \
+        }
+    #endif
+#else
+   #define MSA_SRLI_B(a, b)   (a >> b)
+
+#if (__mips_isa_rev >= 6)
+   #define LW(psrc)                              \
+   ( {                                           \
+       uint8_t *psrc_lw_m = (uint8_t *) (psrc);  \
+       uint32_t val_m;                           \
+                                                 \
+       asm volatile (                            \
+           "lw  %[val_m],  %[psrc_lw_m]  \n\t"   \
+                                                 \
+           : [val_m] "=r" (val_m)                \
+           : [psrc_lw_m] "m" (*psrc_lw_m)        \
+       );                                        \
+                                                 \
+       val_m;                                    \
+   } )
+
+   #define SH(val, pdst)                         \
+   {                                             \
+       uint8_t *pdst_sh_m = (uint8_t *) (pdst);  \
+       uint16_t val_m = (val);                   \
+                                                 \
+       asm volatile (                            \
+           "sh  %[val_m],  %[pdst_sh_m]  \n\t"   \
+                                                 \
+           : [pdst_sh_m] "=m" (*pdst_sh_m)       \
+           : [val_m] "r" (val_m)                 \
+       );                                        \
+   }
+
+   #define SW(val, pdst)                         \
+   {                                             \
+       uint8_t *pdst_sw_m = (uint8_t *) (pdst);  \
+       uint32_t val_m = (val);                   \
+                                                 \
+       asm volatile (                            \
+           "sw  %[val_m],  %[pdst_sw_m]  \n\t"   \
+                                                 \
+           : [pdst_sw_m] "=m" (*pdst_sw_m)       \
+           : [val_m] "r" (val_m)                 \
+       );                                        \
+   }
+
+   #if (__mips == 64)
+        #define SD(val, pdst)                         \
+        {                                             \
+            uint8_t *pdst_sd_m = (uint8_t *) (pdst);  \
+            uint64_t val_m = (val);                   \
+                                                      \
+            asm volatile (                            \
+                "sd  %[val_m],  %[pdst_sd_m]  \n\t"   \
+                                                      \
+                : [pdst_sd_m] "=m" (*pdst_sd_m)       \
+                : [val_m] "r" (val_m)                 \
+            );                                        \
+        }
+    #else
+        #define SD(val, pdst)                                          \
+        {                                                              \
+            uint8_t *pdst_sd_m = (uint8_t *) (pdst);                   \
+            uint32_t val0_m, val1_m;                                   \
+                                                                       \
+            val0_m = (uint32_t) ((val) & 0x00000000FFFFFFFF);          \
+            val1_m = (uint32_t) (((val) >> 32) & 0x00000000FFFFFFFF);  \
+                                                                       \
+            SW(val0_m, pdst_sd_m);                                     \
+            SW(val1_m, pdst_sd_m + 4);                                 \
+        }
+    #endif
+#else  // !(__mips_isa_rev >= 6)
+   #define LW(psrc)                              \
+   ( {                                           \
+       uint8_t *psrc_lw_m = (uint8_t *) (psrc);  \
+       uint32_t val_m;                           \
+                                                 \
+       asm volatile (                            \
+           "ulw  %[val_m],  %[psrc_lw_m]  \n\t"  \
+                                                 \
+           : [val_m] "=r" (val_m)                \
+           : [psrc_lw_m] "m" (*psrc_lw_m)        \
+       );                                        \
+                                                 \
+       val_m;                                    \
+   } )
+
+   #define SH(val, pdst)                         \
+   {                                             \
+       uint8_t *pdst_sh_m = (uint8_t *) (pdst);  \
+       uint16_t val_m = (val);                   \
+                                                 \
+       asm volatile (                            \
+           "ush  %[val_m],  %[pdst_sh_m]  \n\t"  \
+                                                 \
+           : [pdst_sh_m] "=m" (*pdst_sh_m)       \
+           : [val_m] "r" (val_m)                 \
+       );                                        \
+   }
+
+   #define SW(val, pdst)                         \
+   {                                             \
+       uint8_t *pdst_sw_m = (uint8_t *) (pdst);  \
+       uint32_t val_m = (val);                   \
+                                                 \
+       asm volatile (                            \
+           "usw  %[val_m],  %[pdst_sw_m]  \n\t"  \
+                                                 \
+           : [pdst_sw_m] "=m" (*pdst_sw_m)       \
+           : [val_m] "r" (val_m)                 \
+       );                                        \
+   }
+
+   #define SD(val, pdst)                                          \
+    {                                                              \
+        uint8_t *pdst_sd_m = (uint8_t *) (pdst);                   \
+        uint32_t val0_m, val1_m;                                   \
+                                                                   \
+        val0_m = (uint32_t) ((val) & 0x00000000FFFFFFFF);          \
+        val1_m = (uint32_t) (((val) >> 32) & 0x00000000FFFFFFFF);  \
+                                                                   \
+        SW(val0_m, pdst_sd_m);                                     \
+        SW(val1_m, pdst_sd_m + 4);                                 \
+    }
+
+    #define SW_ZERO(pdst)                      \
+    {                                          \
+        uint8_t *pdst_m = (uint8_t *) (pdst);  \
+                                               \
+        asm volatile (                         \
+            "usw  $0,  %[pdst_m]  \n\t"        \
+                                               \
+            : [pdst_m] "=m" (*pdst_m)          \
+            :                                  \
+        );                                     \
+    }
+#endif  // (__mips_isa_rev >= 6)
+#endif
+
+#define LD_B(RTYPE, psrc) *((RTYPE *) (psrc))
+#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
+#define LD_B2(RTYPE, psrc, stride, out0, out1)  \
+{                                               \
+    out0 = LD_B(RTYPE, (psrc));                 \
+    out1 = LD_B(RTYPE, (psrc) + stride);        \
+}
+#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
+#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3)   \
+{                                                            \
+    LD_B2(RTYPE, (psrc), stride, out0, out1);                \
+    LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3);  \
+}
+#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
+
+#define ST_B(RTYPE, in, pdst) *((RTYPE *) (pdst)) = (in)
+#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
+#define ST_B2(RTYPE, in0, in1, pdst, stride)  \
+{                                             \
+    ST_B(RTYPE, in0, (pdst));                 \
+    ST_B(RTYPE, in1, (pdst) + stride);        \
+}
+#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
+#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride)    \
+{                                                         \
+    ST_B2(RTYPE, in0, in1, (pdst), stride);               \
+    ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride);  \
+}
+#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
+
+#define ADD2(in0, in1, in2, in3, out0, out1)  \
+{                                             \
+    out0 = in0 + in1;                         \
+    out1 = in2 + in3;                         \
+}
+#define ADD3(in0, in1, in2, in3, in4, in5,  \
+             out0, out1, out2)              \
+{                                           \
+    ADD2(in0, in1, in2, in3, out0, out1);   \
+    out2 = in4 + in5;                       \
+}
+#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7,  \
+             out0, out1, out2, out3)                  \
+{                                                     \
+    ADD2(in0, in1, in2, in3, out0, out1);             \
+    ADD2(in4, in5, in6, in7, out2, out3);             \
+}
+
+#define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1)      \
+{                                                           \
+    out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1);  \
+    out1 = (RTYPE) __msa_ilvr_b((v16i8) in2, (v16i8) in3);  \
+}
+#define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__)
+
+#define HSUB_UB2(RTYPE, in0, in1, out0, out1)                 \
+{                                                             \
+    out0 = (RTYPE) __msa_hsub_u_h((v16u8) in0, (v16u8) in0);  \
+    out1 = (RTYPE) __msa_hsub_u_h((v16u8) in1, (v16u8) in1);  \
+}
+#define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)
+
+#define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val)                 \
+{                                                                         \
+    v16i8 zero_m = { 0 };                                                 \
+    out0 = (RTYPE) __msa_sldi_b((v16i8) zero_m, (v16i8) in0, slide_val);  \
+    out1 = (RTYPE) __msa_sldi_b((v16i8) zero_m, (v16i8) in1, slide_val);  \
+}
+#define SLDI_B2_0_UB(...) SLDI_B2_0(v16u8, __VA_ARGS__)
+
+#define SLDI_B3_0(RTYPE, in0, in1, in2, out0, out1, out2,  slide_val)     \
+{                                                                         \
+    v16i8 zero_m = { 0 };                                                 \
+    SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val);                    \
+    out2 = (RTYPE) __msa_sldi_b((v16i8) zero_m, (v16i8) in2, slide_val);  \
+}
+#define SLDI_B3_0_UB(...) SLDI_B3_0(v16u8, __VA_ARGS__)
+
+#define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1)      \
+{                                                            \
+    out0 = (RTYPE) __msa_ilvev_w((v4i32) in1, (v4i32) in0);  \
+    out1 = (RTYPE) __msa_ilvev_w((v4i32) in3, (v4i32) in2);  \
+}
+#define ILVEV_W2_UB(...) ILVEV_W2(v16u8, __VA_ARGS__)
+
+#define ADD_ABS_H3(RTYPE, in0, in1, in2, out0, out1, out2)  \
+{                                                           \
+    RTYPE zero = {0};                                       \
+                                                            \
+    out0 = __msa_add_a_h((v8i16) zero, in0);                \
+    out1 = __msa_add_a_h((v8i16) zero, in1);                \
+    out2 = __msa_add_a_h((v8i16) zero, in2);                \
+}
+#define ADD_ABS_H3_SH(...) ADD_ABS_H3(v8i16, __VA_ARGS__)
+
+#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1)       \
+{                                                                          \
+    out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0);  \
+    out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2);  \
+}
+#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
+
+#define CMP_AND_SELECT(inp0, inp1, inp2, inp3, inp4, inp5, out0)              \
+{                                                                             \
+   v8i16 _sel_h0, _sel_h1;                                                    \
+   v16u8 _sel_b0, _sel_b1;                                                    \
+   _sel_h0 = (v8i16) __msa_clt_u_h((v8u16) inp1, (v8u16) inp0);               \
+   _sel_b0 = (v16u8) __msa_pckev_b((v16i8) _sel_h0, (v16i8) _sel_h0);         \
+   inp0 = (v8i16) __msa_bmnz_v((v16u8) inp0, (v16u8) inp1, (v16u8) _sel_h0);  \
+   inp4 = (v16u8) __msa_bmnz_v(inp3, inp4, _sel_b0);                          \
+   _sel_h1 = (v8i16) __msa_clt_u_h((v8u16) inp2, (v8u16) inp0);               \
+   _sel_b1 = (v16u8) __msa_pckev_b((v16i8) _sel_h1, (v16i8) _sel_h1);         \
+   inp4 = (v16u8) __msa_bmnz_v(inp4, inp5, _sel_b1);                          \
+   out0 += inp4;                                                              \
+}
+
+void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row,
+                                png_const_bytep prev_row)
+{
+   png_size_t i, cnt, cnt16, cnt32;
+   png_size_t istop = row_info->rowbytes;
+   png_bytep rp = row;
+   png_const_bytep pp = prev_row;
+   v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+
+   for (i = 0; i < (istop >> 6); i++)
+   {
+      LD_UB4(rp, 16, src0, src1, src2, src3);
+      LD_UB4(pp, 16, src4, src5, src6, src7);
+      pp += 64;
+
+	  ADD4(src0, src4, src1, src5, src2, src6, src3, src7,
+	       src0, src1, src2, src3);
+
+      ST_UB4(src0, src1, src2, src3, rp, 16);
+      rp += 64;
+   }
+
+   if (istop & 0x3F)
+   {
+      cnt32 = istop & 0x20;
+      cnt16 = istop & 0x10;
+      cnt = istop & 0xF;
+
+      if(cnt32)
+      {
+         if (cnt16 && cnt)
+         {
+            LD_UB4(rp, 16, src0, src1, src2, src3);
+            LD_UB4(pp, 16, src4, src5, src6, src7);
+
+            ADD4(src0, src4, src1, src5, src2, src6, src3, src7,
+	             src0, src1, src2, src3);
+
+            ST_UB4(src0, src1, src2, src3, rp, 16);
+            rp += 64;
+         }
+         else if (cnt16 || cnt)
+         {
+            LD_UB2(rp, 16, src0, src1);
+            LD_UB2(pp, 16, src4, src5);
+            pp += 32;
+            src2 = LD_UB(rp + 32);
+            src6 = LD_UB(pp);
+
+            ADD3(src0, src4, src1, src5, src2, src6, src0, src1, src2);
+
+            ST_UB2(src0, src1, rp, 16);
+            rp += 32;
+            ST_UB(src2, rp);
+            rp += 16;
+         }
+         else
+         {
+            LD_UB2(rp, 16, src0, src1);
+            LD_UB2(pp, 16, src4, src5);
+
+			ADD2(src0, src4, src1, src5, src0, src1);
+
+            ST_UB2(src0, src1, rp, 16);
+            rp += 32;
+         }
+      }
+      else if (cnt16 && cnt)
+      {
+         LD_UB2(rp, 16, src0, src1);
+         LD_UB2(pp, 16, src4, src5);
+
+         ADD2(src0, src4, src1, src5, src0, src1);
+
+         ST_UB2(src0, src1, rp, 16);
+         rp += 32;
+      }
+      else if (cnt16 || cnt)
+      {
+         src0 = LD_UB(rp);
+         src4 = LD_UB(pp);
+         pp += 16;
+
+         src0 += src4;
+
+         ST_UB(src0, rp);
+         rp += 16;
+      }
+   }
+}
+
+void png_read_filter_row_sub4_msa(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   png_size_t count;
+   png_size_t istop = row_info->rowbytes;
+   png_bytep src = row;
+   png_bytep nxt = row + 4;
+   int32_t inp0;
+   v16u8 src0, src1, src2, src3, src4;
+   v16u8 dst0, dst1;
+   v16u8 zero = { 0 };
+
+   istop -= 4;
+
+   inp0 = LW(src);
+   src += 4;
+   src0 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp0);
+
+   for (count = 0; count < istop; count += 16)
+   {
+      src1 = LD_UB(src);
+      src += 16;
+
+      src2 = (v16u8) __msa_sldi_b((v16i8) zero, (v16i8) src1, 4);
+      src3 = (v16u8) __msa_sldi_b((v16i8) zero, (v16i8) src1, 8);
+      src4 = (v16u8) __msa_sldi_b((v16i8) zero, (v16i8) src1, 12);
+      src1 += src0;
+      src2 += src1;
+      src3 += src2;
+      src4 += src3;
+      src0 = src4;
+      ILVEV_W2_UB(src1, src2, src3, src4, dst0, dst1);
+      dst0 = (v16u8) __msa_pckev_d((v2i64) dst1, (v2i64) dst0);
+
+      ST_UB(dst0, nxt);
+      nxt += 16;
+   }
+}
+
+void png_read_filter_row_sub3_msa(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   png_size_t count;
+   png_size_t istop = row_info->rowbytes;
+   png_bytep src = row;
+   png_bytep nxt = row + 3;
+   int64_t out0;
+   int32_t inp0, out1;
+   v16u8 src0, src1, src2, src3, src4, dst0, dst1;
+   v16u8 zero = { 0 };
+   v16i8 mask0 = { 0, 1, 2, 16, 17, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+   v16i8 mask1 = { 0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20, 21, 0, 0, 0, 0 };
+
+   istop -= 3;
+
+   inp0 = LW(src);
+   src += 3;
+   src0 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp0);
+
+   for (count = 0; count < istop; count += 12)
+   {
+      src1 = LD_UB(src);
+      src += 12;
+
+      src2 = (v16u8) __msa_sldi_b((v16i8) zero, (v16i8) src1, 3);
+      src3 = (v16u8) __msa_sldi_b((v16i8) zero, (v16i8) src1, 6);
+      src4 = (v16u8) __msa_sldi_b((v16i8) zero, (v16i8) src1, 9);
+      src1 += src0;
+      src2 += src1;
+      src3 += src2;
+      src4 += src3;
+      src0 = src4;
+      VSHF_B2_UB(src1, src2, src3, src4, mask0, mask0, dst0, dst1);
+      dst0 = (v16u8) __msa_vshf_b(mask1, (v16i8) dst1, (v16i8) dst0);
+      out0 = __msa_copy_s_d((v2i64) dst0, 0);
+      out1 = __msa_copy_s_w((v4i32) dst0, 2);
+
+      SD(out0, nxt);
+      nxt += 8;
+      SW(out1, nxt);
+      nxt += 4;
+   }
+}
+
+void png_read_filter_row_avg4_msa(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   png_size_t i;
+   png_bytep src = row;
+   png_bytep nxt = row;
+   png_const_bytep pp = prev_row;
+   png_size_t istop = row_info->rowbytes - 4;
+   int32_t inp0, inp1, out0;
+   v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, dst0, dst1;
+   v16u8 zero = { 0 };
+
+   inp0 = LW(pp);
+   pp += 4;
+   inp1 = LW(src);
+   src += 4;
+   src0 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp0);
+   src1 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp1);
+   src0 = (v16u8) MSA_SRLI_B(src0, 1);
+   src1 += src0;
+   out0 = __msa_copy_s_w((v4i32) src1, 0);
+   SW(out0, nxt);
+   nxt += 4;
+
+   for (i = 0; i < istop; i += 16)
+   {
+      src2 = LD_UB(pp);
+      pp += 16;
+      src6 = LD_UB(src);
+      src += 16;
+
+      SLDI_B2_0_UB(src2, src6, src3, src7, 4);
+      SLDI_B2_0_UB(src2, src6, src4, src8, 8);
+      SLDI_B2_0_UB(src2, src6, src5, src9, 12);
+      src2 = __msa_ave_u_b(src2, src1);
+      src6 += src2;
+      src3 = __msa_ave_u_b(src3, src6);
+      src7 += src3;
+      src4 = __msa_ave_u_b(src4, src7);
+      src8 += src4;
+      src5 = __msa_ave_u_b(src5, src8);
+      src9 += src5;
+      src1 = src9;
+      ILVEV_W2_UB(src6, src7, src8, src9, dst0, dst1);
+      dst0 = (v16u8) __msa_pckev_d((v2i64) dst1, (v2i64) dst0);
+
+      ST_UB(dst0, nxt);
+      nxt += 16;
+   }
+}
+
+void png_read_filter_row_avg3_msa(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   png_size_t i;
+   png_bytep src = row;
+   png_bytep nxt = row;
+   png_const_bytep pp = prev_row;
+   png_size_t istop = row_info->rowbytes - 3;
+   int64_t out0;
+   int32_t inp0, inp1, out1;
+   int16_t out2;
+   v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, dst0, dst1;
+   v16u8 zero = { 0 };
+   v16i8 mask0 = { 0, 1, 2, 16, 17, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+   v16i8 mask1 = { 0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20, 21, 0, 0, 0, 0 };
+
+   inp0 = LW(pp);
+   pp += 3;
+   inp1 = LW(src);
+   src += 3;
+   src0 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp0);
+   src1 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp1);
+   src0 = (v16u8) MSA_SRLI_B(src0, 1);
+   src1 += src0;
+   out2 = __msa_copy_s_h((v8i16) src1, 0);
+   SH(out2, nxt);
+   nxt += 2;
+   nxt[0] = src1[2];
+   nxt++;
+
+   for (i = 0; i < istop; i += 12)
+   {
+      src2 = LD_UB(pp);
+      pp += 12;
+      src6 = LD_UB(src);
+      src += 12;
+
+      SLDI_B2_0_UB(src2, src6, src3, src7, 3);
+      SLDI_B2_0_UB(src2, src6, src4, src8, 6);
+      SLDI_B2_0_UB(src2, src6, src5, src9, 9);
+      src2 = __msa_ave_u_b(src2, src1);
+      src6 += src2;
+      src3 = __msa_ave_u_b(src3, src6);
+      src7 += src3;
+      src4 = __msa_ave_u_b(src4, src7);
+      src8 += src4;
+      src5 = __msa_ave_u_b(src5, src8);
+      src9 += src5;
+      src1 = src9;
+      VSHF_B2_UB(src6, src7, src8, src9, mask0, mask0, dst0, dst1);
+      dst0 = (v16u8) __msa_vshf_b(mask1, (v16i8) dst1, (v16i8) dst0);
+      out0 = __msa_copy_s_d((v2i64) dst0, 0);
+      out1 = __msa_copy_s_w((v4i32) dst0, 2);
+
+      SD(out0, nxt);
+      nxt += 8;
+      SW(out1, nxt);
+      nxt += 4;
+   }
+}
+
+void png_read_filter_row_paeth4_msa(png_row_infop row_info,
+                                    png_bytep row,
+                                    png_const_bytep prev_row)
+{
+   int32_t count, rp_end;
+   png_bytep nxt;
+   png_const_bytep prev_nxt;
+   int32_t inp0, inp1, res0;
+   v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
+   v16u8 src10, src11, src12, src13, dst0, dst1;
+   v8i16 vec0, vec1, vec2;
+   v16u8 zero = { 0 };
+
+   nxt = row;
+   prev_nxt = prev_row;
+
+   inp0 = LW(nxt);
+   inp1 = LW(prev_nxt);
+   prev_nxt += 4;
+   src0 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp0);
+   src1 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp1);
+
+   src1 += src0;
+   res0 = __msa_copy_s_w((v4i32) src1, 0);
+
+   SW(res0, nxt);
+   nxt += 4;
+
+   /* Remainder */
+   rp_end = row_info->rowbytes - 4;
+
+   for (count = 0; count < rp_end; count += 16)
+   {
+      src2 = LD_UB(prev_nxt);
+      prev_nxt += 16;
+      src6 = LD_UB(prev_row);
+      prev_row += 16;
+      src10 = LD_UB(nxt);
+
+      SLDI_B3_0_UB(src2, src6, src10, src3, src7, src11, 4);
+      SLDI_B3_0_UB(src2, src6, src10, src4, src8, src12, 8);
+      SLDI_B3_0_UB(src2, src6, src10, src5, src9, src13, 12);
+      ILVR_B2_SH(src2, src6, src1, src6, vec0, vec1);
+      HSUB_UB2_SH(vec0, vec1, vec0, vec1);
+      vec2 = vec0 + vec1;
+      ADD_ABS_H3_SH(vec0, vec1, vec2, vec0, vec1, vec2);
+      CMP_AND_SELECT(vec0, vec1, vec2, src1, src2, src6, src10);
+      ILVR_B2_SH(src3, src7, src10, src7, vec0, vec1);
+      HSUB_UB2_SH(vec0, vec1, vec0, vec1);
+      vec2 = vec0 + vec1;
+      ADD_ABS_H3_SH(vec0, vec1, vec2, vec0, vec1, vec2);
+      CMP_AND_SELECT(vec0, vec1, vec2, src10, src3, src7, src11);
+      ILVR_B2_SH(src4, src8, src11, src8, vec0, vec1);
+      HSUB_UB2_SH(vec0, vec1, vec0, vec1);
+      vec2 = vec0 + vec1;
+      ADD_ABS_H3_SH(vec0, vec1, vec2, vec0, vec1, vec2);
+      CMP_AND_SELECT(vec0, vec1, vec2, src11, src4, src8, src12);
+      ILVR_B2_SH(src5, src9, src12, src9, vec0, vec1);
+      HSUB_UB2_SH(vec0, vec1, vec0, vec1);
+      vec2 = vec0 + vec1;
+      ADD_ABS_H3_SH(vec0, vec1, vec2, vec0, vec1, vec2);
+      CMP_AND_SELECT(vec0, vec1, vec2, src12, src5, src9, src13);
+      src1 = src13;
+      ILVEV_W2_UB(src10, src11, src12, src1, dst0, dst1);
+      dst0 = (v16u8) __msa_pckev_d((v2i64) dst1, (v2i64) dst0);
+
+      ST_UB(dst0, nxt);
+      nxt += 16;
+   }
+}
+
+void png_read_filter_row_paeth3_msa(png_row_infop row_info,
+                                    png_bytep row,
+                                    png_const_bytep prev_row)
+{
+   int32_t count, rp_end;
+   png_bytep nxt;
+   png_const_bytep prev_nxt;
+   int64_t out0;
+   int32_t inp0, inp1, out1;
+   int16_t out2;
+   v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, dst0, dst1;
+   v16u8 src10, src11, src12, src13;
+   v8i16 vec0, vec1, vec2;
+   v16u8 zero = { 0 };
+   v16i8 mask0 = { 0, 1, 2, 16, 17, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+   v16i8 mask1 = { 0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20, 21, 0, 0, 0, 0 };
+
+   nxt = row;
+   prev_nxt = prev_row;
+
+   inp0 = LW(nxt);
+   inp1 = LW(prev_nxt);
+   prev_nxt += 3;
+   src0 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp0);
+   src1 = (v16u8) __msa_insert_w((v4i32) zero, 0, inp1);
+
+   src1 += src0;
+   out2 = __msa_copy_s_h((v8i16) src1, 0);
+
+   SH(out2, nxt);
+   nxt += 2;
+   nxt[0] = src1[2];
+   nxt++;
+
+   /* Remainder */
+   rp_end = row_info->rowbytes - 3;
+
+   for (count = 0; count < rp_end; count += 12)
+   {
+      src2 = LD_UB(prev_nxt);
+      prev_nxt += 12;
+      src6 = LD_UB(prev_row);
+      prev_row += 12;
+      src10 = LD_UB(nxt);
+
+      SLDI_B3_0_UB(src2, src6, src10, src3, src7, src11, 3);
+      SLDI_B3_0_UB(src2, src6, src10, src4, src8, src12, 6);
+      SLDI_B3_0_UB(src2, src6, src10, src5, src9, src13, 9);
+      ILVR_B2_SH(src2, src6, src1, src6, vec0, vec1);
+      HSUB_UB2_SH(vec0, vec1, vec0, vec1);
+      vec2 = vec0 + vec1;
+      ADD_ABS_H3_SH(vec0, vec1, vec2, vec0, vec1, vec2);
+      CMP_AND_SELECT(vec0, vec1, vec2, src1, src2, src6, src10);
+      ILVR_B2_SH(src3, src7, src10, src7, vec0, vec1);
+      HSUB_UB2_SH(vec0, vec1, vec0, vec1);
+      vec2 = vec0 + vec1;
+      ADD_ABS_H3_SH(vec0, vec1, vec2, vec0, vec1, vec2);
+      CMP_AND_SELECT(vec0, vec1, vec2, src10, src3, src7, src11);
+      ILVR_B2_SH(src4, src8, src11, src8, vec0, vec1);
+      HSUB_UB2_SH(vec0, vec1, vec0, vec1);
+      vec2 = vec0 + vec1;
+      ADD_ABS_H3_SH(vec0, vec1, vec2, vec0, vec1, vec2);
+      CMP_AND_SELECT(vec0, vec1, vec2, src11, src4, src8, src12);
+      ILVR_B2_SH(src5, src9, src12, src9, vec0, vec1);
+      HSUB_UB2_SH(vec0, vec1, vec0, vec1);
+      vec2 = vec0 + vec1;
+      ADD_ABS_H3_SH(vec0, vec1, vec2, vec0, vec1, vec2);
+      CMP_AND_SELECT(vec0, vec1, vec2, src12, src5, src9, src13);
+      src1 = src13;
+      VSHF_B2_UB(src10, src11, src12, src13, mask0, mask0, dst0, dst1);
+      dst0 = (v16u8) __msa_vshf_b(mask1, (v16i8) dst1, (v16i8) dst0);
+      out0 = __msa_copy_s_d((v2i64) dst0, 0);
+      out1 = __msa_copy_s_w((v4i32) dst0, 2);
+
+      SD(out0, nxt);
+      nxt += 8;
+      SW(out1, nxt);
+      nxt += 4;
+   }
+}
+
+#endif /* PNG_MIPS_MSA_OPT > 0 */
+#endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 (intrinsics) */
+#endif /* READ */
--- a/xs/src/png/libpng/mips/mips_init.c
+++ b/xs/src/png/libpng/mips/mips_init.c
@ -0,0 +1,129 @@
+
+/* mips_init.c - MSA optimised filter functions
+ *
+ * Copyright (c) 2016 Glenn Randers-Pehrson
+ * Written by Mandar Sahastrabuddhe, 2016.
+ * Last changed in libpng 1.6.25 [September 1, 2016]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
+ * called.
+ */
+#define _POSIX_SOURCE 1
+
+#include <stdio.h>
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+#if PNG_MIPS_MSA_OPT > 0
+#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do run-time checks */
+/* WARNING: it is strongly recommended that you do not build libpng with
+ * run-time checks for CPU features if at all possible.  In the case of the MIPS
+ * MSA instructions there is no processor-specific way of detecting the
+ * presence of the required support, therefore run-time detection is extremely
+ * OS specific.
+ *
+ * You may set the macro PNG_MIPS_MSA_FILE to the file name of file containing
+ * a fragment of C source code which defines the png_have_msa function.  There
+ * are a number of implementations in contrib/mips-msa, but the only one that
+ * has partial support is contrib/mips-msa/linux.c - a generic Linux
+ * implementation which reads /proc/cpufino.
+ */
+#ifndef PNG_MIPS_MSA_FILE
+#  ifdef __linux__
+#     define PNG_MIPS_MSA_FILE "contrib/mips-msa/linux.c"
+#  endif
+#endif
+
+#ifdef PNG_MIPS_MSA_FILE
+
+#include <signal.h> /* for sig_atomic_t */
+static int png_have_msa(png_structp png_ptr);
+#include PNG_MIPS_MSA_FILE
+
+#else  /* PNG_MIPS_MSA_FILE */
+#  error "PNG_MIPS_MSA_FILE undefined: no support for run-time MIPS MSA checks"
+#endif /* PNG_MIPS_MSA_FILE */
+#endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */
+
+#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
+#  error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
+#endif
+
+void
+png_init_filter_functions_msa(png_structp pp, unsigned int bpp)
+{
+   /* The switch statement is compiled in for MIPS_MSA_API, the call to
+    * png_have_msa is compiled in for MIPS_MSA_CHECK. If both are defined
+    * the check is only performed if the API has not set the MSA option on
+    * or off explicitly. In this case the check controls what happens.
+    */
+
+#ifdef PNG_MIPS_MSA_API_SUPPORTED
+   switch ((pp->options >> PNG_MIPS_MSA) & 3)
+   {
+      case PNG_OPTION_UNSET:
+         /* Allow the run-time check to execute if it has been enabled -
+          * thus both API and CHECK can be turned on.  If it isn't supported
+          * this case will fall through to the 'default' below, which just
+          * returns.
+          */
+#endif /* PNG_MIPS_MSA_API_SUPPORTED */
+#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED
+         {
+            static volatile sig_atomic_t no_msa = -1; /* not checked */
+
+            if (no_msa < 0)
+               no_msa = !png_have_msa(pp);
+
+            if (no_msa)
+               return;
+         }
+#ifdef PNG_MIPS_MSA_API_SUPPORTED
+         break;
+#endif
+#endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */
+
+#ifdef PNG_MIPS_MSA_API_SUPPORTED
+      default: /* OFF or INVALID */
+         return;
+
+      case PNG_OPTION_ON:
+         /* Option turned on */
+         break;
+   }
+#endif
+
+   /* IMPORTANT: any new external functions used here must be declared using
+    * PNG_INTERNAL_FUNCTION in ../pngpriv.h.  This is required so that the
+    * 'prefix' option to configure works:
+    *
+    *    ./configure --with-libpng-prefix=foobar_
+    *
+    * Verify you have got this right by running the above command, doing a build
+    * and examining pngprefix.h; it must contain a #define for every external
+    * function you add.  (Notice that this happens automatically for the
+    * initialization function.)
+    */
+   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_msa;
+
+   if (bpp == 3)
+   {
+      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_msa;
+      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_msa;
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_msa;
+   }
+
+   else if (bpp == 4)
+   {
+      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_msa;
+      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_msa;
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_msa;
+   }
+}
+#endif /* PNG_MIPS_MSA_OPT > 0 */
+#endif /* READ */
--- a/xs/src/png/libpng/pngusr.dfa
+++ b/xs/src/png/libpng/pngusr.dfa
@ -0,0 +1,14 @@
+# pngusr.dfa
+#
+# Build time configuration of libpng
+#
+# Enter build configuration options in this file
+#
+# Security settings: by default these limits are unset, you can change them
+# here by entering the appropriate values as #defines preceded by '@' (to cause,
+# them to be passed through to the build of pnglibconf.h), for example:
+#
+# @# define PNG_USER_WIDTH_MAX 65535
+# @# define PNG_USER_HEIGHT_MAX 65535
+# @# define PNG_USER_CHUNK_CACHE_MAX 256
+# @# define PNG_USER_CHUNK_MALLOC_MAX 640000
--- a/xs/src/png/libpng/powerpc/filter_vsx_intrinsics.c
+++ b/xs/src/png/libpng/powerpc/filter_vsx_intrinsics.c
@ -0,0 +1,767 @@
+/* filter_vsx_intrinsics.c - PowerPC optimised filter functions
+ *
+ * Copyright (c) 2017 Glenn Randers-Pehrson
+ * Written by Vadim Barkov, 2017.
+ * Last changed in libpng 1.6.29 [March 16, 2017]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+/* This code requires -maltivec and -mvsx on the command line: */
+#if PNG_POWERPC_VSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
+
+#include <altivec.h>
+
+#if PNG_POWERPC_VSX_OPT > 0
+
+#ifndef __VSX__
+#  error "This code requires VSX support (POWER7 and later). Please provide -mvsx compiler flag."
+#endif
+
+#define vec_ld_unaligned(vec,data) vec = vec_vsx_ld(0,data)
+#define vec_st_unaligned(vec,data) vec_vsx_st(vec,0,data)
+
+
+/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
+ * They're positioned like this:
+ *    prev:  c b
+ *    row:   a d
+ * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
+ * whichever of a, b, or c is closest to p=a+b-c.
+ * ( this is taken from ../intel/filter_sse2_intrinsics.c )
+ */
+
+#define vsx_declare_common_vars(row_info,row,prev_row,offset) \
+   png_byte i;\
+   png_bytep rp = row + offset;\
+   png_const_bytep pp = prev_row;\
+   png_size_t unaligned_top = 16 - (((png_size_t)rp % 16));\
+   png_size_t istop;\
+   if(unaligned_top == 16)\
+      unaligned_top = 0;\
+   istop = row_info->rowbytes;\
+   if((unaligned_top < istop))\
+      istop -= unaligned_top;\
+   else{\
+      unaligned_top = istop;\
+      istop = 0;\
+   }
+
+void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
+                                png_const_bytep prev_row)
+{
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vsx_declare_common_vars(row_info,row,prev_row,0)
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      rp_vec = vec_ld(0,rp);
+      vec_ld_unaligned(pp_vec,pp);
+
+      rp_vec = vec_add(rp_vec,pp_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      pp += 16;
+      rp += 16;
+      istop -= 16;
+   }
+
+   if(istop > 0)
+   {
+      /* If byte count of row is not divisible by 16
+       * we will process remaining part as usual
+       */
+      for (i = 0; i < istop; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+         rp++;
+      }
+}
+
+}
+
+static const vector unsigned char VSX_LEFTSHIFTED1_4 = {16,16,16,16, 0, 1, 2, 3,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED2_4 = {16,16,16,16,16,16,16,16, 4, 5, 6, 7,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 8, 9,10,11};
+
+static const vector unsigned char VSX_LEFTSHIFTED1_3 = {16,16,16, 0, 1, 2,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED2_3 = {16,16,16,16,16,16, 3, 4, 5,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED3_3 = {16,16,16,16,16,16,16,16,16, 6, 7, 8,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 9,10,11,16};
+
+static const vector unsigned char VSX_NOT_SHIFTED1_4 = {16,16,16,16, 4, 5, 6, 7,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED2_4 = {16,16,16,16,16,16,16,16, 8, 9,10,11,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,15};
+
+static const vector unsigned char VSX_NOT_SHIFTED1_3 = {16,16,16, 3, 4, 5,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED2_3 = {16,16,16,16,16,16, 6, 7, 8,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED3_3 = {16,16,16,16,16,16,16,16,16, 9,10,11,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,16};
+
+static const vector unsigned char VSX_CHAR_ZERO = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+#ifdef __LITTLE_ENDIAN__
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = { 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = { 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6};
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = { 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = { 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = { 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16};
+
+#elif defined(__BIG_ENDIAN__)
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = {16, 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = {16, 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {16,12,16,13,16,14,16,15,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 1, 3, 5, 7,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 1, 3, 5, 7,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5, 7};
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = {16, 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = {16, 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = {16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {16,12,16,13,16,14,16,16,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 1, 3, 5,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 1, 3, 5,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 1, 3, 5,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5,16};
+
+#endif
+
+#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VSX_CHAR_ZERO,VSX_CHAR_TO_SHORT##offset##_##bpp)
+#define vsx_short_to_char(vec,offset,bpp) vec_perm(((vector unsigned char)(vec)),VSX_CHAR_ZERO,VSX_SHORT_TO_CHAR##offset##_##bpp)
+
+#ifdef PNG_USE_ABS
+#  define vsx_abs(number) abs(number)
+#else
+#  define vsx_abs(number) (number > 0) ? (number) : -(number)
+#endif
+
+void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   const png_byte bpp = 4;
+
+   vector unsigned char rp_vec;
+   vector unsigned char part_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+
+   PNG_UNUSED(pp)
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+         rp++;
+      }
+      rp -= bpp;
+
+      rp_vec = vec_ld(0,rp);
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 16;
+      istop -= 16;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp - bpp))) & 0xff);
+         rp++;
+      }
+
+}
+
+void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   const png_byte bpp = 3;
+
+   vector unsigned char rp_vec;
+   vector unsigned char part_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+
+   PNG_UNUSED(pp)
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+         rp++;
+      }
+      rp -= bpp;
+
+      rp_vec = vec_ld(0,rp);
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      vec_st(rp_vec,0,rp);
+      rp += 15;
+      istop -= 16;
+
+      /* Since 16 % bpp = 16 % 3 = 1, last element of array must
+       * be proceeded manually
+       */
+      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+      rp++;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+         rp++;
+      }
+}
+
+void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   const png_byte bpp = 4;
+
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vector unsigned char pp_part_vec;
+   vector unsigned char rp_part_vec;
+   vector unsigned char avg_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+   rp -= bpp;
+   if(istop >= bpp)
+      istop -= bpp;
+
+   for (i = 0; i < bpp; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         ((int)(*pp++) / 2 )) & 0xff);
+
+      rp++;
+   }
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) +
+            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+         rp++;
+      }
+      rp -= bpp;
+      pp -= bpp;
+
+      vec_ld_unaligned(pp_vec,pp);
+      rp_vec = vec_ld(0,rp);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_4);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_4);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_4);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 16;
+      pp += 16;
+      istop -= 16;
+   }
+
+   if(istop  > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) +
+            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+         rp++;
+      }
+}
+
+void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+  const png_byte bpp = 3;
+
+  vector unsigned char rp_vec;
+  vector unsigned char pp_vec;
+  vector unsigned char pp_part_vec;
+  vector unsigned char rp_part_vec;
+  vector unsigned char avg_vec;
+
+  vsx_declare_common_vars(row_info,row,prev_row,bpp)
+  rp -= bpp;
+  if(istop >= bpp)
+     istop -= bpp;
+
+  for (i = 0; i < bpp; i++)
+  {
+     *rp = (png_byte)(((int)(*rp) +
+        ((int)(*pp++) / 2 )) & 0xff);
+
+     rp++;
+  }
+
+  /* Altivec operations require 16-byte aligned data
+   * but input can be unaligned. So we calculate
+   * unaligned part as usual.
+   */
+  for (i = 0; i < unaligned_top; i++)
+  {
+     *rp = (png_byte)(((int)(*rp) +
+        (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+     rp++;
+  }
+
+  /* Using SIMD while we can */
+  while( istop >= 16 )
+  {
+     for(i=0;i < bpp ; i++)
+     {
+        *rp = (png_byte)(((int)(*rp) +
+           (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+        rp++;
+     }
+     rp -= bpp;
+     pp -= bpp;
+
+     vec_ld_unaligned(pp_vec,pp);
+     rp_vec = vec_ld(0,rp);
+
+     rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
+     pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_3);
+     avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+     avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+     rp_vec = vec_add(rp_vec,avg_vec);
+
+     rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
+     pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_3);
+     avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+     avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+     rp_vec = vec_add(rp_vec,avg_vec);
+
+     rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
+     pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_3);
+     avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+     avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+     rp_vec = vec_add(rp_vec,avg_vec);
+
+     rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
+     pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED4_3);
+     avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+     avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+     rp_vec = vec_add(rp_vec,avg_vec);
+
+     vec_st(rp_vec,0,rp);
+
+     rp += 15;
+     pp += 15;
+     istop -= 16;
+
+     /* Since 16 % bpp = 16 % 3 = 1, last element of array must
+      * be proceeded manually
+      */
+     *rp = (png_byte)(((int)(*rp) +
+        (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+     rp++;
+  }
+
+  if(istop  > 0)
+     for (i = 0; i < istop % 16; i++)
+     {
+        *rp = (png_byte)(((int)(*rp) +
+           (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+        rp++;
+     }
+}
+
+/* Bytewise c ? t : e. */
+#define if_then_else(c,t,e) vec_sel(e,t,c)
+
+#define vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) {\
+      c = *(pp - bpp);\
+      a = *(rp - bpp);\
+      b = *pp++;\
+      p = b - c;\
+      pc = a - c;\
+      pa = vsx_abs(p);\
+      pb = vsx_abs(pc);\
+      pc = vsx_abs(p + pc);\
+      if (pb < pa) pa = pb, a = b;\
+      if (pc < pa) a = c;\
+      a += *rp;\
+      *rp++ = (png_byte)a;\
+      }
+
+void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   const png_byte bpp = 4;
+
+   int a, b, c, pa, pb, pc, p;
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
+   vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+   rp -= bpp;
+   if(istop >= bpp)
+      istop -= bpp;
+
+   /* Process the first pixel in the row completely (this is the same as 'up'
+    * because there is only one candidate predictor for the first row).
+    */
+   for(i = 0; i < bpp ; i++)
+   {
+      *rp = (png_byte)( *rp + *pp);
+      rp++;
+      pp++;
+   }
+
+   for(i = 0; i < unaligned_top ; i++)
+   {
+      vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+   }
+
+   while( istop >= 16)
+   {
+      for(i = 0; i < bpp ; i++)
+      {
+         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+      }
+
+      rp -= bpp;
+      pp -= bpp;
+      rp_vec = vec_ld(0,rp);
+      vec_ld_unaligned(pp_vec,pp);
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_4),1,4);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec =  if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+              vec_cmpeq(pb_vec,smallest_vec),
+              b_vec,
+              c_vec
+              )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4)));
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_4),2,4);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec =  if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+              vec_cmpeq(pb_vec,smallest_vec),
+              b_vec,
+              c_vec
+              )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4)));
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_4),3,4);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_4),3,4);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_4),3,4);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec =  if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+              vec_cmpeq(pb_vec,smallest_vec),
+              b_vec,
+              c_vec
+              )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4)));
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 16;
+      pp += 16;
+      istop -= 16;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+      }
+}
+
+void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+  const png_byte bpp = 3;
+
+  int a, b, c, pa, pb, pc, p;
+  vector unsigned char rp_vec;
+  vector unsigned char pp_vec;
+  vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
+  vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
+
+  vsx_declare_common_vars(row_info,row,prev_row,bpp)
+  rp -= bpp;
+  if(istop >= bpp)
+     istop -= bpp;
+
+  /* Process the first pixel in the row completely (this is the same as 'up'
+   * because there is only one candidate predictor for the first row).
+   */
+  for(i = 0; i < bpp ; i++)
+  {
+     *rp = (png_byte)( *rp + *pp);
+     rp++;
+     pp++;
+  }
+
+  for(i = 0; i < unaligned_top ; i++)
+  {
+     vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+  }
+
+  while( istop >= 16)
+  {
+     for(i = 0; i < bpp ; i++)
+     {
+        vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+     }
+
+     rp -= bpp;
+     pp -= bpp;
+     rp_vec = vec_ld(0,rp);
+     vec_ld_unaligned(pp_vec,pp);
+
+     a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3);
+     b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_3),1,3);
+     c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3);
+     pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+     pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+     pc_vec = vec_add(pa_vec,pb_vec);
+     pa_vec = vec_abs(pa_vec);
+     pb_vec = vec_abs(pb_vec);
+     pc_vec = vec_abs(pc_vec);
+     smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+     nearest_vec =  if_then_else(
+           vec_cmpeq(pa_vec,smallest_vec),
+           a_vec,
+           if_then_else(
+             vec_cmpeq(pb_vec,smallest_vec),
+             b_vec,
+             c_vec
+             )
+           );
+     rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3)));
+
+     a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3);
+     b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_3),2,3);
+     c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3);
+     pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+     pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+     pc_vec = vec_add(pa_vec,pb_vec);
+     pa_vec = vec_abs(pa_vec);
+     pb_vec = vec_abs(pb_vec);
+     pc_vec = vec_abs(pc_vec);
+     smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+     nearest_vec =  if_then_else(
+           vec_cmpeq(pa_vec,smallest_vec),
+           a_vec,
+           if_then_else(
+             vec_cmpeq(pb_vec,smallest_vec),
+             b_vec,
+             c_vec
+             )
+           );
+     rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3)));
+
+     a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_3),3,3);
+     b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_3),3,3);
+     c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_3),3,3);
+     pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+     pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+     pc_vec = vec_add(pa_vec,pb_vec);
+     pa_vec = vec_abs(pa_vec);
+     pb_vec = vec_abs(pb_vec);
+     pc_vec = vec_abs(pc_vec);
+     smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+     nearest_vec =  if_then_else(
+           vec_cmpeq(pa_vec,smallest_vec),
+           a_vec,
+           if_then_else(
+             vec_cmpeq(pb_vec,smallest_vec),
+             b_vec,
+             c_vec
+             )
+           );
+     rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3)));
+
+     a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3);
+     b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED4_3),4,3);
+     c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3);
+     pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+     pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+     pc_vec = vec_add(pa_vec,pb_vec);
+     pa_vec = vec_abs(pa_vec);
+     pb_vec = vec_abs(pb_vec);
+     pc_vec = vec_abs(pc_vec);
+     smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+     nearest_vec =  if_then_else(
+           vec_cmpeq(pa_vec,smallest_vec),
+           a_vec,
+           if_then_else(
+             vec_cmpeq(pb_vec,smallest_vec),
+             b_vec,
+             c_vec
+             )
+           );
+     rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3)));
+
+     vec_st(rp_vec,0,rp);
+
+     rp += 15;
+     pp += 15;
+     istop -= 16;
+
+     /* Since 16 % bpp = 16 % 3 = 1, last element of array must
+      * be proceeded manually
+      */
+     vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+  }
+
+  if(istop > 0)
+     for (i = 0; i < istop % 16; i++)
+     {
+        vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+     }
+}
+
+#endif /* PNG_POWERPC_VSX_OPT > 0 */
+#endif /* PNG_POWERPC_VSX_IMPLEMENTATION == 1 (intrinsics) */
+#endif /* READ */
--- a/xs/src/png/libpng/powerpc/powerpc_init.c
+++ b/xs/src/png/libpng/powerpc/powerpc_init.c
@ -0,0 +1,125 @@
+
+/* powerpc_init.c - POWERPC optimised filter functions
+ *
+ * Copyright (c) 2017 Glenn Randers-Pehrson
+ * Written by Vadim Barkov, 2017.
+ * Last changed in libpng 1.6.29 [March 16, 2017]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
+ * called.
+ */
+#define _POSIX_SOURCE 1
+
+#include <stdio.h>
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+#if PNG_POWERPC_VSX_OPT > 0
+#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED /* Do run-time checks */
+/* WARNING: it is strongly recommended that you do not build libpng with
+ * run-time checks for CPU features if at all possible.  In the case of the PowerPC
+ * VSX instructions there is no processor-specific way of detecting the
+ * presence of the required support, therefore run-time detection is extremely
+ * OS specific.
+ *
+ * You may set the macro PNG_POWERPC_VSX_FILE to the file name of file containing
+ * a fragment of C source code which defines the png_have_vsx function.  There
+ * are a number of implementations in contrib/powerpc-vsx, but the only one that
+ * has partial support is contrib/powerpc-vsx/linux.c - a generic Linux
+ * implementation which reads /proc/cpufino.
+ */
+#ifndef PNG_POWERPC_VSX_FILE
+#  ifdef __linux__
+#     define  PNG_POWERPC_VSX_FILE "contrib/powerpc-vsx/linux_aux.c"
+#  endif
+#endif
+
+#ifdef PNG_POWERPC_VSX_FILE
+
+#include <signal.h> /* for sig_atomic_t */
+static int png_have_vsx(png_structp png_ptr);
+#include PNG_POWERPC_VSX_FILE
+
+#else  /* PNG_POWERPC_VSX_FILE */
+#  error "PNG_POWERPC_VSX_FILE undefined: no support for run-time POWERPC VSX checks"
+#endif /* PNG_POWERPC_VSX_FILE */
+#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */
+
+void
+png_init_filter_functions_vsx(png_structp pp, unsigned int bpp)
+{
+   /* The switch statement is compiled in for POWERPC_VSX_API, the call to
+    * png_have_vsx is compiled in for POWERPC_VSX_CHECK. If both are defined
+    * the check is only performed if the API has not set the PowerPC option on
+    * or off explicitly. In this case the check controls what happens.
+    */
+
+#ifdef PNG_POWERPC_VSX_API_SUPPORTED
+   switch ((pp->options >> PNG_POWERPC_VSX) & 3)
+   {
+      case PNG_OPTION_UNSET:
+         /* Allow the run-time check to execute if it has been enabled -
+          * thus both API and CHECK can be turned on.  If it isn't supported
+          * this case will fall through to the 'default' below, which just
+          * returns.
+          */
+#endif /* PNG_POWERPC_VSX_API_SUPPORTED */
+#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED
+         {
+            static volatile sig_atomic_t no_vsx = -1; /* not checked */
+
+            if (no_vsx < 0)
+               no_vsx = !png_have_vsx(pp);
+
+            if (no_vsx)
+               return;
+         }
+#ifdef PNG_POWERPC_VSX_API_SUPPORTED
+         break;
+#endif
+#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */
+
+#ifdef PNG_POWERPC_VSX_API_SUPPORTED
+      default: /* OFF or INVALID */
+         return;
+
+      case PNG_OPTION_ON:
+         /* Option turned on */
+         break;
+   }
+#endif
+
+   /* IMPORTANT: any new internal functions used here must be declared using
+    * PNG_INTERNAL_FUNCTION in ../pngpriv.h.  This is required so that the
+    * 'prefix' option to configure works:
+    *
+    *    ./configure --with-libpng-prefix=foobar_
+    *
+    * Verify you have got this right by running the above command, doing a build
+    * and examining pngprefix.h; it must contain a #define for every external
+    * function you add.  (Notice that this happens automatically for the
+    * initialization function.)
+    */
+   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vsx;
+
+   if (bpp == 3)
+   {
+      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vsx;
+      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vsx;
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_vsx;
+   }
+
+   else if (bpp == 4)
+   {
+      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vsx;
+      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vsx;
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vsx;
+   }
+}
+#endif /* PNG_POWERPC_VSX_OPT > 0 */
+#endif /* READ */
--- a/xs/src/png/libpng/scripts/checksym.awk
+++ b/xs/src/png/libpng/scripts/checksym.awk
@ -0,0 +1,173 @@
+#!/bin/awk -f
+# Check a list of symbols against the master definition
+# (official) list.  Arguments:
+#
+# awk -f checksym.awk official-def list-to-check
+#
+# Output is a file in the current directory called 'symbols.new',
+# the value of the awk variable "of" (which can be changed on the
+# command line if required.)  stdout holds error messages.  Error
+# code indicates success or failure.
+#
+# NOTE: this is a pure, old fashioned, awk script.  It will
+# work with any awk
+
+BEGIN{
+   err=0
+   master=""        # master file
+   official[1] = "" # defined symbols from master file
+   symbol[1] = ""   # defined symbols from png.h
+   removed[1] = ""  # removed symbols from png.h
+   lasto = 0        # last ordinal value from png.h
+   mastero = 0      # highest ordinal in master file
+   symbolo = 0      # highest ordinal in png.h
+   missing = "error"# log an error on missing symbols
+   of="symbols.new" # default to a fixed name
+}
+
+# Read existing definitions from the master file (the first
+# file on the command line.)  This must be a def file and it
+# has definition lines (others are ignored) of the form:
+#
+#   symbol @ordinal
+#
+master == "" {
+   master = FILENAME
+}
+FILENAME==master && NF==2 && $2~/^@/ && $1!~/^;/ {
+   o=0+substr($2,2)
+   if (o > 0) {
+      if (official[o] == "") {
+         official[o] = $1
+         if (o > mastero) mastero = o
+         next
+      } else
+         print master ": duplicated symbol:", official[o] ":", $0
+   } else
+      print master ": bad export line format:", $0
+   err = 1
+}
+FILENAME==master && $1==";missing" && NF==2{
+   # This allows the master file to control how missing symbols
+   # are handled; symbols that aren't in either the master or
+   # the new file.  Valid values are 'ignore', 'warning' and
+   # 'error'
+   missing = $2
+}
+FILENAME==master {
+   next
+}
+
+# Read new definitions, these are free form but the lines must
+# just be symbol definitions.  Lines will be commented out for
+# 'removed' symbols, introduced in png.h using PNG_REMOVED rather
+# than PNG_EXPORT.  Use symbols.dfn or pngwin.dfn to generate the
+# input file.
+#
+#  symbol @ordinal   # two fields, exported symbol
+#  ; symbol @ordinal # three fields, removed symbol
+#  ; @ordinal        # two fields, the last ordinal
+NF==2 && $1 == ";" && $2 ~ /^@[1-9][0-9]*$/ { # last ordinal
+   o=0+substr($2,2)
+   if (lasto == 0 || lasto == o)
+      lasto=o
+   else {
+      print "png.h: duplicated last ordinal:", lasto, o
+      err = 1
+   }
+   next
+}
+NF==3 && $1 == ";" && $3 ~ /^@[1-9][0-9]*$/ { # removed symbol
+   o=0+substr($3,2)
+   if (removed[o] == "" || removed[o] == $2) {
+      removed[o] = $2
+      if (o > symbolo) symbolo = o
+   } else {
+      print "png.h: duplicated removed symbol", o ": '" removed[o] "' != '" $2 "'"
+      err = 1
+   }
+   next
+}
+NF==2 && $2 ~ /^@[1-9][0-9]*$/ { # exported symbol
+   o=0+substr($2,2)
+   if (symbol[o] == "" || symbol[o] == $1) {
+      symbol[o] = $1
+      if (o > symbolo) symbolo = o
+   } else {
+      print "png.h: duplicated symbol", o ": '" symbol[o] "' != '" $1 "'"
+      err = 1
+   }
+}
+{
+   next # skip all other lines
+}
+
+# At the end check for symbols marked as both duplicated and removed
+END{
+   if (symbolo > lasto) {
+      print "highest symbol ordinal in png.h,", symbolo ", exceeds last ordinal from png.h", lasto
+      err = 1
+   }
+   if (mastero > lasto) {
+      print "highest symbol ordinal in", master ",", mastero ", exceeds last ordinal from png.h", lasto
+      err = 1
+   }
+   unexported=0
+   # Add a standard header to symbols.new:
+   print ";Version INSERT-VERSION-HERE" >of
+   print ";--------------------------------------------------------------" >of
+   print "; LIBPNG symbol list as a Win32 DEF file" >of
+   print "; Contains all the symbols that can be exported from libpng" >of
+   print ";--------------------------------------------------------------" >of
+   print "LIBRARY" >of
+   print "" >of
+   print "EXPORTS" >of
+
+   for (o=1; o<=lasto; ++o) {
+      if (symbol[o] == "" && removed[o] == "") {
+         if (unexported == 0) unexported = o
+         if (official[o] == "") {
+            # missing in export list too, so ok
+            if (o < lasto) continue
+         }
+      }
+      if (unexported != 0) {
+         # Symbols in the .def but not in the new file are errors, but
+         # the 'unexported' symbols aren't in either.  By default this
+         # is an error too (see the setting of 'missing' at the start),
+         # but this can be reset on the command line or by stuff in the
+         # file - see the comments above.
+         if (missing != "ignore") {
+            if (o-1 > unexported)
+               print "png.h:", missing ": missing symbols:", unexported "-" o-1
+            else
+               print "png.h:", missing ": missing symbol:", unexported
+            if (missing != "warning")
+               err = 1
+         }
+         unexported = 0
+      }
+      if (symbol[o] != "" && removed[o] != "") {
+         print "png.h: symbol", o, "both exported as '" symbol[o] "' and removed as '" removed[o] "'"
+         err = 1
+      } else if (symbol[o] != official[o]) {
+         # either the symbol is missing somewhere or it changed
+         err = 1
+         if (symbol[o] == "")
+            print "png.h: symbol", o, "is exported as '" official[o] "' in", master
+         else if (official[o] == "")
+            print "png.h: exported symbol", o, "'" symbol[o] "' not present in", master
+         else
+            print "png.h: exported symbol", o, "'" symbol[o] "' exists as '" official[o] "' in", master
+      }
+
+      # Finally generate symbols.new
+      if (symbol[o] != "")
+         print " " symbol[o], "@" o > of
+   }
+
+   if (err != 0) {
+      print "*** A new list is in", of, "***"
+      exit 1
+   }
+}
--- a/xs/src/png/libpng/scripts/def.c
+++ b/xs/src/png/libpng/scripts/def.c
@ -0,0 +1,29 @@
+/* def.c - define format of libpng.def
+ *
+ * Last changed in libpng version 1.6.16 [December 22, 2014]
+ * Copyright (c) 2011-2014 Glenn Randers-Pehrson
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+/* Write the export file header: */
+PNG_DFN ";--------------------------------------------------------------"
+PNG_DFN "; LIBPNG module definition file for OS/2"
+PNG_DFN ";--------------------------------------------------------------"
+PNG_DFN ""
+PNG_DFN "; If you give the library an explicit name one or other files"
+PNG_DFN "; may need modifying to support the new name on one or more"
+PNG_DFN "; systems."
+PNG_DFN "LIBRARY"
+PNG_DFN "OS2 DESCRIPTION "PNG image compression library""
+PNG_DFN "OS2 CODE PRELOAD MOVEABLE DISCARDABLE"
+PNG_DFN ""
+PNG_DFN "EXPORTS"
+PNG_DFN ";Version 1.6.35beta02"
+
+#define PNG_EXPORTA(ordinal, type, name, args, attributes)\
+        PNG_DFN "@" SYMBOL_PREFIX "@@" name "@"
+
+#include "../png.h"
--- a/xs/src/png/libpng/scripts/dfn.awk
+++ b/xs/src/png/libpng/scripts/dfn.awk
@ -0,0 +1,203 @@
+#!/bin/awk -f
+# scripts/dfn.awk - process a .dfn file
+#
+# last changed in libpng version 1.5.19 - August 21, 2014
+#
+# Copyright (c) 2013-2014 Glenn Randers-Pehrson
+#
+# This code is released under the libpng license.
+# For conditions of distribution and use, see the disclaimer
+# and license in png.h
+
+# The output of this script is written to the file given by
+# the variable 'out', which should be set on the command line.
+# Error messages are printed to stdout and if any are printed
+# the script will exit with error code 1.
+
+BEGIN{
+   out="/dev/null"       # as a flag
+   out_count=0           # count of output lines
+   err=0                 # set if an error occurred
+   sort=0                # sort the output
+   array[""]=""
+}
+
+# The output file must be specified before any input:
+NR==1 && out == "/dev/null" {
+   print "out=output.file must be given on the command line"
+   # but continue without setting the error code; this allows the
+   # script to be checked easily
+}
+
+# Output can be sorted; two lines are recognized
+$1 == "PNG_DFN_START_SORT"{
+   sort=0+$2
+   next
+}
+
+$1 ~ /^PNG_DFN_END_SORT/{
+   # Do a very simple, slow, sort; notice that blank lines won't be
+   # output by this
+   for (entry in array) {
+      while (array[entry] != "") {
+         key = entry
+         value = array[key]
+         array[key] = ""
+
+         for (alt in array) {
+            if (array[alt] != "" && alt < key) {
+               array[key] = value
+               value = array[alt]
+               key = alt
+               array[alt] = ""
+            }
+         }
+
+         print value >out
+      }
+   }
+   sort=0
+   next
+}
+
+/^[^"]*PNG_DFN *".*"[^"]*$/{
+   # A definition line, apparently correctly formatted; extract the
+   # definition then replace any doubled "" that remain with a single
+   # double quote.  Notice that the original doubled double quotes
+   # may have been split by tokenization
+   #
+   # Sometimes GCC splits the PNG_DFN lines; we know this has happened
+   # if the quotes aren't closed and must read another line.  In this
+   # case it is essential to reject lines that start with '#' because those
+   # are introduced #line directives.
+   orig=$0
+   line=$0
+   lineno=FNR
+   if (lineno == "") lineno=NR
+
+   if (sub(/^[^"]*PNG_DFN *"/,"",line) != 1) {
+	print "line", lineno ": processing failed:"
+	print orig
+	err=1
+       next
+   } else {
+	++out_count
+   }
+
+   # Now examine quotes within the value:
+   #
+   #   @" - delete this and any following spaces
+   #   "@ - delete this and any preceding spaces
+   #   @' - replace this by a double quote
+   #
+   # This allows macro substitution by the C compiler thus:
+   #
+   #   #define first_name John
+   #   #define last_name Smith
+   #
+   #	PNG_DFN"#define name @'@" first_name "@ @" last_name "@@'"
+   #
+   # Might get C preprocessed to:
+   #
+   #   PNG_DFN "#define foo @'@" John "@ @" Smith "@@'"
+   #
+   # Which this script reduces to:
+   #
+   #	#define name "John Smith"
+   #
+   while (1) {
+      # While there is an @" remove it and the next "@
+      if (line ~ /@"/) {
+         if (line ~ /@".*"@/) {
+            # Do this special case first to avoid swallowing extra spaces
+            # before or after the @ stuff:
+            if (!sub(/@" *"@/, "", line)) {
+               # Ok, do it in pieces - there has to be a non-space between the
+               # two.  NOTE: really weird things happen if a leading @" is
+               # lost - the code will error out below (I believe).
+               if (!sub(/@" */, "", line) || !sub(/ *"@/, "", line)) {
+                  print "line", lineno, ": internal error:", orig
+                  exit 1
+               }
+            }
+         }
+
+         # There is no matching "@.  Assume a split line
+         else while (1) {
+            if (getline nextline) {
+               # If the line starts with '#' it is a preprocesor line directive
+               # from cc -E; skip it:
+               if (nextline !~ /^#/) {
+                  line = line " " nextline
+                  break
+               }
+            } else {
+               # This is end-of-input - probably a missing "@ on the first line:
+               print "line", lineno ": unbalanced @\" ... \"@ pair"
+               err=1
+               next
+            }
+         }
+
+         # Keep going until all the @" have gone
+         continue
+      }
+
+      # Attempt to remove a trailing " (not preceded by '@') - if this can
+      # be done, stop now; if not assume a split line again
+      if (sub(/"[^"]*$/, "", line))
+         break
+
+      # Read another line
+      while (1) {
+         if (getline nextline) {
+            if (nextline !~ /^#/) {
+               line = line " " nextline
+               # Go back to stripping @" "@ pairs
+               break
+            }
+         } else {
+            print "line", lineno ": unterminated PNG_DFN string"
+            err=1
+            next
+         }
+      }
+   }
+
+   # Put any needed double quotes in (at the end, because these would otherwise
+   # interfere with the processing above.)
+   gsub(/@'/,"\"", line)
+
+   # Remove any trailing spaces (not really required, but for
+   # editorial consistency
+   sub(/ *$/, "", line)
+
+   # Remove trailing CR
+   sub(/
$/, "", line)
+
+   if (sort) {
+      if (split(line, parts) < sort) {
+         print "line", lineno ": missing sort field:", line
+         err=1
+      } else
+         array[parts[sort]] = line
+   }
+
+   else
+      print line >out
+   next
+}
+
+/PNG_DFN/{
+   print "line", NR, "incorrectly formatted PNG_DFN line:"
+   print $0
+   err = 1
+}
+
+END{
+   if (out_count > 0 || err > 0)
+	exit err
+
+   print "no definition lines found"
+   exit 1
+}
--- a/xs/src/png/libpng/scripts/intprefix.c
+++ b/xs/src/png/libpng/scripts/intprefix.c
@ -0,0 +1,22 @@
+
+/* intprefix.c - generate an unprefixed internal symbol list
+ *
+ * Last changed in libpng version 1.6.16 [December 22, 2014]
+ * Copyright (c) 2013-2014 Glenn Randers-Pehrson
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#define PNG_INTERNAL_DATA(type, name, array)\
+        PNG_DFN "@" name "@"
+
+#define PNG_INTERNAL_FUNCTION(type, name, args, attributes)\
+        PNG_DFN "@" name "@"
+
+#define PNG_INTERNAL_CALLBACK(type, name, args, attributes)\
+        PNG_DFN "@" name "@"
+
+#define PNGPREFIX_H /* self generation */
+#include "../pngpriv.h"
--- a/xs/src/png/libpng/scripts/options.awk
+++ b/xs/src/png/libpng/scripts/options.awk
@ -0,0 +1,898 @@
+#!/bin/awk -f
+# scripts/options.awk - library build configuration control
+#
+# last changed in libpng version 1.6.11 - June 5, 2014
+#
+# Copyright (c) 1998-2014 Glenn Randers-Pehrson
+#
+# This code is released under the libpng license.
+# For conditions of distribution and use, see the disclaimer
+# and license in png.h
+
+# The output of this script is written to the file given by
+# the variable 'out'.  The script is run twice, once with
+# an intermediate output file, 'options.tmp' then again on
+# that file to produce the final output:
+#
+#  awk -f scripts/options.awk out=options.tmp scripts/options.dfa 1>&2
+#  awk -f scripts/options.awk out=options.dfn options.tmp 1>&2
+#
+# Some options may be specified on the command line:
+#
+#  deb=1            Causes debugging to be output
+#  logunsupported=1 Causes all options to be recorded in the output
+#  everything=off   Causes all options to be disabled by default
+#  everything=on    Causes all options to be enabled by default
+#
+# If awk fails on your platform, try nawk instead.
+#
+# These options may also be specified in the original input file (and
+# are copied to the preprocessed file).
+
+BEGIN{
+   out=""                       # intermediate, preprocessed, file
+   pre=-1                       # preprocess (first line)
+   version="libpng version unknown" # version information
+   version_file=""              # where to find the version
+   err=0                        # in-line exit sets this
+   # The following definitions prevent the C preprocessor noticing the lines
+   # that will be in the final output file.  Some C preprocessors tokenise
+   # the lines, for example by inserting spaces around operators, and all
+   # C preprocessors notice lines that start with '#', most remove comments.
+   # The technique adopted here is to make the final output lines into
+   # C strings (enclosed in double quotes), preceded by PNG_DFN.  As a
+   # consequence the output cannot contain a 'raw' double quote - instead put
+   # @' in, this will be replaced by a single " afterward.  See the parser
+   # script dfn.awk for more capabilities (not required here).  Note that if
+   # you need a " in a 'setting' in pnglibconf.dfa it must also be @'!
+   dq="@'"                      # For a single double quote
+   start=" PNG_DFN \""          # Start stuff to output (can't contain a "!)
+   end="\" "                    # End stuff to output
+   subs="@\" "                  # Substitute start (substitute a C macro)
+   sube=" \"@"                  # Substitute end
+   comment=start "/*"           # Comment start
+   cend="*/" end                # Comment end
+   def=start "#define PNG_"     # Arbitrary define
+   sup="_SUPPORTED" end         # end supported option
+   und=comment "#undef PNG_"    # Unsupported option
+   une="_SUPPORTED" cend        # end unsupported option
+   error=start "ERROR:"         # error message, terminate with 'end'
+
+   # Variables
+   deb=0                        # debug - set on command line
+   everything=""                # do not override defaults
+   logunsupported=0             # write unsupported options too
+
+   # Precreate arrays
+   # for each option:
+   option[""] = ""    # list of all options: default enabled/disabled
+   done[""] = 1       # marks option as having been output
+   requires[""] = ""  # requires by option
+   iffs[""] = ""      # if by option
+   enabledby[""] = "" # options that enable it by option
+   sets[""] = ""      # settings set by each option
+   setval[""] = ""    # value to set (indexed: 'option sets[option]')
+   # for each setting:
+   setting[""] = ""   # requires by setting
+   defaults[""] = ""  # used for a defaulted value
+   doneset[""] = 1    # marks setting as having been output
+   r[""] = ""         # Temporary array
+
+   # For decorating the output file
+   protect = ""
+}
+
+# The output file must be specified before any input:
+out == "" {
+   print "out=output.file must be given on the command line"
+   err = 1
+   exit 1
+}
+
+# The very first line indicates whether we are reading pre-processed
+# input or not, this must come *first* because 'PREPROCESSED' needs
+# to be the very first line in the temporary file.
+pre == -1{
+   if ($0 == "PREPROCESSED") {
+      pre = 0
+      next
+   } else {
+      pre = 1
+      print "PREPROCESSED" >out
+      # And fall through to continue processing
+   }
+}
+
+# While pre-processing if version is set to "search" look for a version string
+# in the following file.
+pre && version == "search" && version_file == ""{
+   version_file = FILENAME
+}
+
+pre && version == "search" && version_file != FILENAME{
+   print "version string not found in", version_file
+   err = 1
+   exit 1
+}
+
+pre && version == "search" && $0 ~ /^ \* libpng version/{
+   version = substr($0, 4)
+   print "version =", version >out
+   next
+}
+
+pre && FILENAME == version_file{
+   next
+}
+
+# variable=value
+#   Sets the given variable to the given value (the syntax is fairly
+#   free form, except for deb (you are expected to understand how to
+#   set the debug variable...)
+#
+#   This happens before the check on 'pre' below skips most of the
+#   rest of the actions, so the variable settings happen during
+#   preprocessing but are recorded in the END action too.  This
+#   allows them to be set on the command line too.
+$0 ~ /^[ 	]*version[ 	]*=/{
+   sub(/^[  ]*version[  ]*=[  ]*/, "")
+   version = $0
+   next
+}
+$0 ~ /^[ 	]*everything[ 	=]*off[ 	]*$/{
+   everything = "off"
+   next
+}
+$0 ~ /^[ 	]*everything[ 	=]*on[ 	]*$/{
+   everything = "on"
+   next
+}
+$0 ~ /^[ 	]*logunsupported[ 	=]*0[ 	]*$/{
+   logunsupported = 0
+   next
+}
+$0 ~ /^[ 	]*logunsupported[ 	=]*1[ 	]*$/{
+   logunsupported = 1
+   next
+}
+$1 == "deb" && $2 == "=" && NF == 3{
+   deb = $3
+   next
+}
+
+# Preprocessing - this just copies the input file with lines
+# that need preprocessing (just chunk at present) expanded
+# The bare "pre" instead of "pre != 0" crashes under Sunos awk
+pre && $1 != "chunk"{
+   print >out
+   next
+}
+
+# The first characters of the line determine how it is processed,
+# leading spaces are ignored.  In general tokens that are not
+# keywords are the names of options.  An option 'name' is
+# controlled by the definition of the corresponding macros:
+#
+#   PNG_name_SUPPORTED    The option is turned on
+#   PNG_NO_name
+#   PNG_NO_name_SUPPORTED If the first macro is not defined
+#                         either of these will turn the option off
+#
+# If none of these macros are defined the option is turned on, unless
+# the keyword 'off' is given in a line relating to the option.  The
+# keyword 'on' can also be given, but it will be ignored (since it is
+# the default.)
+#
+# In the syntax below a 'name' is indicated by "NAME", other macro
+# values are indicated by "MACRO", as with "NAME" the leading "PNG_"
+# is omitted, but in this case the "NO_" prefix and the "_SUPPORTED"
+# suffix are never used.
+#
+# Each line is introduced by a keyword - the first non-space characters
+# on the line.  A line starting with a '#' is a comment - it is totally
+# ignored.  Keywords are as follows, a NAME, is simply a macro name
+# without the leading PNG_, PNG_NO_ or the trailing _SUPPORTED.
+
+$1 ~ /^#/ || $0 ~ /^[ 	]*$/{
+   next
+}
+
+# com <comment>
+#   The whole line is placed in the output file as a comment with
+#   the preceding 'com' removed
+$1 == "com"{
+   if (NF > 1) {
+      # sub(/^[ 	]*com[ 	]*/, "")
+      $1 = ""
+      print comment $0, cend >out
+   } else
+      print start end >out
+   next
+}
+
+# version
+#   Inserts a version comment
+$1 == "version" && NF == 1{
+   if (version == "") {
+      print "ERROR: no version string set"
+      err = 1 # prevent END{} running
+      exit 1
+   }
+
+   print comment, version, cend >out
+   next
+}
+
+# file output input protect
+#   Informational: the official name of the input file (without
+#   make generated local directories), the official name of the
+#   output file and, if required, a name to use in a protection
+#   macro for the contents.
+$1 == "file" && NF >= 2{
+   print comment, $2, cend >out
+   print comment, "Machine generated file: DO NOT EDIT", cend >out
+   if (NF >= 3)
+      print comment, "Derived from:", $3, cend >out
+   protect = $4
+   if (protect != "") {
+      print start "#ifndef", protect end >out
+      print start "#define", protect end >out
+   }
+   next
+}
+
+# option NAME ( (requires|enables|if) NAME* | on | off | disabled |
+#                sets SETTING VALUE+ )*
+#     
+#   Declares an option 'NAME' and describes its default setting (disabled)
+#   and its relationship to other options.  The option is disabled
+#   unless *all* the options listed after 'requires' are set and at
+#   least one of the options listed after 'if' is set.  If the
+#   option is set then it turns on all the options listed after 'enables'.
+#
+#   Note that "enables" takes priority over the required/if/disabled/off
+#   setting of the target option.
+#
+#   The definition file may list an option as 'disabled': off by default,
+#   otherwise the option is enabled: on by default.  A later (and it must
+#   be later) entry may turn an option on or off explicitly.
+
+$1 == "option" && NF >= 2{
+   opt = $2
+   sub(/,$/,"",opt)
+   onoff = option[opt]  # records current (and the default is "", enabled)
+   key = ""
+   istart = 3
+   do {
+      if (istart == 1) {     # continuation line
+         val = getline
+
+         if (val != 1) { # error reading it
+            if (val == 0)
+               print "option", opt ": ERROR: missing continuation line"
+            else
+               print "option", opt ": ERROR: error reading continuation line"
+
+            # This is a hard error
+            err = 1 # prevent END{} running
+            exit 1
+         }
+      }
+
+      for (i=istart; i<=NF; ++i) {
+         val=$(i)
+         sub(/,$/,"",val)
+         if (val == "on" || val == "off" || val == "disabled" || val =="enabled") {
+            key = ""
+            if (onoff != val) {
+               # on or off can zap disabled or enabled:
+               if (onoff == "" || (onoff == "disabled" || onoff == "enabled") &&
+                   (val == "on" || val == "off")) {
+                  # It's easy to mis-spell the option when turning it
+                  # on or off, so warn about it here:
+                  if (onoff == "" && (val == "on" || val == "off")) {
+                     print "option", opt ": ERROR: turning unrecognized option", val
+                     # For the moment error out - it is safer
+                     err = 1 # prevent END{} running
+                     exit 1
+                  }
+                  onoff = val
+               } else {
+                  # Print a message, otherwise the error
+                  # below is incomprehensible
+                  print "option", opt ": currently", onoff ": attempt to turn", val
+                  break
+               }
+            }
+         } else if (val == "requires" || val == "if" || val == "enables" || val =="sets") {
+            key = val
+         } else if (key == "requires") {
+            requires[opt] = requires[opt] " " val
+         } else if (key == "if") {
+            iffs[opt] = iffs[opt] " " val
+         } else if (key == "enables") {
+            enabledby[val] = enabledby[val] " " opt
+         } else if (key == "sets") {
+            sets[opt] = sets[opt] " " val
+            key = "setval"
+            set = val
+         } else if (key == "setval") {
+            setval[opt " " set] = setval[opt " " set] " " val
+         } else
+            break # bad line format
+      }
+
+      istart = 1
+   } while (i > NF && $0 ~ /,$/)
+
+   if (i > NF) {
+      # Set the option, defaulting to 'enabled'
+      if (onoff == "") onoff = "enabled"
+      option[opt] = onoff
+      next
+   }
+   # Else fall through to the error handler
+}
+
+# chunk NAME [requires OPT] [enables LIST] [on|off|disabled]
+#   Expands to the 'option' settings appropriate to the reading and
+#   writing of an ancillary PNG chunk 'NAME':
+#
+#   option READ_NAME requires READ_ANCILLARY_CHUNKS [READ_OPT]
+#   option READ_NAME enables NAME LIST
+#   [option READ_NAME off]
+#   option WRITE_NAME requires WRITE_ANCILLARY_CHUNKS [WRITE_OPT]
+#   option WRITE_NAME enables NAME LIST
+#   [option WRITE_NAME off]
+
+pre != 0 && $1 == "chunk" && NF >= 2{
+   # 'chunk' is handled on the first pass by writing appropriate
+   # 'option' lines into the intermediate file.
+   opt = $2
+   sub(/,$/,"",opt)
+   onoff = ""
+   reqread = ""
+   reqwrite = ""
+   enables = ""
+   req = 0
+   istart = 3
+   do {
+      if (istart == 1) {     # continuation line
+         val = getline
+
+         if (val != 1) { # error reading it
+            if (val == 0)
+               print "chunk", opt ": ERROR: missing continuation line"
+            else
+               print "chunk", opt ": ERROR: error reading continuation line"
+
+            # This is a hard error
+            err = 1 # prevent END{} running
+            exit 1
+         }
+      }
+
+      # read the keywords/additional OPTS
+      for (i=istart; i<=NF; ++i) {
+         val = $(i)
+         sub(/,$/,"",val)
+         if (val == "on" || val == "off" || val == "disabled") {
+            if (onoff != val) {
+               if (onoff == "")
+                  onoff = val
+               else
+                  break # on/off conflict
+            }
+            req = 0
+         } else if (val == "requires")
+            req = 1
+         else if (val == "enables")
+            req = 2
+         else if (req == 1){
+            reqread = reqread " READ_" val
+            reqwrite = reqwrite " WRITE_" val
+         } else if (req == 2)
+            enables = enables " " val
+         else
+            break # bad line: handled below
+      }
+
+      istart = 1
+   } while (i > NF && $0 ~ /,$/)
+
+   if (i > NF) {
+      # Output new 'option' lines to the intermediate file (out)
+      print "option READ_" opt, "requires READ_ANCILLARY_CHUNKS" reqread, "enables", opt enables , onoff >out
+      print "option WRITE_" opt, "requires WRITE_ANCILLARY_CHUNKS" reqwrite, "enables", opt enables, onoff >out
+      next
+   }
+   # Else hit the error handler below - bad line format!
+}
+
+# setting MACRO ( requires MACRO* )* [ default VALUE ]
+#   Behaves in a similar way to 'option' without looking for NO_ or
+#   _SUPPORTED; the macro is enabled if it is defined so long as all
+#   the 'requires' macros are also defined.  The definitions may be
+#   empty, an error will be issued if the 'requires' macros are
+#   *not* defined.  If given the 'default' value is used if the
+#   macro is not defined.  The default value will be re-tokenised.
+#   (BTW: this is somewhat restrictive, it mainly exists for the
+#   support of non-standard configurations and numeric parameters,
+#   see the uses in scripts/options.dat
+
+$1 == "setting" && (NF == 2 || NF >= 3 && ($3 == "requires" || $3 == "default")){
+   reqs = ""
+   deflt = ""
+   isdef = 0
+   key = ""
+   for (i=3; i<=NF; ++i)
+      if ($(i) == "requires" || $(i) == "default") {
+         key = $(i)
+         if (key == "default") isdef = 1
+      } else if (key == "requires")
+         reqs = reqs " " $(i)
+      else if (key == "default")
+         deflt = deflt " " $(i)
+      else
+         break # Format error, handled below
+
+   setting[$2] = reqs
+   # NOTE: this overwrites a previous value silently
+   if (isdef && deflt == "")
+      deflt = " " # as a flag to force output
+   defaults[$2] = deflt
+   next
+}
+
+# The order of the dependency lines (option, chunk, setting) is irrelevant
+# - the 'enables', 'requires' and 'if' settings will be used to determine
+# the correct order in the output and the final values in pnglibconf.h are
+# not order dependent.  'requires' and 'if' entries take precedence over
+# 'enables' from other options; if an option requires another option it
+# won't be set regardless of any options that enable it unless the other
+# option is also enabled.
+#
+# Similarly 'enables' trumps a NO_ definition in CFLAGS or pngusr.h
+#
+# For simplicity cycles in the definitions are regarded as errors,
+# even if they are not ambiguous.
+# A given NAME can be specified in as many 'option' lines as required, the
+# definitions are additive.
+
+# For backwards compatibility equivalent macros may be listed thus:
+#
+# = [NO_]NAME MACRO
+#   Makes -DMACRO equivalent to -DPNG_NO_NAME or -DPNG_NAME_SUPPORTED
+#   as appropriate.
+#
+# The definition is injected into the C compiler input when encountered
+# in the second pass (so all these definitions appear *after* the @
+# lines!)
+#
+# 'NAME' is as above, but 'MACRO' is the full text of the equivalent
+# old, deprecated, macro.
+
+$1 == "=" && NF == 3{
+   print "#ifdef PNG_" $3 >out
+   if ($2 ~ /^NO_/)
+      print "#   define PNG_" $2 >out
+   else
+      print "#   define PNG_" $2 "_SUPPORTED" >out
+   print "#endif" >out
+   next
+}
+
+# Lines may be injected into the C compiler input by preceding them
+# with an "@" character.  The line is copied with just the leading
+# @ removed.
+
+$1 ~ /^@/{
+   # sub(/^[ 	]*@/, "")
+   $1 = substr($1, 2)
+   print >out
+   next
+}
+
+# Check for unrecognized lines, because of the preprocessing chunk
+# format errors will be detected on the first pass independent of
+# any other format errors.
+{
+   print "options.awk: bad line (" NR "):", $0
+   err = 1 # prevent END{} running
+   exit 1
+}
+
+# For checking purposes names that start with "ok_" or "fail_" are
+# not output to pnglibconf.h and must be either enabled or disabled
+# respectively for the build to succeed.  This allows interdependencies
+# between options of the form "at least one of" or "at most one of"
+# to be checked.  For example:
+#
+# option FLOATING_POINT enables ok_math
+# option FIXED_POINT enables ok_math
+#   This ensures that at least one of FLOATING_POINT and FIXED_POINT
+#   must be set for the build to succeed.
+#
+# option fail_math requires FLOATING_POINT FIXED_POINT
+#   This means the build will fail if *both* FLOATING_POINT and
+#   FIXED_POINT are set (this is an example; in fact both are allowed.)
+#
+# If all these options were given the build would require exactly one
+# of the names to be enabled.
+
+END{
+   # END{} gets run on an exit (a traditional awk feature)
+   if (err) exit 1
+
+   if (pre) {
+      # Record the final value of the variables
+      print "deb =", deb >out
+      if (everything != "") {
+         print "everything =", everything >out
+      }
+      print "logunsupported =", logunsupported >out
+      exit 0
+   }
+
+   # Do the options first (allowing options to set settings).  The dependency
+   # tree is thus:
+   #
+   #   name     >     name
+   #   name requires  name
+   #   name if        name
+   #   name enabledby name
+   #
+   # First build a list 'tree' by option of all the things on which
+   # it depends.
+   print "" >out
+   print "/* OPTIONS */" >out
+   print comment, "options", cend >out
+   for (opt in enabledby) tree[opt] = 1  # may not be explicit options
+   for (opt in option) if (opt != "") {
+      o = option[opt]
+      # option should always be one of the following values
+      if (o != "on" && o != "off" && o != "disabled" && o != "enabled") {
+         print "internal option error (" o ")"
+         exit 1
+      }
+      tree[opt] = ""   # so unlisted options marked
+   }
+   for (opt in tree) if (opt != "") {
+      if (tree[opt] == 1) {
+         tree[opt] = ""
+         if (option[opt] != "") {
+            print "internal error (1)"
+            exit 1
+         }
+         # Macros only listed in 'enables' remain off unless
+         # one of the enabling macros is on.
+         option[opt] = "disabled"
+      }
+
+      split("", list) # clear 'list'
+      # Now add every requires, iffs or enabledby entry to 'list'
+      # so that we can add a unique list of requirements to tree[i]
+      split(requires[opt] iffs[opt] enabledby[opt], r)
+      for (i in r) list[r[i]] = 1
+      for (i in list) tree[opt] = tree[opt] " " i
+   }
+
+   # print the tree for extreme debugging
+   if (deb > 2) for (i in tree) if (i != "") print i, "depends-on" tree[i]
+
+   # Ok, now check all options marked explicitly 'on' or 'off':
+   #
+   # If an option[opt] is 'on' then turn on all requires[opt]
+   # If an option[opt] is 'off' then turn off all enabledby[opt]
+   #
+   # Error out if we have to turn 'on' to an 'off' option or vice versa.
+   npending = 0
+   for (opt in option) if (opt != "") {
+      if (option[opt] == "on" || option[opt] == "off") {
+         pending[++npending] = opt
+      }
+   }
+
+   err = 0 # set on error
+   while (npending > 0) {
+      opt = pending[npending--]
+      if (option[opt] == "on") {
+         nreqs = split(requires[opt], r)
+         for (j=1; j<=nreqs; ++j) {
+            if (option[r[j]] == "off") {
+               print "option", opt, "turned on, but requirement", r[j], "is turned off"
+               err = 1
+            } else if (option[r[j]] != "on") {
+               option[r[j]] = "on"
+               pending[++npending] = r[j]
+            }
+         }
+      } else {
+         if (option[opt] != "off") {
+            print "internal error (2)"
+            exit 1
+         }
+         nreqs = split(enabledby[opt], r)
+         for (j=1; j<=nreqs; ++j) {
+            if (option[r[j]] == "on") {
+               print "option", opt, "turned off, but enabled by", r[j], "which is turned on"
+               err = 1
+            } else if (option[r[j]] != "off") {
+               option[r[j]] = "off"
+               pending[++npending] = r[j]
+            }
+         }
+      }
+   }
+   if (err) exit 1
+
+   # Sort options:
+   print "PNG_DFN_START_SORT 2" >out
+
+   # option[i] is now the complete list of all the tokens we may
+   # need to output, go through it as above, depth first.
+   finished = 0
+   while (!finished) {
+      finished = 1
+      movement = 0 # done nothing
+      for (i in option) if (!done[i]) {
+         nreqs = split(tree[i], r)
+         if (nreqs > 0) {
+            for (j=1; j<=nreqs; ++j) if (!done[r[j]]) {
+               break
+            }
+            if (j<=nreqs) {
+               finished = 0
+               continue  # next option
+            }
+         }
+
+         # All the requirements have been processed, output
+         # this option.  An option is _SUPPORTED if:
+         #
+         # all 'requires' are _SUPPORTED AND
+         # at least one of the 'if' options are _SUPPORTED AND
+         # EITHER:
+         #   The name is _SUPPORTED (on the command line)
+         # OR:
+         #   an 'enabledby' is _SUPPORTED
+         # OR:
+         #   NO_name is not defined AND
+         #   the option is not disabled; an option is disabled if:
+         #    option == off
+         #    option == disabled && everything != on
+         #    option == "" && everything == off
+         if (deb) print "option", i
+         print "" >out
+         print "/* option:", i, option[i] >out
+         print " *   requires:  " requires[i] >out
+         print " *   if:        " iffs[i] >out
+         print " *   enabled-by:" enabledby[i] >out
+         print " *   sets:      " sets[i], "*/" >out
+         print "#undef PNG_on" >out
+         print "#define PNG_on 1" >out
+
+         # requires
+         nreqs = split(requires[i], r)
+         for (j=1; j<=nreqs; ++j) {
+            print "#ifndef PNG_" r[j] "_SUPPORTED" >out
+            print "#   undef PNG_on /*!" r[j] "*/" >out
+            # This error appears in the final output if something
+            # was switched 'on' but the processing above to force
+            # the requires did not work
+            if (option[i] == "on") {
+               print error, i, "requires", r[j] end >out
+            }
+            print "#endif" >out
+         }
+
+         # if
+         have_ifs = 0
+         nreqs = split(iffs[i], r)
+         print "#undef PNG_no_if" >out
+         if (nreqs > 0) {
+            have_ifs = 1
+            print "/* if" iffs[i], "*/" >out
+            print "#define PNG_no_if 1" >out
+            for (j=1; j<=nreqs; ++j) {
+               print "#ifdef PNG_" r[j] "_SUPPORTED" >out
+               print "#   undef PNG_no_if /*" r[j] "*/" >out
+               print "#endif" >out
+            }
+            print "#ifdef PNG_no_if /*missing if*/" >out
+            print "#   undef PNG_on" >out
+            # There is no checking above for this, because we
+            # don't know which 'if' to choose, so whine about
+            # it here:
+            if (option[i] == "on") {
+               print error, i, "needs one of:", iffs[i] end >out
+            }
+            print "#endif" >out
+         }
+
+         print "#ifdef PNG_on /*requires, if*/" >out
+         # enables
+         print "#   undef PNG_not_enabled" >out
+         print "#   define PNG_not_enabled 1" >out
+         print "   /* enabled by" enabledby[i], "*/" >out
+         nreqs = split(enabledby[i], r)
+         for (j=1; j<=nreqs; ++j) {
+            print "#ifdef PNG_" r[j] "_SUPPORTED" >out
+            print "#   undef PNG_not_enabled /*" r[j] "*/" >out
+            # Oops, probably not intended (should be factored
+            # out by the checks above).
+            if (option[i] == "off") {
+               print error, i, "enabled by:", r[j] end >out
+            }
+            print "#endif" >out
+         }
+
+         print "#   ifndef PNG_" i "_SUPPORTED /*!command line*/" >out
+         print "#    ifdef PNG_not_enabled /*!enabled*/" >out
+         # 'have_ifs' here means that everything = "off" still allows an 'if' on
+         # an otherwise enabled option to turn it on; otherwise the 'if'
+         # handling is effectively disabled by 'everything = off'
+         if (option[i] == "off" || option[i] == "disabled" && everything != "on" || option[i] == "enabled" && everything == "off" && !have_ifs) {
+            print "#      undef PNG_on /*default off*/" >out
+         } else {
+            print "#      ifdef PNG_NO_" i >out
+            print "#       undef PNG_on /*turned off*/" >out
+            print "#      endif" >out
+            print "#      ifdef PNG_NO_" i "_SUPPORTED" >out
+            print "#       undef PNG_on /*turned off*/" >out
+            print "#      endif" >out
+         }
+         print "#    endif /*!enabled*/" >out
+         print "#    ifdef PNG_on" >out
+         # The _SUPPORTED macro must be defined so that dependent
+         # options output later work.
+         print "#      define PNG_" i "_SUPPORTED" >out
+         print "#    endif" >out
+         print "#   endif /*!command line*/" >out
+         # If PNG_on is still set the option should be defined in
+         # pnglibconf.h
+         print "#   ifdef PNG_on" >out
+         if (i ~ /^fail_/) {
+            print error, i, "is on: enabled by:" iffs[i] enabledby[i] ", requires" requires[i] end >out
+         } else if (i !~ /^ok_/) {
+            print def i sup >out
+            # Supported option, set required settings
+            nreqs = split(sets[i], r)
+            for (j=1; j<=nreqs; ++j) {
+               print "#    ifdef PNG_set_" r[j] >out
+               # Some other option has already set a value:
+               print error, i, "sets", r[j] ": duplicate setting" end >out
+               print error, "   previous value: " end "PNG_set_" r[j] >out
+               print "#    else" >out
+               # Else set the default: note that this won't accept arbitrary
+               # values, the setval string must be acceptable to all the C
+               # compilers we use.  That means it must be VERY simple; a number,
+               # a name or a string.
+               print "#     define PNG_set_" r[j], setval[i " " r[j]] >out
+               print "#    endif" >out
+            }
+         }
+         print "#   endif /* definition */" >out
+         print "#endif /*requires, if*/" >out
+         if (logunsupported || i ~ /^ok_/) {
+            print "#ifndef  PNG_on" >out
+            if (logunsupported) {
+               print und i une >out
+            }
+            if (i ~ /^ok_/) {
+               print error, i, "not enabled: requires:" requires[i] ", enabled by:" iffs[i] enabledby[i] end >out
+            }
+            print "#endif" >out
+         }
+
+         done[i] = 1
+         ++movement
+      }
+
+      if (!finished && !movement) {
+         print "option: loop or missing option in dependency tree, cannot process:"
+         for (i in option) if (!done[i]) {
+            print "  option", i, "depends on" tree[i], "needs:"
+            nreqs = split(tree[i], r)
+            if (nreqs > 0) for (j=1; j<=nreqs; ++j) if (!done[r[j]]) {
+               print "   " r[j]
+            }
+         }
+         exit 1
+      }
+   }
+   print "PNG_DFN_END_SORT" >out
+   print comment, "end of options", cend >out
+
+   # Do the 'setting' values second, the algorithm the standard
+   # tree walk (O(1)) done in an O(2) while/for loop; iterations
+   # settings x depth, outputting the deepest required macros
+   # first.
+   print "" >out
+   print "/* SETTINGS */" >out
+   print comment, "settings", cend >out
+   # Sort (in dfn.awk) on field 2, the setting name
+   print "PNG_DFN_START_SORT 2" >out
+   finished = 0
+   while (!finished) {
+      finished = 1
+      movement = 0 # done nothing
+      for (i in setting) if (!doneset[i]) {
+         nreqs = split(setting[i], r)
+         if (nreqs > 0) {
+            # By default assume the requires values are options, but if there
+            # is no option with that name check for a setting
+            for (j=1; j<=nreqs; ++j) if (option[r[j]] == "" && !doneset[r[j]]) {
+               break
+            }
+            if (j<=nreqs) {
+               finished = 0
+               continue # try a different setting
+            }
+         }
+
+         # All the requirements have been processed, output
+         # this setting.
+         if (deb) print "setting", i
+         deflt = defaults[i]
+         # Remove any spurious trailing spaces
+         sub(/ *$/,"",deflt)
+         # A leading @ means leave it unquoted so the preprocessor
+         # can substitute the build time value
+         if (deflt ~ /^ @/)
+            deflt = " " subs substr(deflt, 3) sube
+         print "" >out
+         print "/* setting: ", i >out
+         print " *   requires:" setting[i] >out
+         print " *   default: ", defaults[i] deflt, "*/" >out
+         for (j=1; j<=nreqs; ++j) {
+            if (option[r[j]] != "")
+               print "#ifndef PNG_" r[j] "_SUPPORTED" >out
+            else
+               print "#ifndef PNG_" r[j] >out
+            print error, i, "requires", r[j] end >out
+            print "# endif" >out
+         }
+         # The precedence is:
+         #
+         #  1) External definition; trumps:
+         #  2) Option 'sets' value; trumps:
+         #  3) Setting 'default'
+         #
+         print "#ifdef PNG_" i >out
+         # PNG_<i> is defined, so substitute the value:
+         print def i, subs "PNG_" i sube end >out
+         print "#else /* use default */" >out
+         print "# ifdef PNG_set_" i >out
+         # Value from an option 'sets' argument
+         print def i, subs "PNG_set_" i sube end >out
+         # This is so that subsequent tests on the setting work:
+         print "#  define PNG_" i, "1" >out
+         if (defaults[i] != "") {
+            print "# else /*default*/" >out
+            print def i deflt end >out
+            print "#  define PNG_" i, "1" >out
+         }
+         print "# endif /* defaults */" >out
+         print "#endif /* setting", i, "*/" >out
+
+         doneset[i] = 1
+         ++movement
+      }
+
+      if (!finished && !movement) {
+         print "setting: loop or missing setting in 'requires', cannot process:"
+         for (i in setting) if (!doneset[i]) {
+            print "  setting", i, "requires" setting[i]
+         }
+         exit 1
+      }
+   }
+   print "PNG_DFN_END_SORT" >out
+   print comment, "end of settings", cend >out
+
+   # Regular end - everything looks ok
+   if (protect != "") {
+      print start "#endif", "/*", protect, "*/" end >out
+   }
+}
--- a/xs/src/png/libpng/scripts/pnglibconf.dfa
+++ b/xs/src/png/libpng/scripts/pnglibconf.dfa
@ -0,0 +1,919 @@
+# scripts/pnglibconf.dfa - library build configuration control
+#
+@/*- pnglibconf.dfn intermediate file
+@ *  generated from scripts/pnglibconf.dfa
+@ */
+#
+com pnglibconf.h - library build configuration
+com
+version
+com
+com Copyright (c) 1998-2017 Glenn Randers-Pehrson
+com
+com This code is released under the libpng license.
+com For conditions of distribution and use, see the disclaimer
+com and license in png.h
+com
+
+file pnglibconf.h scripts/pnglibconf.dfa PNGLCONF_H
+
+# This file is preprocessed by scripts/options.awk and the
+# C compiler to generate 'pnglibconf.h' - a list of all the
+# configuration options.  The file lists the various options
+# that can *only* be specified during the libpng build;
+# pnglibconf.h freezes the definitions selected for the specific
+# build.
+#
+# The syntax is detailed in scripts/options.awk; this is a summary
+# only:
+#
+# setting <name> [requires ...] [default]
+#    #define PNG_<name> <value>  /* value comes from current setting */
+# option <name> [requires ...] [if ...] [enables ...] [disabled]
+#    #define PNG_<name>_SUPPORTED if the requirements are met and
+#    enable the other options listed
+# chunk <name> [requires ...] [enables ...] [disabled]
+#    Enable chunk processing for the given ancillary chunk; any
+#    'requires something' expands to READ_something for read and
+#    WRITE_something for write, but the enables list members are
+#    used as given (e.g. enables GAMMA just expands to that on the
+#    correspond READ_name and WRITE_name lines.)
+#
+# "," may be used to separate options on an 'option' line and is ignored; it
+# doesn't change the meaning of the line.  (NOT setting, where "," becomes
+# part of the setting!)  A comma at the end of an option line causes a
+# continuation (the next line is included in the option too.)
+#
+# Note that the 'on' and 'off' keywords, while valid on both option
+# and chunk, should not be used in this file because they force the
+# relevant options on or off.
+
+#----------------------------------------------------------------------
+
+# The following setting, option and chunk values can all be changed
+# while building libpng:
+#
+# setting: change 'setting' lines to fine tune library performance;
+#   changes to the settings don't affect the libpng API functionally
+#
+# option: change 'option' lines to remove or add capabilities from
+#   or to the library; options change the library API
+#
+# chunk: change 'chunk' lines to remove capabilities to process
+#   optional ('ancillary') chunks.  This does not prevent PNG
+#   decoding but does change the libpng API because some chunks
+#   will be ignored.
+#
+# There are three ways of disabling features, in no particular order:
+#
+# 1) Create 'pngusr.h', enter the required private build information
+# detailed below and #define PNG_NO_<option> for each option you
+# don't want in that file in that file.  You can also turn on options
+# using PNG_<option>_SUPPORTED.  When you have finished rerun
+# configure and rebuild pnglibconf.h file with -DPNG_USER_CONFIG:
+#
+#  make clean
+#  CPPFLAGS='-DPNG_USER_CONFIG' ./configure
+#  make pnglibconf.h
+#
+# pngusr.h is only used during the creation of pnglibconf.h, but it
+# is safer to ensure that -DPNG_USER_CONFIG is specified throughout
+# the build by changing the CPPFLAGS passed to the initial ./configure
+#
+# 2) Add definitions of the settings you want to change to
+# CPPFLAGS; for example:
+#
+#   -DPNG_DEFAULT_READ_MACROS=0
+#
+# (This would change the default to *not* use read macros.)  Be
+# very careful to change only settings that don't alter the API
+# because this approach bypasses the private build checking.  You
+# can also change settings from pngpriv.h (read pngpriv.h) safely
+# without API changes.  Do that in the same way.
+#
+# 3) Write a new '.dfa' file (say 'pngusr.dfa') and in this file
+# provide override values for setting entries and turn option or
+# chunk values explicitly 'on' or 'off':
+#
+#    setting FOO default VALUE
+#    option BAR [on|off]
+#
+# Then add this file to the options.awk command line (the *first*
+# one) after this file.  The make macro DFA_XTRA is provided to make
+# this easier (set it like CPPFLAGS prior to running ./configure).
+# Look at the builds below contrib/pngminim for some extreme examples
+# of how this can be used.
+#
+# Don't edit this file unless you are contributing a patch to
+# libpng and need new or modified options/settings.
+#----------------------------------------------------------------------
+
+# The following causes commented out #undef lines to be written to
+# pnglibconf.h; this can be stopped by logunsupported=0 in a later
+# file or on the command line (after pnglibconf.dfa)
+
+logunsupported = 1
+
+# The following allows the output from configure to modify the contents of
+# pnglibconf.h
+
+@#ifdef HAVE_CONFIG_H
+@#  include "config.h"
+@#endif
+
+# PNG_USER_CONFIG has to be defined on the compiler command line
+# to cause pngusr.h to be read while constructing pnglibconf.h
+#
+# If you create a private DLL you need to define the following
+# macros in the file 'pngusr.h' and set -DPNG_USER_CONFIG for
+# compilation (i.e. in CPPFLAGS.)
+# #define PNG_USER_PRIVATEBUILD \
+#     <Describes by whom and why this version of the DLL was built>
+#  e.g. #define PNG_USER_PRIVATEBUILD "Build by MyCompany for xyz reasons."
+# #define PNG_USER_DLLFNAME_POSTFIX <two-letter postfix that serve to
+#        distinguish your DLL from those of the official release. These
+#        correspond to the trailing letters that come after the version
+#        number and must match your private DLL name>
+#  e.g. // private DLL "libpng13gx.dll"
+#       #define PNG_USER_DLLFNAME_POSTFIX "gx"
+#
+# The following macros are also at your disposal if you want to complete the
+# DLL VERSIONINFO structure.
+# - PNG_USER_VERSIONINFO_COMMENTS
+# - PNG_USER_VERSIONINFO_COMPANYNAME
+# - PNG_USER_VERSIONINFO_LEGALTRADEMARKS
+
+# It is necessary to include configures definitions here so that AC_DEFINE
+# in configure.ac works in a comprehensible way
+@#if defined(HAVE_CONFIG_H) && !defined(PNG_NO_CONFIG_H)
+@#  include "config.h"
+@#endif
+
+@#ifdef PNG_USER_CONFIG
+@#  include "pngusr.h"
+@#endif
+
+# This is a special fixup for the Watcom C compiler on Windows, which has
+# multiple procedure call standards.  Unless PNG_API_RULE is set explicitly
+# (i.e. if it is not defined at this point) it will be forced to '2' here when
+# using Watcom.  This indicates to the other header files that Watcom behaviour
+# is required where appropriate.
+
+@#ifdef __WATCOMC__
+@#  ifndef PNG_API_RULE
+@#     define PNG_API_RULE 2 /* Use Watcom calling conventions */
+@#  endif
+@#endif
+
+# IN DEVELOPMENT
+# These are currently experimental features; define them if you want (NOTE:
+# experimental options must be disabled before they are defined in this file!)
+
+# NONE
+
+# Note that PNG_USER_CONFIG only has an effect when building
+# pnglibconf.h
+
+setting USER_CONFIG
+setting USER_PRIVATEBUILD
+setting USER_DLLFNAME_POSTFIX
+setting USER_VERSIONINFO_COMMENTS
+setting USER_VERSIONINFO_COMPANYNAME
+setting USER_VERSIONINFO_LEGALTRADEMARKS
+
+# Record the 'API rule' used to select calling conventions on
+# those systems that support such things (see all the comments in
+# pngconf.h)
+# Changing this setting has a fundamental affect on the PNG ABI,
+# do not release shared libraries with this changed.
+
+setting API_RULE default 0
+
+# This allows a prefix to be added to the front of every API function name (and
+# therefore every symbol) by redefining all the function names with the prefix
+# at the end of pnglibconf.h.  It also turns on similar internal symbol renaming
+# by causing a similar build-time only file, pngprefix.h, to be generated.
+
+setting PREFIX
+
+# Implementation specific control of the optimizations, enabled by those
+# hardware or software options that need it (typically when run-time choices
+# must be made by the user)
+option SET_OPTION disabled
+
+# These options are specific to the ARM NEON hardware optimizations.  At present
+# these optimizations depend on GCC specific pre-processing of an assembler (.S)
+# file so they probably won't work with other compilers.
+#
+# ARM_NEON_OPT: unset: check at compile time (__ARM_NEON__ must be defined by
+#                      the compiler, typically as a result of specifying
+#                      CC="gcc -mfpu=neon".)
+#                   0: disable (even if the CPU has a NEON FPU.)
+#                   1: check at run time (via ARM_NEON_{API,CHECK})
+#                   2: switch on unconditionally (inadvisable - instead pass
+#                      -mfpu=neon to GCC in CC)
+#           When building libpng avoid using any setting other than '0'; '1' is
+#           set automatically when either 'API' or 'CHECK' are configured in,
+#           '2' should not be necessary as -mfpu=neon will achieve the same
+#           effect as well as applying NEON optimizations to the rest of the
+#           libpng code.
+#           NOTE: any setting other than '0' requires ALIGNED_MEMORY
+# ARM_NEON_API:   (PNG_ARM_NEON == 1) allow the optimization to be switched on
+#                 with png_set_option
+# ARM_NEON_CHECK: (PNG_ARM_NEON == 1) compile a run-time check to see if Neon
+#                 extensions are supported. This is poorly supported and
+#                 deprecated - use the png_set_option API.
+setting ARM_NEON_OPT
+option ARM_NEON_API disabled requires ALIGNED_MEMORY enables SET_OPTION,
+   sets ARM_NEON_OPT 1
+option ARM_NEON_CHECK disabled requires ALIGNED_MEMORY,
+   sets ARM_NEON_OPT 1
+
+# These options are specific to the PowerPC VSX hardware optimizations.
+#
+# POWERPC_VSX_OPT: unset: check at compile time (__PPC64__,__ALTIVEC__,__VSX__
+#                      must be defined by the compiler, typically as a result
+#                      of specifying
+#                      "-mvsx -maltivec" compiler flags)
+#                   0: disable (even if the CPU supports VSX.)
+#                   1: check at run time (via POWERPC_VSX_{API,CHECK})
+#                   2: switch on unconditionally (inadvisable - instead pass
+#                      -mvsx -maltivec to compiler options)
+#           When building libpng avoid using any setting other than '0'; '1' is
+#           set automatically when either 'API' or 'CHECK' are configured in,
+#           '2' should not be necessary as "-mvsx -maltivec" will achieve the same
+#           effect as well as applying VSX optimizations to the rest of the
+#           libpng code.
+# POWERPC_VSX_API:   (PNG_POWERPC_VSX == 1) allow the optimization to be switched on
+#                 with png_set_option
+# POWERPC_VSX_CHECK: (PNG_POWERPC_VSX == 1) compile a run-time check to see if VSX
+#                 extensions are supported. This is supported not for all OSes
+#                 (see contrib/powerpc/README)
+setting POWERPC_VSX_OPT
+option POWERPC_VSX_API disabled enables SET_OPTION,
+  sets POWERPC_VSX_OPT 1
+option POWERPC_VSX_CHECK disabled,
+  sets POWERPC_VSX_OPT 1
+
+
+# These settings configure the default compression level (0-9) and 'strategy';
+# strategy is as defined by the implementors of zlib. It describes the input
+# data and modifies the zlib parameters in an attempt to optimize the balance
+# between search and huffman encoding in the zlib algorithms.  The defaults are
+# the zlib.h defaults - the apparently recursive definition does not arise
+# because the name of the setting is prefixed by PNG_
+#
+# The TEXT values are the defaults when writing compressed text (all forms)
+
+# Include the zlib header so that the defaults below are known
+@#  include <zlib.h>
+
+# The '@' here means to substitute the value when pnglibconf.h is built
+setting Z_DEFAULT_COMPRESSION default @Z_DEFAULT_COMPRESSION
+# TODO: why aren't these Z_RLE; zlib.h says that Z_RLE, specifically, is
+# appropriate for PNG images, maybe it doesn't exist in all versions?
+setting Z_DEFAULT_STRATEGY default @Z_FILTERED
+setting Z_DEFAULT_NOFILTER_STRATEGY default @Z_DEFAULT_STRATEGY
+setting ZLIB_VERNUM default @ZLIB_VERNUM
+
+# Linkage of:
+#
+#  API:      libpng API functions
+#  CALLBACK: internal non-file-local callbacks
+#  FUNCTION: internal non-file-local functions
+#  DATA:     internal non-file-local (const) data
+setting LINKAGE_API default extern
+setting LINKAGE_CALLBACK default extern
+setting LINKAGE_FUNCTION default extern
+setting LINKAGE_DATA default extern
+
+setting TEXT_Z_DEFAULT_COMPRESSION default @Z_DEFAULT_COMPRESSION
+setting TEXT_Z_DEFAULT_STRATEGY default @Z_DEFAULT_STRATEGY
+
+# Default to using the read macros
+
+setting DEFAULT_READ_MACROS default 1
+
+# The alternative is to call functions to read PNG values, if
+# the functions are turned *off* the read macros must always
+# be enabled, so turning this off will actually force the
+# USE_READ_MACROS option on (see pngconf.h)
+
+option READ_INT_FUNCTIONS requires READ
+
+# The same for write  but these can only be switched off if no writing
+# is required at all - hence the use of a 'disabled', not a 'requires'.
+# If these are needed, they are enabled in the 'WRITE options' section
+# below.
+
+option WRITE_INT_FUNCTIONS disabled
+
+# Error controls
+#
+# WARNINGS: normally on, if off no warnings are generated
+# ERROR_TEXT: normally on, if off errors happen but there is no message
+# ERROR_NUMBERS: unimplemented feature, therefore disabled
+# BENIGN_ERRORS: support for just issuing warnings for recoverable errors
+#
+# BENIGN_READ_ERRORS:
+#     By default recoverable errors on read should just generate warnings,
+#     generally safe but PNG files that don't conform to the specification will
+#     be accepted if a meaningful result can be produced.
+#
+# BENIGN_WRITE_ERRORS:
+#     By default recoverable errors on write should just generate warnings,
+#     not generally safe because this allows the application to write invalid
+#     PNG files.  Applications should enable this themselves; it's useful
+#     because it means that a failure to write an ancillary chunk can often be
+#     ignored.
+
+option WARNINGS
+option ERROR_TEXT
+option ERROR_NUMBERS disabled
+
+option BENIGN_ERRORS
+option BENIGN_WRITE_ERRORS requires BENIGN_ERRORS disabled
+option BENIGN_READ_ERRORS requires BENIGN_ERRORS
+
+
+# Generic options - affect both read and write.
+
+option MNG_FEATURES
+
+# Arithmetic options, the first is the big switch that chooses between internal
+# floating and fixed point arithmetic implementations - it does not affect any
+# APIs.  The second two (the _POINT settings) switch off individual APIs.
+#
+# Prior to libpng 1.6.8 one of the API (_POINT) variants had to be selected.  At
+# 1.6.8 this restriction has been removed; the simplified API can be used
+# without enabling any of the low level fixed/floating APIs.
+
+option FLOATING_ARITHMETIC
+option FLOATING_POINT
+option FIXED_POINT
+
+# This protects us against compilers that run on a windowing system
+# and thus don't have or would rather us not use the stdio types:
+# stdin, stdout, and stderr.  The only one currently used is stderr
+# in png_error() and png_warning().  #defining PNG_NO_CONSOLE_IO will
+# prevent these from being compiled and used. #defining PNG_NO_STDIO
+# will also prevent these, plus will prevent the entire set of stdio
+# macros and functions (FILE *, printf, etc.) from being compiled and used,
+# unless (PNG_DEBUG > 0) has been #defined.
+
+option STDIO
+option CONSOLE_IO requires STDIO
+
+# Note: prior to 1.5.0 this option could not be disabled if STDIO
+# was enabled.  Prior to 1.5.3 this option required STDIO
+
+option TIME_RFC1123
+
+# PNG_SETJMP_NOT_SUPPORTED is an old equivalent for NO_SETJMP
+
+option SETJMP
+= NO_SETJMP SETJMP_NOT_SUPPORTED
+
+# If this is disabled it is not possible for apps to get the
+# values from the 'info' structure, this effectively removes
+# quite a lot of the READ API.
+
+option EASY_ACCESS
+
+# Added at libpng-1.2.0
+
+option USER_MEM
+
+# Added at libpng-1.4.0
+
+option IO_STATE
+
+# Libpng limits: limit the size of images and data on read.
+#
+# If this option is disabled all the limit checking code will be disabled:
+
+option USER_LIMITS requires READ
+
+# The default settings given below for the limits mean that libpng will
+# limit the size of images or the size of data in ancillary chunks to less
+# than the specification or implementation limits. Settings have the
+# following interpretations:
+#
+# USER_WIDTH_MAX: maximum width of an image that will be read
+# USER_HEIGHT_MAX: maximum height
+# USER_CHUNK_MALLOC_MAX: maximum in-memory (decompressed) size of a single chunk
+# USER_CHUNK_CACHE_MAX: maximum number of chunks to be cached
+#
+# Only chunks that are variable in number are counted towards the
+
+# Use 0x7fffffff for unlimited
+setting USER_WIDTH_MAX default        1000000
+setting USER_HEIGHT_MAX default       1000000
+
+# Use 0 for unlimited
+setting USER_CHUNK_CACHE_MAX default     1000
+setting USER_CHUNK_MALLOC_MAX default 8000000
+
+# If this option is enabled APIs to set the above limits at run time are added;
+# without this the hardwired (compile time) limits will be used.
+option SET_USER_LIMITS requires USER_LIMITS
+
+# All of the following options relate to code capabilities for
+# processing image data before creating a PNG or after reading one.
+# You can remove these capabilities safely and still be PNG
+# conformant, however the library that results is still non-standard.
+# See the comments above about how to change options and settings.
+
+# READ options
+#
+# WARNING: in libpng 1.5 maintained configuration compatibility with earlier
+# versions.  In some cases turning off an option turned off other options, in
+# others it was ineffective unless dependent options were also turned off.
+# Libpng 1.6 changes this: in general if you turn off an option that affects
+# APIs it stays off and simply disables APIs that depend on it.
+#
+# As a result if you simply port the libpng 1.5 configuration to libpng 1.6 you
+# will probably see build failures due to missing APIs.  Fixing these failures
+# requires some, perhaps considerable, knowledge of what your libpng using
+# applications are doing, fortunately there is no great reason for you to move
+# to libpng 1.6; the new interfaces in 1.6 will take several years to become
+# popular.
+
+option READ enables READ_INTERLACING SET_OPTION
+
+# Disabling READ_16BIT does not disable reading 16-bit PNG files, but it
+# forces them to be chopped down to 8-bit, and disables any 16-bit
+# processing after that has happened.  You need to be sure to enable
+# READ_SCALE_16_TO_8 or READ_STRIP_16_TO_8 when you disable READ_16BIT for
+# this to work properly.  You should disable the other option if you need to
+# ensure a particular conversion (otherwise the app can chose.)
+
+option READ_16BIT requires READ enables 16BIT
+
+option READ_QUANTIZE requires READ
+
+option READ_TRANSFORMS requires READ
+= NO_READ_TRANSFORMS READ_TRANSFORMS_NOT_SUPPORTED
+
+# Read gamma handling.  Gamma processing is a core part of libpng and many of
+# the capabilities are dependent on libpng performing gamma correction.
+#
+# In libpng 1.6 disabling gamma processing (setting PNG_NO_READ_GAMMA)
+# consistently disables those parts of the API that depend on it.  Prior to
+# 1.6.0 this was not true; the results were unpredictable and varied between
+# releases.
+#
+# If you disable gamma processing and your program no longer compiles you need
+# to ask whether you really need the APIs that are missing.  If you do then you
+# almost certainly need the gamma processing.
+#
+# If you handle gamma issues outside libpng then you do not need the libpng
+# gamma processing; and it is an enormous waste of space.  You just need to
+# remove the use of libpng APIs that depend on it.
+option READ_GAMMA requires READ_TRANSFORMS, READ_gAMA, READ_sRGB
+
+option READ_ALPHA_MODE requires READ_TRANSFORMS, READ_GAMMA
+option READ_BACKGROUND requires READ_TRANSFORMS, READ_STRIP_ALPHA, READ_GAMMA
+option READ_BGR requires READ_TRANSFORMS
+option READ_EXPAND_16 requires READ_TRANSFORMS, READ_16BIT, READ_EXPAND
+option READ_EXPAND requires READ_TRANSFORMS
+option READ_FILLER requires READ_TRANSFORMS
+option READ_GRAY_TO_RGB requires READ_TRANSFORMS
+option READ_INVERT_ALPHA requires READ_TRANSFORMS
+option READ_INVERT requires READ_TRANSFORMS
+option READ_PACK requires READ_TRANSFORMS
+option READ_PACKSWAP requires READ_TRANSFORMS
+option READ_RGB_TO_GRAY requires READ_TRANSFORMS, READ_GAMMA enables COLORSPACE
+option READ_SCALE_16_TO_8 requires READ_TRANSFORMS
+option READ_SHIFT requires READ_TRANSFORMS
+option READ_STRIP_16_TO_8 requires READ_TRANSFORMS
+option READ_STRIP_ALPHA requires READ_TRANSFORMS
+option READ_SWAP_ALPHA requires READ_TRANSFORMS
+option READ_SWAP requires READ_TRANSFORMS, READ_16BIT
+option READ_USER_TRANSFORM requires READ_TRANSFORMS
+
+option PROGRESSIVE_READ requires READ
+option SEQUENTIAL_READ requires READ
+
+# You can define PNG_NO_PROGRESSIVE_READ if you don't do progressive reading.
+# This is not talking about interlacing capability!  You'll still have
+# interlacing unless you turn off the following which is required
+# for PNG-compliant decoders.  (In other words, do not do this - in
+# fact it can't be disabled from the command line!)
+#option READ_INTERLACING requires READ
+
+option READ_COMPOSITE_NODIV requires READ
+= NO_READ_COMPOSITE_NODIV NO_READ_COMPOSITED_NODIV
+
+# Inch conversions
+
+option INCH_CONVERSIONS
+= INCH_CONVERSIONS INCH_CONVERSIONS
+
+# API to build a grayscale palette
+# NOTE: this is not used internally by libpng at present.
+
+option BUILD_GRAYSCALE_PALETTE
+
+# WRITE options
+
+option WRITE enables WRITE_INT_FUNCTIONS
+
+# Disabling WRITE_16BIT prevents 16-bit PNG files from being
+# generated.
+option WRITE_16BIT requires WRITE enables 16BIT
+
+option WRITE_TRANSFORMS requires WRITE
+= NO_WRITE_TRANSFORMS WRITE_TRANSFORMS_NOT_SUPPORTED
+
+option WRITE_SHIFT requires WRITE_TRANSFORMS
+option WRITE_PACK requires WRITE_TRANSFORMS
+option WRITE_BGR requires WRITE_TRANSFORMS
+option WRITE_SWAP requires WRITE_TRANSFORMS, WRITE_16BIT
+option WRITE_PACKSWAP requires WRITE_TRANSFORMS
+option WRITE_INVERT requires WRITE_TRANSFORMS
+option WRITE_FILLER requires WRITE_TRANSFORMS
+option WRITE_SWAP_ALPHA requires WRITE_TRANSFORMS
+option WRITE_INVERT_ALPHA requires WRITE_TRANSFORMS
+option WRITE_USER_TRANSFORM requires WRITE_TRANSFORMS
+
+# This is not required for PNG-compliant encoders, but can cause
+# trouble if left undefined
+
+option WRITE_INTERLACING requires WRITE
+
+# Deprecated, will be removed.
+option WRITE_WEIGHTED_FILTER requires WRITE
+
+option WRITE_FLUSH requires WRITE
+
+# Note: these can be turned off explicitly if not required by the
+# apps implementing the user transforms
+option USER_TRANSFORM_PTR if READ_USER_TRANSFORM, WRITE_USER_TRANSFORM
+option USER_TRANSFORM_INFO if READ_USER_TRANSFORM, WRITE_USER_TRANSFORM
+
+# This enables API to set compression parameters for compressing
+# non-IDAT chunks (zTXt, iTXt, iCCP, and unknown chunks).  This feature
+# was added at libpng-1.5.3.
+option WRITE_CUSTOMIZE_ZTXT_COMPRESSION requires WRITE
+option WRITE_CUSTOMIZE_COMPRESSION requires WRITE
+
+# Any chunks you are not interested in, you can undef here.  The
+# ones that allocate memory may be especially important (hIST,
+# tEXt, zTXt, tRNS, pCAL).  Others will just save time and make png_info
+# a bit smaller.
+
+# The size of the png_text structure changed in libpng-1.0.6 when
+# iTXt support was added.  iTXt support was turned off by default through
+# libpng-1.2.x, to support old apps that malloc the png_text structure
+# instead of calling png_set_text() and letting libpng malloc it.  It
+# was turned on by default in libpng-1.4.0.
+
+option READ_ANCILLARY_CHUNKS requires READ
+# PNG_READ_ANCILLARY_CHUNKS_NOT_SUPPORTED is deprecated.
+= NO_READ_ANCILLARY_CHUNKS READ_ANCILLARY_CHUNKS_NOT_SUPPORTED
+
+option WRITE_ANCILLARY_CHUNKS requires WRITE
+# PNG_WRITE_ANCILLARY_CHUNKS_NOT_SUPPORTED is deprecated.
+= NO_WRITE_ANCILLARY_CHUNKS WRITE_ANCILLARY_CHUNKS_NOT_SUPPORTED
+
+# These options disable *all* the text chunks if turned off
+
+option TEXT disabled
+option READ_TEXT requires READ_ANCILLARY_CHUNKS enables TEXT
+option WRITE_TEXT requires WRITE_ANCILLARY_CHUNKS enables TEXT
+
+# Moved to pnglibconf.h at libpng-1.5.0
+# Feature support: in 1.4 this was in pngconf.h, but the following
+# features have no affect on the libpng API.  Add library
+# only features to the end of this list.  Add features that
+# affect the API above.  (Note: the list of chunks follows
+# the library-only settings.)
+#
+# BUILD TIME ONLY OPTIONS
+#   These options do not affect the API but rather alter how the
+#   API is implemented, they get recorded in pnglibconf.h, but
+#   can't be changed by the application.
+
+# Colorspace support (enabled as required); just the support for colorant
+# information.  Gamma support, likewise, is just support for the gamma
+# information, READ_GAMMA is required for gamma transformations (so it
+# is possible to read PNG gamma without enabling all the libpng transform
+# code - do this for applications that do their own gamma processing)
+#
+# As of 1.6.0 COLORSPACE is only useful if the application processes the
+# information; this is because the library does not do any colorspace
+# processing, it just validates the data in the PNG file.
+
+option GAMMA disabled
+option COLORSPACE enables GAMMA disabled
+
+# When an ICC profile is read, or png_set, it will be checked for a match
+# against known sRGB profiles if the sRGB handling is enabled.  The
+# PNG_sRGB_PROFILE_CHECKS setting controls how much work is done during the
+# check:
+#
+# -1: Don't do any sRGB profile checking.
+#
+#  0: Just validate the profile MD5 signature if present, otherwise use
+#     the checks in option 1.
+#
+#  1: Additionally check the length, intent and adler32 checksum of the
+#     actual data.   If enabled this will reject known profiles that have
+#     had the rendering intent in the header changed as well as other edits
+#     done without updating the checksum.  See the discussion below.
+#
+#  2: Additionally checksum all the data using the ethernet CRC32 algorithm.
+#     This makes it more difficult to fake profiles and makes it less likely
+#     to get a false positive on profiles with no signature, but is probably
+#     just a waste of time since all currently approved ICC sRGB profiles have
+#     a secure MD5 signature.
+#
+# The rendering intent.  An ICC profile stores an intended rendering intent,
+# but does not include the value in the signature.  The intent is documented
+# as the intent that should be used when combining two profiles.  The sRGB
+# profile is intended, however, to be used with any of the four defined intents.
+# For this reason the sRGB chunk includes an 'intent' to be used when displaying
+# the image (intent is really a property of the image not the profile.)
+#
+# Unfortunately the iCCP chunk does not.  It may therefore be that some
+# applications modify the intent in profiles (including sRGB profiles) to work
+# round this problem.  Selecting an option other than option '0' will cause such
+# modified profiles to be rejected.
+#
+# Security.  The use of Adler32 and CRC32 checksums does not help significantly
+# with any security issues.  It is relatively easy to produce arbitrary profiles
+# with the required checksums on current computer systems.  Nevertheless
+# security does not seem to be an issue because the only consequence of a false
+# positive is a false assertion that the profile is an sRGB profile.  This might
+# be used to hide data from libpng using applications, but it doesn't seem
+# possible to damage them.
+
+setting sRGB_PROFILE_CHECKS default 2
+
+# Artificially align memory - the code typically aligns to 8 byte
+# boundaries if this is switched on, it's a small waste of space
+# but can help (in theory) on some architectures.  Only affects
+# internal structures.  Added at libpng 1.4.0
+
+option ALIGNED_MEMORY
+
+# Buggy compilers (e.g., gcc 2.7.2.2) need PNG_NO_POINTER_INDEXING
+# See png[wr]util.c, normally this should always be *on*
+
+option POINTER_INDEXING
+
+# Other defines for things like memory and the like can go here.
+
+# BUILD TIME SETTINGS
+# Like build time options these do not affect the API, but they
+# may be useful to applications because they record details of
+# how the API will behave particularly with regard to overall
+# accuracy.
+
+# This controls how fine the quantizing gets.  As this allocates
+# a largish chunk of memory (32K), those who are not as concerned
+# with quantizing quality can decrease some or all of these.
+
+setting QUANTIZE_RED_BITS default 5
+setting QUANTIZE_GREEN_BITS default 5
+setting QUANTIZE_BLUE_BITS default 5
+
+# This controls how fine the gamma correction becomes when you
+# are only interested in 8 bits anyway.  Increasing this value
+# results in more memory being used, and more pow() functions
+# being called to fill in the gamma tables.  Don't set this value
+# less than 8, and even that may not work (I haven't tested it).
+
+setting MAX_GAMMA_8 default 11
+
+# This controls how much a difference in gamma we can tolerate before
+# we actually start doing gamma conversion, it's a fixed point value,
+# so the default below is 0.05, meaning libpng ignores corrections in
+# the range 0.95 to 1.05
+
+setting GAMMA_THRESHOLD_FIXED default 5000
+
+# Precision to use when converting a floating point value to a PNG
+# extension format string in an sCAL chunk (only relevant if the
+# floating point API is enabled)
+
+setting sCAL_PRECISION default 5
+
+# This is the size of the compression buffer, and thus the size of
+# an IDAT chunk.  Make this whatever size you feel is best for your
+# machine.  One of these will be allocated per png_struct.  When this
+# is full, it writes the data to the disk, and does some other
+# calculations.  Making this an extremely small size may slow
+# the library down, but you may want to experiment to determine
+# where it becomes significant, if you are concerned with memory
+# usage.  Note that zlib allocates at least 32Kb also.  For readers,
+# this describes the size of the buffer available to read the data in.
+# Unless this gets smaller than the size of a row (compressed),
+# it should not make much difference how big this is.
+
+setting ZBUF_SIZE default 8192
+
+# This is the size of the decompression buffer used when counting or checking
+# the decompressed size of an LZ stream from a compressed ancillary chunk; the
+# decompressed data is never used so a different size may be optimal.  This size
+# was determined using contrib/libtests/timepng.c with compressed zTXt data
+# around 11MByte in size.  Slight speed improvements (up to about 14% in
+# timepng) can be achieved by very large increases (to 32kbyte) on regular data,
+# but highly compressible data shows only around 2% improvement.   The size is
+# chosen to minimize the effects of DoS attacks based on using very large
+# amounts of highly compressible data.
+
+setting INFLATE_BUF_SIZE default 1024
+
+# This is the maximum amount of IDAT data that the sequential reader will
+# process at one time.  The setting does not affect the size of IDAT chunks
+# read, just the amount read at once.  Neither does it affect the progressive
+# reader, which processes just the amount of data the application gives it.
+# The sequential reader is currently unable to process more than one IDAT at
+# once - it has to read and process each one in turn.  There is no point setting
+# this to a value larger than the IDAT chunks typically encountered (it would
+# just waste memory) but there may be some point in reducing it below the value
+# of ZBUF_SIZE (the size of IDAT chunks written by libpng.)
+
+setting IDAT_READ_SIZE default PNG_ZBUF_SIZE
+
+# Ancillary chunks
+chunk bKGD
+chunk cHRM enables COLORSPACE
+chunk eXIf
+chunk gAMA enables GAMMA
+chunk hIST
+chunk iCCP enables COLORSPACE, GAMMA
+chunk iTXt enables TEXT
+chunk oFFs
+chunk pCAL
+chunk pHYs
+chunk sBIT
+chunk sCAL
+chunk sPLT
+chunk sRGB enables COLORSPACE, GAMMA, SET_OPTION
+chunk tEXt requires TEXT
+chunk tIME
+chunk tRNS
+chunk zTXt enables TEXT
+
+# This only affects support of the optional PLTE chunk in RGB and RGBA
+# images.  Notice that READ_ANCILLARY_CHUNKS therefore disables part
+# of the regular chunk reading too.
+
+option READ_OPT_PLTE requires READ_ANCILLARY_CHUNKS
+
+# Unknown chunk handling
+#
+# 'UNKNOWN_CHUNKS' is a global option to disable all unknown chunk handling on
+# read or write; everything else below requires it (directly or indirectly).
+option UNKNOWN_CHUNKS
+
+# There are three main options to control the ability to read and write unknown
+# chunks.  If either read option is turned on then unknown chunks will be read,
+# otherwise they are skipped.  If the write option is turned on unknown chunks
+# set by png_set_unknown_chunks will be written otherwise it is an error to call
+# that API on a write struct.
+option WRITE_UNKNOWN_CHUNKS requires WRITE requires UNKNOWN_CHUNKS
+option WRITE_UNKNOWN_CHUNKS enables STORE_UNKNOWN_CHUNKS
+
+# The first way to read user chunks is to have libpng save them for a later call
+# to png_get_unknown_chunks, the application must call
+# png_set_keep_unknown_chunks to cause this to actually happen (see png.h)
+option SAVE_UNKNOWN_CHUNKS requires READ requires SET_UNKNOWN_CHUNKS
+option SAVE_UNKNOWN_CHUNKS enables READ_UNKNOWN_CHUNKS, STORE_UNKNOWN_CHUNKS
+
+# The second approach is to use an application provided callback to process the
+# chunks, the callback can either handle the chunk entirely itself or request
+# that libpng store the chunk for later retrieval via png_get_unknown_chunks.
+#
+# NOTE: If STORE_UNKNOWN_CHUNKS is not enabled (which is the default if
+# both SAVE_UNKNOWN_CHUNKS and WRITE_UNKNOWN_CHUNKS are disabled) then a
+# 0 result from the callback will be ignored because no support for saving
+# unknown chunks has been compiled in.  The normal symptom is that your app
+# fails to compile because png_get_unknown_chunks is no longer defined in png.h.
+# If you encounter this issue simply enable STORE_UNKNOWN_CHUNKS in your build.
+#
+# Note that there is no 'WRITE_USER_CHUNKS' so the USER_CHUNKS option is always
+# the same as READ_USER_CHUNKS at present
+option READ_USER_CHUNKS requires READ, UNKNOWN_CHUNKS
+option READ_USER_CHUNKS enables READ_UNKNOWN_CHUNKS, USER_CHUNKS
+
+# Two further options are provided to allow detailed control of the handling.
+# The first enables png_set_keep_unknown_chunks; this allows the default to be
+# changed from discarding unknown chunks and allows per-chunk control.  This is
+# required to use the SAVE_UNKNOWN_CHUNKS option.  If enabled this option also
+# applies to write (see png.h), otherwise the write API simply writes all the
+# chunks it is given.
+#
+# The second option extends the unknown handling to allow known chunks to be
+# handled as though they were unknown.  This option doesn't change any APIs, it
+# merely turns on the code to check known as well as unknown chunks.
+#
+# This option no longer affects the write code.  It can be safely disabled and
+# will prevent applications stopping libpng reading known chunks.
+option SET_UNKNOWN_CHUNKS requires UNKNOWN_CHUNKS
+option HANDLE_AS_UNKNOWN requires SET_UNKNOWN_CHUNKS
+
+# The following options are derived from the above and should not be turned on
+# explicitly.
+option READ_UNKNOWN_CHUNKS requires UNKNOWN_CHUNKS disabled
+option STORE_UNKNOWN_CHUNKS requires UNKNOWN_CHUNKS disabled
+
+option CONVERT_tIME requires WRITE_ANCILLARY_CHUNKS
+# The "tm" structure is not supported on WindowsCE
+
+@#ifdef _WIN32_WCE
+@#   define PNG_NO_CONVERT_tIME
+@#endif
+
+option WRITE_FILTER requires WRITE
+
+option SAVE_INT_32 disabled
+# png_save_int_32 is required internally for writing the ancillary chunks oFFs
+# and pCAL and for both reading and writing iCCP (for the generation/checking of
+# the corresponding cHRM/gAMA chunks) if full ICC is supported.
+
+# added at libpng-1.5.4
+
+option WRITE_OPTIMIZE_CMF requires WRITE
+
+option READ_COMPRESSED_TEXT disabled
+option READ_iCCP enables READ_COMPRESSED_TEXT
+option READ_iTXt enables READ_COMPRESSED_TEXT
+option READ_zTXt enables READ_COMPRESSED_TEXT
+
+option WRITE_oFFs enables SAVE_INT_32
+option WRITE_pCAL enables SAVE_INT_32
+option WRITE_cHRM enables SAVE_INT_32
+
+option WRITE_COMPRESSED_TEXT disabled
+option WRITE_iCCP enables WRITE_COMPRESSED_TEXT
+option WRITE_iTXt enables WRITE_COMPRESSED_TEXT
+option WRITE_zTXt enables WRITE_COMPRESSED_TEXT
+
+# Turn this off to disable png_read_png() and png_write_png() and
+# leave the row_pointers member out of the info structure.
+
+option INFO_IMAGE
+
+# added at libpng-1.5.10
+# Turn this off to disable warning about invalid palette index and
+# leave the num_palette_max member out of the png structure.
+
+option CHECK_FOR_INVALID_INDEX enables READ_CHECK_FOR_INVALID_INDEX
+option CHECK_FOR_INVALID_INDEX enables WRITE_CHECK_FOR_INVALID_INDEX
+option READ_CHECK_FOR_INVALID_INDEX requires READ, CHECK_FOR_INVALID_INDEX
+option WRITE_CHECK_FOR_INVALID_INDEX requires WRITE, CHECK_FOR_INVALID_INDEX
+
+# added at libpng-1.5.15
+option GET_PALETTE_MAX enables READ_GET_PALETTE_MAX WRITE_GET_PALETTE_MAX
+option READ_GET_PALETTE_MAX requires READ_CHECK_FOR_INVALID_INDEX disabled
+option WRITE_GET_PALETTE_MAX requires WRITE_CHECK_FOR_INVALID_INDEX disabled
+
+# Simplified API options (added at libpng-1.6.0)
+#  In libpng 1.6.8 the handling of these options was changed to used 'requires'
+#  throughout, so that disabling some of the low level support always disables
+#  the base simplified read/write API.  This much simplifies the handling and
+#  makes 'everything = off' work in a more intuitive way.  It eliminates a
+#  previously reported feature that APIs previously enabled by the simplified
+#  API couldn't be turned off without explicitly turning off the simplified
+#  APIs.
+#
+# Read:
+option SIMPLIFIED_READ,
+   requires SEQUENTIAL_READ, READ_TRANSFORMS, SETJMP, BENIGN_ERRORS,
+      READ_EXPAND, READ_16BIT, READ_EXPAND_16, READ_SCALE_16_TO_8,
+      READ_RGB_TO_GRAY, READ_ALPHA_MODE, READ_BACKGROUND, READ_STRIP_ALPHA,
+      READ_FILLER, READ_SWAP, READ_PACK, READ_GRAY_TO_RGB, READ_GAMMA,
+      READ_tRNS, READ_bKGD, READ_gAMA, READ_cHRM, READ_sRGB, READ_sBIT
+
+# AFIRST and BGR read options:
+#  Prior to libpng 1.6.8 these were disabled but switched on if the low level
+#  libpng routines that do the swaps were enabled.  This worked but was
+#  confusing.  In libpng 1.6.8 the options were changed to simple 'requires'
+#  and are enabled by default.  This should work the same way in practice.
+option SIMPLIFIED_READ_AFIRST enables FORMAT_AFIRST,
+   requires SIMPLIFIED_READ READ_SWAP_ALPHA
+
+option SIMPLIFIED_READ_BGR enables FORMAT_BGR,
+   requires SIMPLIFIED_READ READ_BGR
+
+# Write:
+option SIMPLIFIED_WRITE,
+   requires WRITE, SETJMP, WRITE_SWAP, WRITE_PACK,
+      WRITE_tRNS, WRITE_gAMA, WRITE_sRGB, WRITE_cHRM
+
+# 1.6.22: allow simplified write without stdio support:
+option SIMPLIFIED_WRITE_STDIO requires SIMPLIFIED_WRITE STDIO
+
+option SIMPLIFIED_WRITE_AFIRST enables FORMAT_AFIRST,
+   requires SIMPLIFIED_WRITE WRITE_SWAP_ALPHA
+
+option SIMPLIFIED_WRITE_BGR enables FORMAT_BGR,
+   requires SIMPLIFIED_WRITE WRITE_BGR
+
+# Formats:
+option FORMAT_AFIRST disabled
+option FORMAT_BGR disabled
--- a/xs/src/png/libpng/scripts/prefix.c
+++ b/xs/src/png/libpng/scripts/prefix.c
@ -0,0 +1,24 @@
+
+/* prefix.c - generate an unprefixed symbol list
+ *
+ * Last changed in libpng version 1.6.16 [December 22, 2014]
+ * Copyright (c) 2013-2014 Glenn Randers-Pehrson
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#define PNG_EXPORTA(ordinal, type, name, args, attributes)\
+        PNG_DFN "@" name "@"
+
+/* The configuration information *before* the additional of symbol renames,
+ * the list is the C name list; no symbol prefix.
+ */
+#include "pnglibconf.out"
+
+PNG_DFN_START_SORT 1
+
+#include "../png.h"
+
+PNG_DFN_END_SORT
--- a/xs/src/png/libpng/scripts/sym.c
+++ b/xs/src/png/libpng/scripts/sym.c
@ -0,0 +1,15 @@
+
+/* sym.c - define format of libpng.sym
+ *
+ * Last changed in libpng version 1.6.16 [December 22, 2014]
+ * Copyright (c) 2011-2014 Glenn Randers-Pehrson
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#define PNG_EXPORTA(ordinal, type, name, args, attributes)\
+        PNG_DFN "@" SYMBOL_PREFIX "@@" name "@"
+
+#include "../png.h"
--- a/xs/src/png/libpng/scripts/symbols.c
+++ b/xs/src/png/libpng/scripts/symbols.c
@ -0,0 +1,58 @@
+
+/* symbols.c - find all exported symbols
+ *
+ * Last changed in libpng version 1.6.16 [December 22, 2014]
+ * Copyright (c) 2011-2014 Glenn Randers-Pehrson
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+/* NOTE: making 'symbols.chk' checks both that the exported
+ * symbols in the library don't change and (implicitly) that
+ * scripts/pnglibconf.h.prebuilt is as expected.
+ * If scripts/pnglibconf.h.prebuilt is remade using
+ * scripts/pnglibconf.dfa then this checks the .dfa file too.
+ */
+
+#define PNG_EXPORTA(ordinal, type, name, args, attributes)\
+        PNG_DFN "@" name "@ @@" ordinal "@"
+#define PNG_REMOVED(ordinal, type, name, args, attributes)\
+        PNG_DFN "; @" name "@ @@" ordinal "@"
+#define PNG_EXPORT_LAST_ORDINAL(ordinal)\
+        PNG_DFN "; @@" ordinal "@"
+
+/* Read the defaults, but use scripts/pnglibconf.h.prebuilt; the 'standard'
+ * header file.
+ */
+#include "pnglibconf.h.prebuilt"
+#include "../png.h"
+
+/* Some things are turned off by default.  Turn these things
+ * on here (by hand) to get the APIs they expose and validate
+ * that no harm is done.  This list is the set of options
+ * defaulted to 'off' in scripts/pnglibconf.dfa
+ *
+ * Maintenance: if scripts/pnglibconf.dfa options are changed
+ * from, or to, 'disabled' this needs updating!
+ */
+#define PNG_BENIGN_ERRORS_SUPPORTED
+#define PNG_ERROR_NUMBERS_SUPPORTED
+#define PNG_READ_BIG_ENDIAN_SUPPORTED  /* should do nothing! */
+#define PNG_INCH_CONVERSIONS_SUPPORTED
+#define PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED
+#define PNG_SET_OPTION_SUPPORTED
+
+#undef PNG_H
+#include "../png.h"
+
+/* Finally there are a couple of places where option support
+ * actually changes the APIs revealed using a #if/#else/#endif
+ * test in png.h, test these here.
+ */
+#undef  PNG_FLOATING_POINT_SUPPORTED /* Exposes 'fixed' APIs */
+#undef  PNG_ERROR_TEXT_SUPPORTED     /* Exposes unsupported APIs */
+
+#undef PNG_H
+#include "../png.h"
--- a/xs/src/png/libpng/scripts/symbols.def
+++ b/xs/src/png/libpng/scripts/symbols.def
@ -0,0 +1,256 @@
+;Version 1.6.35beta02
+;--------------------------------------------------------------
+; LIBPNG symbol list as a Win32 DEF file
+; Contains all the symbols that can be exported from libpng
+;--------------------------------------------------------------
+LIBRARY
+
+EXPORTS
+ png_access_version_number @1
+ png_set_sig_bytes @2
+ png_sig_cmp @3
+ png_create_read_struct @4
+ png_create_write_struct @5
+ png_get_compression_buffer_size @6
+ png_set_compression_buffer_size @7
+ png_set_longjmp_fn @8
+ png_longjmp @9
+ png_reset_zstream @10
+ png_create_read_struct_2 @11
+ png_create_write_struct_2 @12
+ png_write_sig @13
+ png_write_chunk @14
+ png_write_chunk_start @15
+ png_write_chunk_data @16
+ png_write_chunk_end @17
+ png_create_info_struct @18
+ png_info_init_3 @19
+ png_write_info_before_PLTE @20
+ png_write_info @21
+ png_read_info @22
+ png_convert_to_rfc1123 @23
+ png_convert_from_struct_tm @24
+ png_convert_from_time_t @25
+ png_set_expand @26
+ png_set_expand_gray_1_2_4_to_8 @27
+ png_set_palette_to_rgb @28
+ png_set_tRNS_to_alpha @29
+ png_set_bgr @30
+ png_set_gray_to_rgb @31
+ png_set_rgb_to_gray @32
+ png_set_rgb_to_gray_fixed @33
+ png_get_rgb_to_gray_status @34
+ png_build_grayscale_palette @35
+ png_set_strip_alpha @36
+ png_set_swap_alpha @37
+ png_set_invert_alpha @38
+ png_set_filler @39
+ png_set_add_alpha @40
+ png_set_swap @41
+ png_set_packing @42
+ png_set_packswap @43
+ png_set_shift @44
+ png_set_interlace_handling @45
+ png_set_invert_mono @46
+ png_set_background @47
+ png_set_strip_16 @48
+ png_set_quantize @49
+ png_set_gamma @50
+ png_set_flush @51
+ png_write_flush @52
+ png_start_read_image @53
+ png_read_update_info @54
+ png_read_rows @55
+ png_read_row @56
+ png_read_image @57
+ png_write_row @58
+ png_write_rows @59
+ png_write_image @60
+ png_write_end @61
+ png_read_end @62
+ png_destroy_info_struct @63
+ png_destroy_read_struct @64
+ png_destroy_write_struct @65
+ png_set_crc_action @66
+ png_set_filter @67
+ png_set_filter_heuristics @68
+ png_set_compression_level @69
+ png_set_compression_mem_level @70
+ png_set_compression_strategy @71
+ png_set_compression_window_bits @72
+ png_set_compression_method @73
+ png_init_io @74
+ png_set_error_fn @75
+ png_get_error_ptr @76
+ png_set_write_fn @77
+ png_set_read_fn @78
+ png_get_io_ptr @79
+ png_set_read_status_fn @80
+ png_set_write_status_fn @81
+ png_set_mem_fn @82
+ png_get_mem_ptr @83
+ png_set_read_user_transform_fn @84
+ png_set_write_user_transform_fn @85
+ png_set_user_transform_info @86
+ png_get_user_transform_ptr @87
+ png_set_read_user_chunk_fn @88
+ png_get_user_chunk_ptr @89
+ png_set_progressive_read_fn @90
+ png_get_progressive_ptr @91
+ png_process_data @92
+ png_progressive_combine_row @93
+ png_malloc @94
+ png_calloc @95
+ png_malloc_warn @96
+ png_free @97
+ png_free_data @98
+ png_data_freer @99
+ png_malloc_default @100
+ png_free_default @101
+ png_error @102
+ png_chunk_error @103
+ png_err @104
+ png_warning @105
+ png_chunk_warning @106
+ png_benign_error @107
+ png_chunk_benign_error @108
+ png_set_benign_errors @109
+ png_get_valid @110
+ png_get_rowbytes @111
+ png_get_rows @112
+ png_set_rows @113
+ png_get_channels @114
+ png_get_image_width @115
+ png_get_image_height @116
+ png_get_bit_depth @117
+ png_get_color_type @118
+ png_get_filter_type @119
+ png_get_interlace_type @120
+ png_get_compression_type @121
+ png_get_pixels_per_meter @122
+ png_get_x_pixels_per_meter @123
+ png_get_y_pixels_per_meter @124
+ png_get_pixel_aspect_ratio @125
+ png_get_x_offset_pixels @126
+ png_get_y_offset_pixels @127
+ png_get_x_offset_microns @128
+ png_get_y_offset_microns @129
+ png_get_signature @130
+ png_get_bKGD @131
+ png_set_bKGD @132
+ png_get_cHRM @133
+ png_get_cHRM_fixed @134
+ png_set_cHRM @135
+ png_set_cHRM_fixed @136
+ png_get_gAMA @137
+ png_get_gAMA_fixed @138
+ png_set_gAMA @139
+ png_set_gAMA_fixed @140
+ png_get_hIST @141
+ png_set_hIST @142
+ png_get_IHDR @143
+ png_set_IHDR @144
+ png_get_oFFs @145
+ png_set_oFFs @146
+ png_get_pCAL @147
+ png_set_pCAL @148
+ png_get_pHYs @149
+ png_set_pHYs @150
+ png_get_PLTE @151
+ png_set_PLTE @152
+ png_get_sBIT @153
+ png_set_sBIT @154
+ png_get_sRGB @155
+ png_set_sRGB @156
+ png_set_sRGB_gAMA_and_cHRM @157
+ png_get_iCCP @158
+ png_set_iCCP @159
+ png_get_sPLT @160
+ png_set_sPLT @161
+ png_get_text @162
+ png_set_text @163
+ png_get_tIME @164
+ png_set_tIME @165
+ png_get_tRNS @166
+ png_set_tRNS @167
+ png_get_sCAL @168
+ png_get_sCAL_s @169
+ png_set_sCAL @170
+ png_set_sCAL_s @171
+ png_set_keep_unknown_chunks @172
+ png_handle_as_unknown @173
+ png_set_unknown_chunks @174
+ png_set_unknown_chunk_location @175
+ png_get_unknown_chunks @176
+ png_set_invalid @177
+ png_read_png @178
+ png_write_png @179
+ png_get_copyright @180
+ png_get_header_ver @181
+ png_get_header_version @182
+ png_get_libpng_ver @183
+ png_permit_mng_features @184
+ png_set_strip_error_numbers @185
+ png_set_user_limits @186
+ png_get_user_width_max @187
+ png_get_user_height_max @188
+ png_set_chunk_cache_max @189
+ png_get_chunk_cache_max @190
+ png_set_chunk_malloc_max @191
+ png_get_chunk_malloc_max @192
+ png_get_pixels_per_inch @193
+ png_get_x_pixels_per_inch @194
+ png_get_y_pixels_per_inch @195
+ png_get_x_offset_inches @196
+ png_get_y_offset_inches @197
+ png_get_pHYs_dpi @198
+ png_get_io_state @199
+ png_get_uint_32 @201
+ png_get_uint_16 @202
+ png_get_int_32 @203
+ png_get_uint_31 @204
+ png_save_uint_32 @205
+ png_save_int_32 @206
+ png_save_uint_16 @207
+ png_set_gamma_fixed @208
+ png_set_filter_heuristics_fixed @209
+ png_get_pixel_aspect_ratio_fixed @210
+ png_get_x_offset_inches_fixed @211
+ png_get_y_offset_inches_fixed @212
+ png_set_sCAL_fixed @213
+ png_get_sCAL_fixed @214
+ png_set_background_fixed @215
+ png_get_io_chunk_type @216
+ png_get_current_row_number @217
+ png_get_current_pass_number @218
+ png_process_data_pause @219
+ png_process_data_skip @220
+ png_set_expand_16 @221
+ png_set_text_compression_level @222
+ png_set_text_compression_mem_level @223
+ png_set_text_compression_strategy @224
+ png_set_text_compression_window_bits @225
+ png_set_text_compression_method @226
+ png_set_alpha_mode @227
+ png_set_alpha_mode_fixed @228
+ png_set_scale_16 @229
+ png_get_cHRM_XYZ @230
+ png_get_cHRM_XYZ_fixed @231
+ png_set_cHRM_XYZ @232
+ png_set_cHRM_XYZ_fixed @233
+ png_image_begin_read_from_file @234
+ png_image_begin_read_from_stdio @235
+ png_image_begin_read_from_memory @236
+ png_image_finish_read @237
+ png_image_free @238
+ png_image_write_to_file @239
+ png_image_write_to_stdio @240
+ png_convert_to_rfc1123_buffer @241
+ png_set_check_for_invalid_index @242
+ png_get_palette_max @243
+ png_set_option @244
+ png_image_write_to_memory @245
+ png_get_eXIf @246
+ png_set_eXIf @247
+ png_get_eXIf_1 @248
+ png_set_eXIf_1 @249
--- a/xs/src/png/libpng/scripts/vers.c
+++ b/xs/src/png/libpng/scripts/vers.c
@ -0,0 +1,19 @@
+
+/* vers.c - define format of libpng.vers
+ *
+ * Last changed in libpng version 1.6.16 [December 22, 2014]
+ * Copyright (c) 2011-2014 Glenn Randers-Pehrson
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#define PNG_EXPORTA(ordinal, type, name, args, attributes)\
+        PNG_DFN " @" SYMBOL_PREFIX "@@" name "@;"
+
+PNG_DFN "@" PNGLIB_LIBNAME "@ {global:"
+
+#include "../png.h"
+
+PNG_DFN "local: *; };"
--- a/xs/src/png/zlib/CMakeLists.txt
+++ b/xs/src/png/zlib/CMakeLists.txt
@ -201,13 +201,15 @@ endif()

 if(UNIX)
    # On unix-like platforms the library is almost always called libz
-   set_target_properties(zlib zlibstatic PROPERTIES OUTPUT_NAME z)
+   set_target_properties(
+   # zlib 
+   zlibstatic PROPERTIES OUTPUT_NAME z)
   if(NOT APPLE)
-     set_target_properties(zlib PROPERTIES LINK_FLAGS "-Wl,--version-script,\"${CMAKE_CURRENT_SOURCE_DIR}/zlib.map\"")
+     # set_target_properties(zlib PROPERTIES LINK_FLAGS "-Wl,--version-script,\"${CMAKE_CURRENT_SOURCE_DIR}/zlib.map\"")
   endif()
 elseif(BUILD_SHARED_LIBS AND WIN32)
    # Creates zlib1.dll when building shared library version
-    set_target_properties(zlib PROPERTIES SUFFIX "1.dll")
+    # set_target_properties(zlib PROPERTIES SUFFIX "1.dll")
 endif()

 if(NOT SKIP_INSTALL_LIBRARIES AND NOT SKIP_INSTALL_ALL )